Semantic Segmentation Inference Using PyTorch

The semantic segmentation sample in CVCUDA uses the fcn_resnet101 deep learning model from the torchvision library. Since the model does not include a softmax layer at the end, we add one. The following code snippet shows how the model is set up for inference with PyTorch.

# Imports needed by this snippet.
import logging

import torch
from torchvision.models import segmentation as segmentation_models


class SegmentationPyTorch:  # noqa: E302
    def __init__(
        self,
        output_dir,
        seg_class_name,
        batch_size,
        image_size,
        device_id,
        cvcuda_perf,
    ):
        self.logger = logging.getLogger(__name__)
        self.output_dir = output_dir
        self.device_id = device_id
        self.cvcuda_perf = cvcuda_perf
        # Fetch the segmentation index to class name information from the weights
        # meta properties.
        # The underlying PyTorch model that we use for inference is the FCN model
        # from torchvision.
        torch_model = segmentation_models.fcn_resnet101
        weights = segmentation_models.FCN_ResNet101_Weights.DEFAULT

        try:
            self.class_index = weights.meta["categories"].index(seg_class_name)
        except ValueError:
            raise ValueError(
                "Requested segmentation class '%s' is not supported by the "
                "fcn_resnet101 model. All supported class names are: %s"
                % (seg_class_name, ", ".join(weights.meta["categories"]))
            )

        # Inference uses PyTorch to run a segmentation model on the pre-processed
        # input and outputs the segmentation masks.
        class FCN_Softmax(torch.nn.Module):
            def __init__(self, fcn):
                super(FCN_Softmax, self).__init__()
                self.fcn = fcn

            def forward(self, x):
                infer_output = self.fcn(x)["out"]
                return torch.nn.functional.softmax(infer_output, dim=1)

        fcn_base = torch_model(weights=weights)
        fcn_base.eval()
        self.model = FCN_Softmax(fcn_base).cuda(self.device_id)
        self.model.eval()

        self.logger.info("Using PyTorch as the inference engine.")
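
For reference, the snippet below is a minimal sketch of how this class could be constructed on its own. It is not part of the sample: the no-op perf tracker and the argument values are placeholders, chosen only so the constructor can run (it downloads the pretrained weights and requires a CUDA-capable GPU).

# Minimal sketch, not part of the sample. NoOpPerf is a stand-in that only
# provides the push_range/pop_range calls used by __call__ below.
class NoOpPerf:
    def push_range(self, *args, **kwargs):
        pass

    def pop_range(self, *args, **kwargs):
        pass


segmenter = SegmentationPyTorch(
    output_dir="/tmp/out",       # placeholder output directory
    seg_class_name="dog",        # must be one of weights.meta["categories"]
    batch_size=1,
    image_size=(224, 224),
    device_id=0,
    cvcuda_perf=NoOpPerf(),
)
print("Index of the requested class:", segmenter.class_index)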

To run the inference, the __call__ method is used. It makes sure the input resides on the correct CUDA device, converting a CVCUDA tensor to a PyTorch tensor if necessary, and performs the forward pass without computing gradients.

def __call__(self, tensor):
    self.cvcuda_perf.push_range("inference.torch")

    with torch.no_grad():

        if isinstance(tensor, torch.Tensor):
            if not tensor.is_cuda:
                tensor = tensor.to("cuda:%d" % self.device_id)
        else:
            # Convert CVCUDA tensor to Torch tensor.
            tensor = torch.as_tensor(
                tensor.cuda(), device="cuda:%d" % self.device_id
            )

        segmented = self.model(tensor)

    self.cvcuda_perf.pop_range()
    return segmented
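
End-to-end, a call then looks like the following minimal sketch, which assumes the segmenter instance constructed in the earlier example. A random NCHW batch stands in for the pre-processed CVCUDA input the sample would normally supply; the softmax output is indexed with class_index to obtain the probability map of the requested class.

import torch

# Minimal sketch: random data in place of a real pre-processed batch.
device_id = 0
dummy_input = torch.rand(1, 3, 224, 224, device="cuda:%d" % device_id)

probs = segmenter(dummy_input)  # shape: (N, num_classes, H, W), softmax over classes
class_mask = probs[:, segmenter.class_index, :, :]  # probability map for the class
print(class_mask.shape)  # torch.Size([1, 224, 224])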