Semantic Segmentation Inference Using PyTorch

The semantic segmentation sample in CVCUDA uses the fcn_resnet101 deep learning model from the torchvision library. Since the model does not include a softmax layer at the end, we add one. The following code snippet shows how the model is set up for inference with PyTorch.

# Imports needed by this snippet.
import logging

import torch
from torchvision.models import segmentation as segmentation_models


class SegmentationPyTorch:  # noqa: E302
    def __init__(
        self,
        output_dir,
        seg_class_name,
        batch_size,
        image_size,
        device_id,
        cvcuda_perf,
    ):
        self.logger = logging.getLogger(__name__)
        self.output_dir = output_dir
        self.device_id = device_id
        self.cvcuda_perf = cvcuda_perf
        # Fetch the segmentation index to class name information from the weights
        # meta properties.
        # The underlying PyTorch model that we use for inference is the FCN model
        # from torchvision.
        torch_model = segmentation_models.fcn_resnet101
        weights = segmentation_models.FCN_ResNet101_Weights.DEFAULT

        try:
            self.class_index = weights.meta["categories"].index(seg_class_name)
        except ValueError:
            raise ValueError(
                "Requested segmentation class '%s' is not supported by the "
                "fcn_resnet101 model. All supported class names are: %s"
                % (seg_class_name, ", ".join(weights.meta["categories"]))
            )

        # Inference uses PyTorch to run a segmentation model on the pre-processed
        # input and outputs the segmentation masks.
        class FCN_Softmax(torch.nn.Module):
            def __init__(self, fcn):
                super(FCN_Softmax, self).__init__()
                self.fcn = fcn

            def forward(self, x):
                infer_output = self.fcn(x)["out"]
                return torch.nn.functional.softmax(infer_output, dim=1)

        fcn_base = torch_model(weights=weights)
        fcn_base.eval()
        self.model = FCN_Softmax(fcn_base).cuda(self.device_id)
        self.model.eval()

        self.logger.info("Using PyTorch as the inference engine.")
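
For reference, the snippet below is a minimal sketch of how this class could be constructed on its own. It is not part of the sample: the no-op perf tracker and the argument values are placeholders, chosen only so the constructor can run (it downloads the pretrained weights and requires a CUDA-capable GPU).

# Minimal sketch, not part of the sample. NoOpPerf is a stand-in that only
# provides the push_range/pop_range calls used by __call__ below.
class NoOpPerf:
    def push_range(self, *args, **kwargs):
        pass

    def pop_range(self, *args, **kwargs):
        pass


segmenter = SegmentationPyTorch(
    output_dir="/tmp/out",       # placeholder output directory
    seg_class_name="dog",        # must be one of weights.meta["categories"]
    batch_size=1,
    image_size=(224, 224),
    device_id=0,
    cvcuda_perf=NoOpPerf(),
)
print("Index of the requested class:", segmenter.class_index)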

To run the inference, the __call__ method is used. It makes sure the input resides on the correct CUDA device, converting a CVCUDA tensor to a PyTorch tensor if necessary, and performs the forward pass without computing gradients.

def __call__(self, tensor):
    self.cvcuda_perf.push_range("inference.torch")

    with torch.no_grad():

        if isinstance(tensor, torch.Tensor):
            if not tensor.is_cuda:
                tensor = tensor.to("cuda:%d" % self.device_id)
        else:
            # Convert CVCUDA tensor to Torch tensor.
            tensor = torch.as_tensor(
                tensor.cuda(), device="cuda:%d" % self.device_id
            )

        segmented = self.model(tensor)

    self.cvcuda_perf.pop_range()
    return segmented
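
End-to-end, a call then looks like the following minimal sketch, which assumes the segmenter instance constructed in the earlier example. A random NCHW batch stands in for the pre-processed CVCUDA input the sample would normally supply; the softmax output is indexed with class_index to obtain the probability map of the requested class.

import torch

# Minimal sketch: random data in place of a real pre-processed batch.
device_id = 0
dummy_input = torch.rand(1, 3, 224, 224, device="cuda:%d" % device_id)

probs = segmenter(dummy_input)  # shape: (N, num_classes, H, W), softmax over classes
class_mask = probs[:, segmenter.class_index, :, :]  # probability map for the class
print(class_mask.shape)  # torch.Size([1, 224, 224])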