Object Detection Inference Using TensorFlow

The object detection sample in CVCUDA uses the PeopleNet model from NGC. The HDF5 model file is downloaded from NGC if it is not already present in the output directory. We then load the model with Keras on the appropriate GPU device.

 1class ObjectDetectionTensorflow:
 2    def __init__(
 3        self,
 4        output_dir,
 5        batch_size,
 6        image_size,
 7        device_id,
 8        cvcuda_perf,
 9    ):
10        self.logger = logging.getLogger(__name__)
11        self.output_dir = output_dir
12        self.batch_size = batch_size
13        self.image_size = image_size
14        self.device_id = device_id
15        self.cvcuda_perf = cvcuda_perf
16
17        physical_devices = tf.config.list_physical_devices("GPU")
18        tf.config.experimental.set_memory_growth(physical_devices[self.device_id], True)
19
20        hdf5_model_path = os.path.join(output_dir, "resnet34_peoplenet.hdf5")
21
22        if not os.path.isfile(hdf5_model_path):
23            # We need to download the HDF5 model first from NGC.
24            model_url = (
25                "https://api.ngc.nvidia.com/v2/models/"
26                "org/nvidia/team/tao/peoplenet/trainable_unencrypted_v2.6/"
27                "files?redirect=true&path=model.hdf5"
28            )
29            self.logger.info("Downloading the PeopleNet model from NGC: %s" % model_url)
30            urllib.request.urlretrieve(model_url, hdf5_model_path)
31            self.logger.info("Download complete. Saved to: %s" % hdf5_model_path)
32
33        with tf.device("/GPU:%d" % self.device_id):
34            self.model = tf.keras.models.load_model(hdf5_model_path)
35            self.logger.info("TensorFlow PeopleNet model is loaded.")
36
37        self.logger.info("Using TensorFlow as the inference engine.")

To run inference, the __call__ method is used. It converts the incoming tensor from formats such as torch.Tensor, nvcv.Tensor or numpy.ndarray to a tensorflow.Tensor object. A numpy.ndarray is converted directly with tf.convert_to_tensor. Since both PyTorch and CVCUDA tensors support the DLPack interface, we use it to convert them to a TensorFlow tensor. At the time of this writing, a bug prevents the conversion of tensors that are not flattened beforehand. Hence, we temporarily record the shape of the input tensor, flatten it, use DLPack to convert it to a tensorflow.Tensor, and then reshape it back to its original shape.

 1def __call__(self, frame_nchw):
 2    self.cvcuda_perf.push_range("inference.tensorflow")
 3
 4    if isinstance(frame_nchw, torch.Tensor):
 5        # We convert torch.Tensor to tf.Tensor by:
 6        # torch.Tensor -> Pytorch Flat Tensor -> DlPack -> tf.Tensor -> Un-flatten
 7        frame_nchw_shape = frame_nchw.shape
 8        frame_nchw = frame_nchw.flatten()
 9        frame_nchw_tf = tf.experimental.dlpack.from_dlpack(frame_nchw.__dlpack__())
10        frame_nchw_tf = tf.reshape(frame_nchw_tf, frame_nchw_shape)
11
12    elif isinstance(frame_nchw, nvcv.Tensor):
13        # We convert nvcv.Tensor to tf.Tensor by:
14        # nvcv.Tensor -> PyTorch Tensor -> Pytorch Flat Tensor -> DlPack -> tf.Tensor -> Un-flatten
15        frame_nchw_pyt = torch.as_tensor(
16            frame_nchw.cuda(), device="cuda:%d" % self.device_id
17        )
18        frame_nchw_pyt = frame_nchw_pyt.flatten()
19        frame_nchw_tf = tf.experimental.dlpack.from_dlpack(
20            frame_nchw_pyt.__dlpack__()
21        )
22        frame_nchw_tf = tf.reshape(frame_nchw_tf, frame_nchw.shape)
23
24    elif isinstance(frame_nchw, np.ndarray):
25        frame_nchw_tf = tf.convert_to_tensor(frame_nchw)
26
27    else:
28        raise ValueError(
29            "Invalid type of input tensor for tensorflow inference: %s"
30            % str(type(frame_nchw))
31        )
32
33    with tf.device("/GPU:%d" % self.device_id):
34        output_tensors = self.model(frame_nchw_tf)  # returns a tuple.
35
36    # Convert the output to PyTorch Tensors
37    boxes = torch.from_dlpack(tf.experimental.dlpack.to_dlpack(output_tensors[0]))
38    score = torch.from_dlpack(tf.experimental.dlpack.to_dlpack(output_tensors[1]))
39
40    self.cvcuda_perf.pop_range()  # inference.tensorflow
41    return boxes, score