Crop And Resize

In this example, we cover some basic concepts of the CVCUDA C++ API: using Tensors, wrapping externally allocated data in a CVCUDA Tensor, and using Tensors with operators.

Creating a CMake Project

Create the CMake project to build the application as follows. The <samples/common> folder provides utilities common across the C++ samples, including IO utilities to read and write images using NvJpeg.

add_executable(cvcuda_sample_cropandresize Main.cpp)
target_link_libraries(cvcuda_sample_cropandresize nvcv_types cvcuda CUDA::cudart cvcuda_samples_common)

target_include_directories(cvcuda_sample_cropandresize
	PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..)

Writing the Sample App

The first stage in the sample pipeline is loading the input image. A CUDA stream is created to enqueue all the tasks.

cudaStream_t stream;
CHECK_CUDA_ERROR(cudaStreamCreate(&stream));

Since we need a contiguous buffer for a batch, we will preallocate the Tensor buffer for the input batch.

// Allocate memory for the RGBI input image batch of uint8_t data type
// without padding, since the NvDecode utility currently doesn't support
// padded buffers.

nvcv::TensorDataStridedCuda::Buffer inBuf;
inBuf.strides[3] = sizeof(uint8_t);
inBuf.strides[2] = maxChannels * inBuf.strides[3];
inBuf.strides[1] = maxImageWidth * inBuf.strides[2];
inBuf.strides[0] = maxImageHeight * inBuf.strides[1];
CHECK_CUDA_ERROR(cudaMallocAsync(&inBuf.basePtr, batchSize * inBuf.strides[0], stream));

The Tensor buffer is then wrapped to create a Tensor object. To do so, we first calculate the requirements of the buffer, such as strides and alignment.

// Calculate the requirements for the RGBI uint8_t Tensor which include
// pitch bytes, alignment, shape  and tensor layout
nvcv::Tensor::Requirements inReqs
    = nvcv::Tensor::CalcRequirements(batchSize, {maxImageWidth, maxImageHeight}, nvcv::FMT_RGB8);

// Create a tensor buffer to store the data pointer and pitch bytes for each plane
nvcv::TensorDataStridedCuda inData(nvcv::TensorShape{inReqs.shape, inReqs.rank, inReqs.layout},
                                   nvcv::DataType{inReqs.dtype}, inBuf);

// TensorWrapData allows for interoperation of external tensor representations with CVCUDA Tensor.
nvcv::Tensor inTensor = nvcv::TensorWrapData(inData);

We will use the NvJpeg library to decode the images into the required color format and create a buffer on the device.

// NvJpeg is used to load the images to create a batched input device buffer.
uint8_t             *gpuInput = reinterpret_cast<uint8_t *>(inBuf.basePtr);
// The total images is set to the same value as batch size for testing
uint32_t             totalImages = batchSize;
// Format in which the decoded output will be saved
nvjpegOutputFormat_t outputFormat = NVJPEG_OUTPUT_RGBI;

NvDecode(imagePath, batchSize, totalImages, outputFormat, gpuInput);

The CVCUDA Tensor is now ready to be used by the operators.

We will allocate the Tensors required for Resize and Crop using the CVCUDA allocator.

    // Create a CVCUDA Tensor based on the crop window size.
    nvcv::Tensor cropTensor(batchSize, {cropWidth, cropHeight}, nvcv::FMT_RGB8);
    // Create a CVCUDA Tensor based on resize dimensions
    nvcv::Tensor resizedTensor(batchSize, {resizeWidth, resizeHeight}, nvcv::FMT_RGB8);

#ifdef PROFILE_SAMPLE
    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);
    cudaEventRecord(start);
#endif

Initialize the resize and crop operators

cvcuda::CustomCrop cropOp;
cvcuda::Resize     resizeOp;

We can now enqueue both operations in the stream.

cropOp(stream, inTensor, cropTensor, crpRect);

// Resize operator can now be enqueued into the same stream
resizeOp(stream, cropTensor, resizedTensor, NVCV_INTERP_LINEAR);

To access the output, we synchronize the stream and copy the result to the CPU output buffer. We will use the utility below to sync and write the CPU output buffer into a bitmap file.

WriteRGBITensor(resizedTensor, stream);

Finally, destroy the CUDA stream that was created.

CHECK_CUDA_ERROR(cudaStreamDestroy(stream));

Build and Run the Sample

The sample can now be compiled using cmake.

mkdir build
cd build
cmake .. && make

To run the sample

./build/cvcuda_sample_cropandresize -i <image path> -b <batch size>

Sample Output

Input Image of size 700x700

Output Image cropped with ROI [150, 50, 400, 300] and resized to 320x240