Program Listing for File BorderWrap.hpp

Return to documentation for file (nvcv_types/include/nvcv/cuda/BorderWrap.hpp)

/*
 * SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef NVCV_CUDA_BORDER_WRAP_HPP
#define NVCV_CUDA_BORDER_WRAP_HPP

#include "FullTensorWrap.hpp" // for FullTensorWrap, etc.
#include "TensorWrap.hpp"     // for TensorWrap, etc.
#include "TypeTraits.hpp"     // for NumElements, etc.

#include <nvcv/BorderType.h>         // for NVCVBorderType, etc.
#include <nvcv/TensorData.hpp>       // for TensorDataStridedCuda, etc.
#include <nvcv/TensorDataAccess.hpp> // for TensorDataAccessStridedImagePlanar, etc.

namespace nvcv::cuda {

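// Checks whether coordinate c lies outside the valid range [0, s) of a
// dimension of size s. When Active is false the check is compiled out and the
// function always returns false.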
template<bool Active = true, typename T>
constexpr inline bool __host__ __device__ IsOutside(T c, T s)
{
    if constexpr (Active)
    {
        return (c < 0) || (c >= s);
    }
    return false;
}

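// Maps a possibly out-of-range coordinate c back into the valid range [0, s)
// according to the border type B: REPLICATE clamps to the nearest edge, WRAP
// tiles the range periodically, REFLECT mirrors and repeats the edge element,
// and REFLECT101 mirrors without repeating the edge element. Constant borders
// are handled by the BorderWrap specialization below instead.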
template<NVCVBorderType B, bool Active = true, typename T>
constexpr inline T __host__ __device__ GetIndexWithBorder(T c, T s)
{
    static_assert(B != NVCV_BORDER_CONSTANT, "GetIndexWithBorder cannot be used with NVCV_BORDER_CONSTANT");

    if constexpr (Active)
    {
        assert(s > 0);

        if constexpr (B == NVCV_BORDER_REPLICATE)
        {
            c = (c < 0) ? 0 : (c >= s ? s - 1 : c);
        }
        else if constexpr (B == NVCV_BORDER_WRAP)
        {
            c = c % s;
            if (c < 0)
            {
                c += s;
            }
        }
        else if constexpr (B == NVCV_BORDER_REFLECT)
        {
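            // Mirroring repeats the edge element: -1 maps to 0, -2 to 1, and
            // s maps to s - 1.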
            T s2 = s * 2;
            c    = c % s2;
            if (c < 0)
            {
                c += s2;
            }
            c = s - 1 - (abs(2 * c + 1 - s2) >> 1);
        }
        else if constexpr (B == NVCV_BORDER_REFLECT101)
        {
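            // Mirroring does not repeat the edge element: -1 maps to 1 and
            // s maps to s - 2.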
            if (s == 1)
            {
                c = 0;
            }
            else
            {
                c = c % (2 * s - 2);
                if (c < 0)
                {
                    c += 2 * s - 2;
                }
                c = s - 1 - abs(s - 1 - c);
            }
        }

        assert(c >= 0 && c < s);
    }

    return c;
}

namespace detail {

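// Common implementation detail for BorderWrap: stores the wrapped tensor and
// the sizes of the dimensions that take part in border handling (the "active"
// dimensions selected by the ActiveDimensions flags).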
template<class TW, NVCVBorderType B, bool... ActiveDimensions>
class BorderWrapImpl
{
public:
    using TensorWrapper = TW;
    using ValueType     = typename TensorWrapper::ValueType;

    static constexpr int            kNumDimensions = TensorWrapper::kNumDimensions;
    static constexpr NVCVBorderType kBorderType    = B;

    static_assert(kNumDimensions == sizeof...(ActiveDimensions));

    static constexpr bool kActiveDimensions[]  = {ActiveDimensions...};
    static constexpr int  kNumActiveDimensions = ((ActiveDimensions ? 1 : 0) + ...);

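    // Maps each tensor dimension to its position within m_tensorShape, which
    // only stores the sizes of the active dimensions; inactive dimensions keep
    // the default index 0.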
    struct ActiveMap
    {
        int from[kNumDimensions];

        constexpr ActiveMap()
            : from()
        {
            int j = 0;
            for (int i = 0; i < kNumDimensions; ++i)
            {
                if (kActiveDimensions[i])
                {
                    from[i] = j++;
                }
            }
        }
    };

    static constexpr ActiveMap kMap{};

    BorderWrapImpl() = default;

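    // Constructs from a tensor wrap plus the sizes of the active dimensions;
    // when no sizes are given, the wrapper must be a FullTensorWrap so the
    // active sizes can be read from its shapes().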
    template<typename... Args>
    explicit __host__ __device__ BorderWrapImpl(TensorWrapper tensorWrap, Args... tensorShape)
        : m_tensorWrap(tensorWrap)
        , m_tensorShape{std::forward<int>(tensorShape)...}
    {
        if constexpr (sizeof...(Args) == 0)
        {
            static_assert(std::is_base_of_v<TensorWrapper, FullTensorWrap<ValueType, kNumDimensions>>);
            int j = 0;
#pragma unroll
            for (int i = 0; i < kNumDimensions; ++i)
            {
                if (kActiveDimensions[i])
                {
                    m_tensorShape[j++] = tensorWrap.shapes()[i];
                }
            }
        }
        else
        {
            static_assert(std::conjunction_v<std::is_same<int, Args>...>);
            static_assert(sizeof...(Args) == kNumActiveDimensions);
        }
    }

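    // Constructs from strided CUDA tensor data, reading the sizes of the
    // active dimensions from the tensor shape.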
    explicit __host__ BorderWrapImpl(const TensorDataStridedCuda &tensor)
        : m_tensorWrap(tensor)
    {
        assert(tensor.rank() >= kNumDimensions);

        int j = 0;
#pragma unroll
        for (int i = 0; i < kNumDimensions; ++i)
        {
            if (kActiveDimensions[i])
            {
                assert(tensor.shape(i) <= TypeTraits<int>::max);

                m_tensorShape[j++] = tensor.shape(i);
            }
        }
    }

    inline const __host__ __device__ TensorWrapper &tensorWrap() const
    {
        return m_tensorWrap;
    }

    inline __host__ __device__ const int *tensorShape() const
    {
        return m_tensorShape;
    }

    inline __host__ __device__ ValueType borderValue() const
    {
        return ValueType{};
    }

protected:
    const TensorWrapper m_tensorWrap                        = {};
    int                 m_tensorShape[kNumActiveDimensions] = {0};
};

} // namespace detail

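// Border-aware tensor wrapper: out-of-range coordinates along the active
// dimensions are remapped into the tensor according to the border type B.
// This primary template covers every border type except NVCV_BORDER_CONSTANT.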
template<class TW, NVCVBorderType B, bool... ActiveDimensions>
class BorderWrap : public detail::BorderWrapImpl<TW, B, ActiveDimensions...>
{
    using Base = detail::BorderWrapImpl<TW, B, ActiveDimensions...>;

public:
    using typename Base::TensorWrapper;
    using typename Base::ValueType;

    using Base::kActiveDimensions;
    using Base::kBorderType;
    using Base::kMap;
    using Base::kNumActiveDimensions;
    using Base::kNumDimensions;

    BorderWrap() = default;

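    // Note: borderValue is accepted only for interface uniformity with the
    // NVCV_BORDER_CONSTANT specialization; it is not stored for other border
    // types.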
    template<typename... Args>
    explicit __host__ __device__ BorderWrap(TensorWrapper tensorWrap, ValueType borderValue, Args... tensorShape)
        : Base(tensorWrap, tensorShape...)
    {
    }

    explicit __host__ BorderWrap(const TensorDataStridedCuda &tensor, ValueType borderValue = {})
        : Base(tensor)
    {
    }

    // Get the tensor wrapped by this border wrap.
    using Base::tensorWrap;

    // Get the shape of the tensor wrapped by this border wrap.
    using Base::tensorShape;

    // Get the border value of this border wrap; none is stored, so a
    // default-constructed (empty) value is returned.
    using Base::borderValue;

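    // Accesses the element at the given coordinates (an int or a CUDA int1/2/3/4
    // vector, with x indexing the innermost dimension); out-of-range coordinates
    // along active dimensions are remapped by the border policy.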
    template<typename DimType, class = Require<std::is_same_v<int, BaseType<DimType>>>>
    inline __host__ __device__ ValueType &operator[](DimType c) const
    {
        constexpr int N = NumElements<DimType>;
        static_assert(kNumDimensions >= N);

        constexpr auto Is = std::make_index_sequence<N>{};

        if constexpr (N == 1)
        {
            if constexpr (NumComponents<DimType> == 0)
            {
                return *doGetPtr(Is, c);
            }
            else
            {
                return *doGetPtr(Is, c.x);
            }
        }
        else if constexpr (N == 2)
        {
            return *doGetPtr(Is, c.y, c.x);
        }
        else if constexpr (N == 3)
        {
            return *doGetPtr(Is, c.z, c.y, c.x);
        }
        else if constexpr (N == 4)
        {
            return *doGetPtr(Is, c.w, c.z, c.y, c.x);
        }
    }

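    // Returns a pointer to the element at the given coordinates, remapping each
    // active coordinate with the border policy.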
    template<typename... Args>
    inline __host__ __device__ ValueType *ptr(Args... c) const
    {
        return doGetPtr(std::index_sequence_for<Args...>{}, std::forward<Args>(c)...);
    }

private:
    template<typename... Args, std::size_t... Is>
    inline __host__ __device__ ValueType *doGetPtr(std::index_sequence<Is...>, Args... c) const
    {
        return Base::m_tensorWrap.ptr(
            GetIndexWithBorder<kBorderType, kActiveDimensions[Is]>(c, Base::m_tensorShape[kMap.from[Is]])...);
    }
};

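// Specialization for NVCV_BORDER_CONSTANT: the border value passed at
// construction is stored and returned whenever any active coordinate falls
// outside the tensor.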
template<class TW, bool... ActiveDimensions>
class BorderWrap<TW, NVCV_BORDER_CONSTANT, ActiveDimensions...>
    : public detail::BorderWrapImpl<TW, NVCV_BORDER_CONSTANT, ActiveDimensions...>
{
    using Base = detail::BorderWrapImpl<TW, NVCV_BORDER_CONSTANT, ActiveDimensions...>;

public:
    using typename Base::TensorWrapper;
    using typename Base::ValueType;

    using Base::kActiveDimensions;
    using Base::kBorderType;
    using Base::kMap;
    using Base::kNumActiveDimensions;
    using Base::kNumDimensions;

    BorderWrap() = default;

    template<typename... Args>
    explicit __host__ __device__ BorderWrap(TensorWrapper tensorWrap, ValueType borderValue, Args... tensorShape)
        : Base(tensorWrap, tensorShape...)
        , m_borderValue(borderValue)
    {
    }

    explicit __host__ BorderWrap(const TensorDataStridedCuda &tensor, ValueType borderValue = {})
        : Base(tensor)
        , m_borderValue(borderValue)
    {
    }

    // Get the tensor wrapped by this border wrap.
    using Base::tensorWrap;

    // Get the shape of the tensor wrapped by this border wrap.
    using Base::tensorShape;

    inline __host__ __device__ ValueType borderValue() const
    {
        return m_borderValue;
    }

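    // Accesses the element at the given coordinates; if any active coordinate
    // is outside the tensor, the stored border value is returned instead.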
    template<typename DimType, class = Require<std::is_same_v<int, BaseType<DimType>>>>
    inline const __host__ __device__ ValueType &operator[](DimType c) const
    {
        constexpr int N = NumElements<DimType>;
        static_assert(kNumDimensions >= N);

        constexpr auto Is = std::make_index_sequence<N>{};

        const ValueType *p = nullptr;

        if constexpr (N == 1)
        {
            if constexpr (NumComponents<DimType> == 0)
            {
                p = doGetPtr(Is, c);
            }
            else
            {
                p = doGetPtr(Is, c.x);
            }
        }
        else if constexpr (N == 2)
        {
            p = doGetPtr(Is, c.y, c.x);
        }
        else if constexpr (N == 3)
        {
            p = doGetPtr(Is, c.z, c.y, c.x);
        }
        else if constexpr (N == 4)
        {
            p = doGetPtr(Is, c.w, c.z, c.y, c.x);
        }

        if (p == nullptr)
        {
            return m_borderValue;
        }

        return *p;
    }

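    // Returns a pointer to the element at the given coordinates, or nullptr if
    // any active coordinate is outside the tensor.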
    template<typename... Args>
    inline __host__ __device__ ValueType *ptr(Args... c) const
    {
        return doGetPtr(std::index_sequence_for<Args...>{}, std::forward<Args>(c)...);
    }

private:
    template<typename... Args, std::size_t... Is>
    inline __host__ __device__ ValueType *doGetPtr(std::index_sequence<Is...>, Args... c) const
    {
        if ((IsOutside<kActiveDimensions[Is]>(c, Base::m_tensorShape[kMap.from[Is]]) || ...))
        {
            return nullptr;
        }
        return Base::m_tensorWrap.ptr(c...);
    }

    const ValueType m_borderValue = SetAll<ValueType>(0);
};

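// Creates a BorderWrap for an NHW tensor in which only the H and W dimensions
// are border-aware; the N dimension is assumed to always be in range.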
template<typename T, NVCVBorderType B, class = Require<HasTypeTraits<T>>>
__host__ auto CreateBorderWrapNHW(const TensorDataStridedCuda &tensor, T borderValue = {})
{
    auto tensorAccess = TensorDataAccessStridedImagePlanar::Create(tensor);
    assert(tensorAccess);
    assert(tensorAccess->numRows() <= TypeTraits<int>::max);
    assert(tensorAccess->numCols() <= TypeTraits<int>::max);

    auto tensorWrap = CreateTensorWrapNHW<T>(tensor);

    return BorderWrap<decltype(tensorWrap), B, false, true, true>(
        tensorWrap, borderValue, static_cast<int>(tensorAccess->numRows()), static_cast<int>(tensorAccess->numCols()));
}

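// Creates a BorderWrap for an NHWC tensor in which only the H and W dimensions
// are border-aware; the N and C dimensions are assumed to always be in range.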
template<typename T, NVCVBorderType B, class = Require<HasTypeTraits<T>>>
__host__ auto CreateBorderWrapNHWC(const TensorDataStridedCuda &tensor, T borderValue = {})
{
    auto tensorAccess = TensorDataAccessStridedImagePlanar::Create(tensor);
    assert(tensorAccess);
    assert(tensorAccess->numRows() <= TypeTraits<int>::max);
    assert(tensorAccess->numCols() <= TypeTraits<int>::max);

    auto tensorWrap = CreateTensorWrapNHWC<T>(tensor);

    return BorderWrap<decltype(tensorWrap), B, false, true, true, false>(
        tensorWrap, borderValue, static_cast<int>(tensorAccess->numRows()), static_cast<int>(tensorAccess->numCols()));
}

} // namespace nvcv::cuda

#endif // NVCV_CUDA_BORDER_WRAP_HPP
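
What follows is a minimal usage sketch, not part of BorderWrap.hpp itself. It assumes a float NHW source tensor already exported as nvcv::TensorDataStridedCuda; the kernel, buffer, and launch-configuration names (ShiftRight, srcData, dstDevPtr, grid, block) are illustrative only.

#include <nvcv/cuda/BorderWrap.hpp>

// Hypothetical kernel: reads the source through a border wrapper, so
// coordinates shifted outside the image are resolved by the border policy.
template<class SrcWrapper>
__global__ void ShiftRight(SrcWrapper src, float *dst, int width, int height)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    int b = blockIdx.z; // batch (N) index, assumed always in range

    if (x >= width || y >= height)
        return;

    // Reading at x - 1 falls outside the image on the first column; with
    // NVCV_BORDER_REPLICATE the edge pixel is returned there instead.
    dst[(b * height + y) * width + x] = src[int3{x - 1, y, b}];
}

// Host side (sketch): only H and W are border-aware for an NHW wrap.
//   auto src = nvcv::cuda::CreateBorderWrapNHW<float, NVCV_BORDER_REPLICATE>(srcData);
//   ShiftRight<<<grid, block>>>(src, dstDevPtr, width, height);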