Program Listing for File MathWrappers.hpp

↰ Return to documentation for file (nvcv_types/include/nvcv/cuda/MathWrappers.hpp)
/*
 * SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef NVCV_CUDA_MATH_WRAPPERS_HPP
#define NVCV_CUDA_MATH_WRAPPERS_HPP

#include "StaticCast.hpp"              // for StaticCast, etc.
#include "TypeTraits.hpp"              // for Require, etc.
#include "detail/MathWrappersImpl.hpp" // for MathWrappersImpl, etc.

#include <cfenv> // for FE_TONEAREST, etc.

namespace nvcv::cuda {

// Represents the round mode to be used by \ref round function.
enum class RoundMode : int
{
    NEAREST = FE_TONEAREST,  //< Round to nearest even mode, as in "*_rn" CUDA device functions
    DOWN    = FE_DOWNWARD,   //< Round down mode, as in "*_rd" CUDA device functions
    UP      = FE_UPWARD,     //< Round up mode, as in "*_ru" CUDA device functions
    ZERO    = FE_TOWARDZERO, //< Round towards zero mode, as in "*_rz" CUDA device functions
    DEFAULT = NEAREST        //< Default round mode is round to nearest even mode
};

namespace detail {

template<typename T, typename U, typename RT, RoundMode RM>
inline __host__ __device__ RT RoundImpl(U u)
{
    if constexpr (std::is_integral_v<BaseType<U>>)
    {
        return StaticCast<T>(u);
    }
    else
    {
        RT out{};

#pragma unroll
        for (int e = 0; e < nvcv::cuda::NumElements<RT>; ++e)
        {
            GetElement(out, e) = RoundImpl<T, BaseType<U>, static_cast<int>(RM)>(GetElement(u, e));
        }

        return out;
    }
}

} // namespace detail

template<RoundMode RM, typename T, typename U,
         class = Require<(!std::is_same_v<T, U>)&&((NumComponents<T> == NumComponents<U>)
                                                   || (NumComponents<T> == 0 && HasTypeTraits<U>))>>
inline __host__ __device__ auto round(U u)
{
    return detail::RoundImpl<BaseType<T>, U, ConvertBaseTypeTo<BaseType<T>, U>, RM>(u);
}

template<typename T, typename U,
         class = Require<(!std::is_same_v<T, U>)&&((NumComponents<T> == NumComponents<U>)
                                                   || (NumComponents<T> == 0 && HasTypeTraits<U>))>>
inline __host__ __device__ auto round(U u)
{
    return detail::RoundImpl<BaseType<T>, U, ConvertBaseTypeTo<BaseType<T>, U>, RoundMode::DEFAULT>(u);
}

template<RoundMode RM, typename U>
inline __host__ __device__ auto round(U u)
{
    return detail::RoundImpl<BaseType<U>, U, U, RM>(u);
}

template<typename U>
inline __host__ __device__ auto round(U u)
{
    return detail::RoundImpl<BaseType<U>, U, U, RoundMode::DEFAULT>(u);
}

#define NVCV_CUDA_BINARY_SIMD(TYPE_U, INTRINSIC)                  \
    constexpr(std::is_same_v<U, TYPE_U>)                          \
    {                                                             \
        unsigned int r_a = *reinterpret_cast<unsigned int *>(&a); \
        unsigned int r_b = *reinterpret_cast<unsigned int *>(&b); \
        unsigned int ret = INTRINSIC(r_a, r_b);                   \
        return *reinterpret_cast<TYPE_U *>(&ret);                 \
    }

template<typename U, class = Require<HasTypeTraits<U>>>
inline __host__ __device__ U min(U a, U b)
{
    // clang-format off
#ifdef __CUDA_ARCH__
    if NVCV_CUDA_BINARY_SIMD (short2, __vmins2)
    else if NVCV_CUDA_BINARY_SIMD (char4, __vmins4)
    else if NVCV_CUDA_BINARY_SIMD (ushort2, __vminu2)
    else if NVCV_CUDA_BINARY_SIMD (uchar4, __vminu4)
    else
#endif
    {
        U out{};
#pragma unroll
        for (int e = 0; e < nvcv::cuda::NumElements<U>; ++e)
        {
            GetElement(out, e) = detail::MinImpl(GetElement(a, e), GetElement(b, e));
        }
        return out;
    }
    // clang-format on
}

template<typename U, class = Require<HasTypeTraits<U>>>
inline __host__ __device__ U max(U a, U b)
{
    // clang-format off
#ifdef __CUDA_ARCH__
    if NVCV_CUDA_BINARY_SIMD (short2, __vmaxs2)
    else if NVCV_CUDA_BINARY_SIMD (char4, __vmaxs4)
    else if NVCV_CUDA_BINARY_SIMD (ushort2, __vmaxu2)
    else if NVCV_CUDA_BINARY_SIMD (uchar4, __vmaxu4)
    else
#endif
    {
        U out{};
#pragma unroll
        for (int e = 0; e < nvcv::cuda::NumElements<U>; ++e)
        {
            GetElement(out, e) = detail::MaxImpl(GetElement(a, e), GetElement(b, e));
        }
        return out;
    }
    // clang-format on
}

#undef NVCV_CUDA_BINARY_SIMD

template<typename U, typename S,
         class = Require<(NumComponents<U> == NumComponents<S>) || (HasTypeTraits<U> && NumComponents<S> == 0)>>
inline __host__ __device__ U pow(U x, S y)
{
    U out{};

#pragma unroll
    for (int e = 0; e < nvcv::cuda::NumElements<U>; ++e)
    {
        GetElement(out, e) = detail::PowImpl(GetElement(x, e), GetElement(y, e));
    }

    return out;
}

template<typename U, class = Require<HasTypeTraits<U>>>
inline __host__ __device__ U exp(U u)
{
    U out{};

#pragma unroll
    for (int e = 0; e < nvcv::cuda::NumElements<U>; ++e)
    {
        GetElement(out, e) = detail::ExpImpl(GetElement(u, e));
    }

    return out;
}

template<typename U, class = Require<HasTypeTraits<U>>>
inline __host__ __device__ U sqrt(U u)
{
    U out{};

#pragma unroll
    for (int e = 0; e < nvcv::cuda::NumElements<U>; ++e)
    {
        GetElement(out, e) = detail::SqrtImpl(GetElement(u, e));
    }

    return out;
}

#define NVCV_CUDA_UNARY_SIMD(TYPE_U, INTRINSIC)                   \
    constexpr(std::is_same_v<U, TYPE_U>)                          \
    {                                                             \
        unsigned int r_u = *reinterpret_cast<unsigned int *>(&u); \
        unsigned int ret = INTRINSIC(r_u);                        \
        return *reinterpret_cast<TYPE_U *>(&ret);                 \
    }

template<typename U, class = Require<HasTypeTraits<U>>>
inline __host__ __device__ U abs(U u)
{
    // clang-format off
#ifdef __CUDA_ARCH__
    if NVCV_CUDA_UNARY_SIMD (short2, __vabsss2)
    else if NVCV_CUDA_UNARY_SIMD (char4, __vabsss4)
    else
#endif
    {
        U out{};
#pragma unroll
        for (int e = 0; e < nvcv::cuda::NumElements<U>; ++e)
        {
            GetElement(out, e) = detail::AbsImpl(GetElement(u, e));
        }
        return out;
    }
    // clang-format on
}

#undef NVCV_CUDA_UNARY_SIMD

template<typename U, typename S,
         class = Require<(NumComponents<U> == NumComponents<S>) || (HasTypeTraits<U> && NumComponents<S> == 0)>>
inline __host__ __device__ U clamp(U u, S lo, S hi)
{
    U out{};

#pragma unroll
    for (int e = 0; e < nvcv::cuda::NumElements<U>; ++e)
    {
        GetElement(out, e) = detail::ClampImpl(GetElement(u, e), GetElement(lo, e), GetElement(hi, e));
    }

    return out;
}

} // namespace nvcv::cuda

#endif // NVCV_CUDA_MATH_WRAPPERS_HPP