Program Listing for File MathWrappers.hpp
↰ Return to documentation for file (nvcv_types/include/nvcv/cuda/MathWrappers.hpp
)
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef NVCV_CUDA_MATH_WRAPPERS_HPP
#define NVCV_CUDA_MATH_WRAPPERS_HPP
#include "StaticCast.hpp" // for StaticCast, etc.
#include "TypeTraits.hpp" // for Require, etc.
#include "detail/MathWrappersImpl.hpp" // for MathWrappersImpl, etc.
#include <cfenv> // for FE_TONEAREST, etc.
namespace nvcv::cuda {
// Represents the round mode to be used by \ref round function.
enum class RoundMode : int
{
NEAREST = FE_TONEAREST, //< Round to nearest even mode, as in "*_rn" CUDA device functions
DOWN = FE_DOWNWARD, //< Round down mode, as in "*_rd" CUDA device functions
UP = FE_UPWARD, //< Round up mode, as in "*_ru" CUDA device functions
ZERO = FE_TOWARDZERO, //< Round towards zero mode, as in "*_rz" CUDA device functions
DEFAULT = NEAREST //< Default round mode is round to nearest even mode
};
namespace detail {
template<typename T, typename U, typename RT, RoundMode RM>
inline __host__ __device__ RT RoundImpl(U u)
{
if constexpr (std::is_integral_v<BaseType<U>>)
{
return StaticCast<T>(u);
}
else
{
RT out{};
#pragma unroll
for (int e = 0; e < nvcv::cuda::NumElements<RT>; ++e)
{
GetElement(out, e) = RoundImpl<T, BaseType<U>, static_cast<int>(RM)>(GetElement(u, e));
}
return out;
}
}
} // namespace detail
template<RoundMode RM, typename T, typename U,
class = Require<(!std::is_same_v<T, U>)&&((NumComponents<T> == NumComponents<U>)
|| (NumComponents<T> == 0 && HasTypeTraits<U>))>>
inline __host__ __device__ auto round(U u)
{
return detail::RoundImpl<BaseType<T>, U, ConvertBaseTypeTo<BaseType<T>, U>, RM>(u);
}
template<typename T, typename U,
class = Require<(!std::is_same_v<T, U>)&&((NumComponents<T> == NumComponents<U>)
|| (NumComponents<T> == 0 && HasTypeTraits<U>))>>
inline __host__ __device__ auto round(U u)
{
return detail::RoundImpl<BaseType<T>, U, ConvertBaseTypeTo<BaseType<T>, U>, RoundMode::DEFAULT>(u);
}
template<RoundMode RM, typename U>
inline __host__ __device__ auto round(U u)
{
return detail::RoundImpl<BaseType<U>, U, U, RM>(u);
}
template<typename U>
inline __host__ __device__ auto round(U u)
{
return detail::RoundImpl<BaseType<U>, U, U, RoundMode::DEFAULT>(u);
}
#define NVCV_CUDA_BINARY_SIMD(TYPE_U, INTRINSIC) \
constexpr(std::is_same_v<U, TYPE_U>) \
{ \
unsigned int r_a = *reinterpret_cast<unsigned int *>(&a); \
unsigned int r_b = *reinterpret_cast<unsigned int *>(&b); \
unsigned int ret = INTRINSIC(r_a, r_b); \
return *reinterpret_cast<TYPE_U *>(&ret); \
}
template<typename U, class = Require<HasTypeTraits<U>>>
inline __host__ __device__ U min(U a, U b)
{
// clang-format off
#ifdef __CUDA_ARCH__
if NVCV_CUDA_BINARY_SIMD (short2, __vmins2)
else if NVCV_CUDA_BINARY_SIMD (char4, __vmins4)
else if NVCV_CUDA_BINARY_SIMD (ushort2, __vminu2)
else if NVCV_CUDA_BINARY_SIMD (uchar4, __vminu4)
else
#endif
{
U out{};
#pragma unroll
for (int e = 0; e < nvcv::cuda::NumElements<U>; ++e)
{
GetElement(out, e) = detail::MinImpl(GetElement(a, e), GetElement(b, e));
}
return out;
}
// clang-format on
}
template<typename U, class = Require<HasTypeTraits<U>>>
inline __host__ __device__ U max(U a, U b)
{
// clang-format off
#ifdef __CUDA_ARCH__
if NVCV_CUDA_BINARY_SIMD (short2, __vmaxs2)
else if NVCV_CUDA_BINARY_SIMD (char4, __vmaxs4)
else if NVCV_CUDA_BINARY_SIMD (ushort2, __vmaxu2)
else if NVCV_CUDA_BINARY_SIMD (uchar4, __vmaxu4)
else
#endif
{
U out{};
#pragma unroll
for (int e = 0; e < nvcv::cuda::NumElements<U>; ++e)
{
GetElement(out, e) = detail::MaxImpl(GetElement(a, e), GetElement(b, e));
}
return out;
}
// clang-format on
}
#undef NVCV_CUDA_BINARY_SIMD
template<typename U, typename S,
class = Require<(NumComponents<U> == NumComponents<S>) || (HasTypeTraits<U> && NumComponents<S> == 0)>>
inline __host__ __device__ U pow(U x, S y)
{
U out{};
#pragma unroll
for (int e = 0; e < nvcv::cuda::NumElements<U>; ++e)
{
GetElement(out, e) = detail::PowImpl(GetElement(x, e), GetElement(y, e));
}
return out;
}
template<typename U, class = Require<HasTypeTraits<U>>>
inline __host__ __device__ U exp(U u)
{
U out{};
#pragma unroll
for (int e = 0; e < nvcv::cuda::NumElements<U>; ++e)
{
GetElement(out, e) = detail::ExpImpl(GetElement(u, e));
}
return out;
}
template<typename U, class = Require<HasTypeTraits<U>>>
inline __host__ __device__ U sqrt(U u)
{
U out{};
#pragma unroll
for (int e = 0; e < nvcv::cuda::NumElements<U>; ++e)
{
GetElement(out, e) = detail::SqrtImpl(GetElement(u, e));
}
return out;
}
#define NVCV_CUDA_UNARY_SIMD(TYPE_U, INTRINSIC) \
constexpr(std::is_same_v<U, TYPE_U>) \
{ \
unsigned int r_u = *reinterpret_cast<unsigned int *>(&u); \
unsigned int ret = INTRINSIC(r_u); \
return *reinterpret_cast<TYPE_U *>(&ret); \
}
template<typename U, class = Require<HasTypeTraits<U>>>
inline __host__ __device__ U abs(U u)
{
// clang-format off
#ifdef __CUDA_ARCH__
if NVCV_CUDA_UNARY_SIMD (short2, __vabsss2)
else if NVCV_CUDA_UNARY_SIMD (char4, __vabsss4)
else
#endif
{
U out{};
#pragma unroll
for (int e = 0; e < nvcv::cuda::NumElements<U>; ++e)
{
GetElement(out, e) = detail::AbsImpl(GetElement(u, e));
}
return out;
}
// clang-format on
}
#undef NVCV_CUDA_UNARY_SIMD
template<typename U, typename S,
class = Require<(NumComponents<U> == NumComponents<S>) || (HasTypeTraits<U> && NumComponents<S> == 0)>>
inline __host__ __device__ U clamp(U u, S lo, S hi)
{
U out{};
#pragma unroll
for (int e = 0; e < nvcv::cuda::NumElements<U>; ++e)
{
GetElement(out, e) = detail::ClampImpl(GetElement(u, e), GetElement(lo, e), GetElement(hi, e));
}
return out;
}
} // namespace nvcv::cuda
#endif // NVCV_CUDA_MATH_WRAPPERS_HPP