Path: blob/main/contrib/llvm-project/clang/lib/Headers/__clang_cuda_cmath.h
/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __CLANG_CUDA_CMATH_H__
#define __CLANG_CUDA_CMATH_H__
#ifndef __CUDA__
#error "This file is for CUDA compilation only."
#endif

#ifndef __OPENMP_NVPTX__
#include <limits>
#endif

// CUDA lets us use various std math functions on the device side. This file
// works in concert with __clang_cuda_math_forward_declares.h to make this work.
//
// Specifically, the forward-declares header declares __device__ overloads for
// these functions in the global namespace, then pulls them into namespace std
// with 'using' statements. Then this file implements those functions, after
// their implementations have been pulled in.
//
// It's important that we declare the functions in the global namespace and pull
// them into namespace std with using statements, as opposed to simply declaring
// these functions in namespace std, because our device functions need to
// overload the standard library functions, which may be declared in the global
// namespace or in std, depending on the degree of conformance of the stdlib
// implementation. Declaring in the global namespace and pulling into namespace
// std covers all of the known knowns.

#ifdef __OPENMP_NVPTX__
#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
#else
#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
#endif

__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
__DEVICE__ long abs(long __n) { return ::labs(__n); }
__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
__DEVICE__ double abs(double __x) { return ::fabs(__x); }
__DEVICE__ float acos(float __x) { return ::acosf(__x); }
__DEVICE__ float asin(float __x) { return ::asinf(__x); }
__DEVICE__ float atan(float __x) { return ::atanf(__x); }
__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
__DEVICE__ float cos(float __x) { return ::cosf(__x); }
__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
__DEVICE__ float exp(float __x) { return ::expf(__x); }
__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
__DEVICE__ float floor(float __x) { return ::floorf(__x); }
__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
__DEVICE__ int fpclassify(float __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
__DEVICE__ int fpclassify(double __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
__DEVICE__ float frexp(float __arg, int *__exp) {
  return ::frexpf(__arg, __exp);
}
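// For instance, with the overloads above (and the forward declarations that
// already pulled them into namespace std), device code can use the familiar
// C++ spellings and still reach the single-precision routines. A minimal
// sketch (the helper name is hypothetical):
//
//   __device__ float __decay(float __x, float __t) {
//     return std::fabs(__x) * std::exp(-__t); // -> ::fabsf, ::expf
//   }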
// For inscrutable reasons, the CUDA headers define these functions for us on
// Windows.
#if !defined(_MSC_VER) || defined(__OPENMP_NVPTX__)

// For OpenMP we work around some old system headers that have non-conforming
// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do
// this by providing two versions of these functions, differing only in the
// return type. To avoid conflicting definitions we disable implicit base
// function generation. That means we will end up with two specializations, one
// per type, but only one has a base function defined by the system header.
#if defined(__OPENMP_NVPTX__)
#pragma omp begin declare variant match(                                       \
    implementation = {extension(disable_implicit_base)})

// FIXME: We lack an extension to customize the mangling of the variants, e.g.,
//        add a suffix. This means we would clash with the names of the variants
//        (note that we do not create implicit base functions here). To avoid
//        this clash we add a new trait to some of them that is always true
//        (this is LLVM after all ;)). It will only influence the mangled name
//        of the variants inside the inner region and avoid the clash.
#pragma omp begin declare variant match(implementation = {vendor(llvm)})

__DEVICE__ int isinf(float __x) { return ::__isinff(__x); }
__DEVICE__ int isinf(double __x) { return ::__isinf(__x); }
__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); }
__DEVICE__ int isfinite(double __x) { return ::__isfinited(__x); }
__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); }
__DEVICE__ int isnan(double __x) { return ::__isnan(__x); }

#pragma omp end declare variant

#endif

__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
// For inscrutable reasons, __finite(), the double-precision version of
// __finitef, does not exist when compiling for MacOS. __isfinited is available
// everywhere and is just as good.
__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }

#if defined(__OPENMP_NVPTX__)
#pragma omp end declare variant
#endif

#endif

__DEVICE__ bool isgreater(float __x, float __y) {
  return __builtin_isgreater(__x, __y);
}
__DEVICE__ bool isgreater(double __x, double __y) {
  return __builtin_isgreater(__x, __y);
}
__DEVICE__ bool isgreaterequal(float __x, float __y) {
  return __builtin_isgreaterequal(__x, __y);
}
__DEVICE__ bool isgreaterequal(double __x, double __y) {
  return __builtin_isgreaterequal(__x, __y);
}
__DEVICE__ bool isless(float __x, float __y) {
  return __builtin_isless(__x, __y);
}
__DEVICE__ bool isless(double __x, double __y) {
  return __builtin_isless(__x, __y);
}
__DEVICE__ bool islessequal(float __x, float __y) {
  return __builtin_islessequal(__x, __y);
}
__DEVICE__ bool islessequal(double __x, double __y) {
  return __builtin_islessequal(__x, __y);
}
__DEVICE__ bool islessgreater(float __x, float __y) {
  return __builtin_islessgreater(__x, __y);
}
__DEVICE__ bool islessgreater(double __x, double __y) {
  return __builtin_islessgreater(__x, __y);
}
__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
__DEVICE__ bool isunordered(float __x, float __y) {
  return __builtin_isunordered(__x, __y);
}
__DEVICE__ bool isunordered(double __x, double __y) {
  return __builtin_isunordered(__x, __y);
}
__DEVICE__ float ldexp(float __arg, int __exp) {
  return ::ldexpf(__arg, __exp);
}
__DEVICE__ float log(float __x) { return ::logf(__x); }
__DEVICE__ float log10(float __x) { return ::log10f(__x); }
__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
__DEVICE__ float pow(float __base, float __exp) {
  return ::powf(__base, __exp);
}
__DEVICE__ float pow(float __base, int __iexp) {
  return ::powif(__base, __iexp);
}
__DEVICE__ double pow(double __base, int __iexp) {
  return ::powi(__base, __iexp);
}
__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
__DEVICE__ float sin(float __x) { return ::sinf(__x); }
__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }

// There was a redefinition error for this overload in CUDA mode.
// We restrict it to OpenMP mode for now; that is where it is actually needed
// anyway.
#ifdef __OPENMP_NVPTX__
__DEVICE__ float remquo(float __n, float __d, int *__q) {
  return ::remquof(__n, __d, __q);
}
#endif

// Notably missing above is nexttoward. We omit it because
// libdevice doesn't provide an implementation, and we don't want to be in the
// business of implementing tricky libm functions in this header.

#ifndef __OPENMP_NVPTX__

// Now we've defined everything we promised we'd define in
// __clang_cuda_math_forward_declares.h. We need to do two additional things to
// fix up our math functions.
//
// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define
//    only sin(float) and sin(double), which means that e.g. sin(0) is
//    ambiguous.
//
// 2) Pull the __device__ overloads of "foobarf" math functions into namespace
//    std. These are defined in the CUDA headers in the global namespace,
//    independent of everything else we've done here.
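// As a concrete sketch of (1): in device code, a call like
//
//   double __r = std::sin(0); // integer argument
//
// would otherwise be ambiguous between sin(float) and sin(double). The
// integral overloads defined below accept the int exactly, cast it to double,
// and forward to the double version, so the call resolves to sin(0.0).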
// We can't use std::enable_if, because we want to be pre-C++11 compatible. But
// we go ahead and unconditionally define functions that are only available when
// compiling for C++11 to match the behavior of the CUDA headers.
template <bool __B, class __T = void>
struct __clang_cuda_enable_if {};

template <class __T> struct __clang_cuda_enable_if<true, __T> {
  typedef __T type;
};

// Defines an overload of __fn that accepts one integral argument, calls
// __fn((double)x), and returns __retty.
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)                      \
  template <typename __T>                                                      \
  __DEVICE__                                                                   \
      typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,    \
                                      __retty>::type                           \
      __fn(__T __x) {                                                          \
    return ::__fn((double)__x);                                                \
  }

// Defines an overload of __fn that accepts two arithmetic arguments, calls
// __fn((double)x, (double)y), and returns a double.
//
// Note this is different from OVERLOAD_1, which generates an overload that
// accepts only *integral* arguments.
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)                      \
  template <typename __T1, typename __T2>                                      \
  __DEVICE__ typename __clang_cuda_enable_if<                                  \
      std::numeric_limits<__T1>::is_specialized &&                             \
          std::numeric_limits<__T2>::is_specialized,                           \
      __retty>::type                                                           \
  __fn(__T1 __x, __T2 __y) {                                                   \
    return __fn((double)__x, (double)__y);                                     \
  }
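// For illustration, __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt) expands
// to roughly this overload, which accepts any integer type, converts the
// argument to double, and forwards to the double implementation:
//
//   template <typename __T>
//   __DEVICE__ typename __clang_cuda_enable_if<
//       std::numeric_limits<__T>::is_integer, double>::type
//   cbrt(__T __x) {
//     return ::cbrt((double)__x);
//   }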
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);

#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2

// Overloads for functions that don't match the patterns expected by
// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
template <typename __T1, typename __T2, typename __T3>
__DEVICE__ typename __clang_cuda_enable_if<
    std::numeric_limits<__T1>::is_specialized &&
        std::numeric_limits<__T2>::is_specialized &&
        std::numeric_limits<__T3>::is_specialized,
    double>::type
fma(__T1 __x, __T2 __y, __T3 __z) {
  return std::fma((double)__x, (double)__y, (double)__z);
}

template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
frexp(__T __x, int *__exp) {
  return std::frexp((double)__x, __exp);
}

template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
ldexp(__T __x, int __exp) {
  return std::ldexp((double)__x, __exp);
}

template <typename __T1, typename __T2>
__DEVICE__ typename __clang_cuda_enable_if<
    std::numeric_limits<__T1>::is_specialized &&
        std::numeric_limits<__T2>::is_specialized,
    double>::type
remquo(__T1 __x, __T2 __y, int *__quo) {
  return std::remquo((double)__x, (double)__y, __quo);
}

template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
scalbln(__T __x, long __exp) {
  return std::scalbln((double)__x, __exp);
}

template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
scalbn(__T __x, int __exp) {
  return std::scalbn((double)__x, __exp);
}
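// Taken together, these templates let mixed integer/floating-point calls
// promote to double instead of failing overload resolution. A minimal sketch
// (the function name is hypothetical):
//
//   __device__ double __mixed_math() {
//     double __a = std::pow(2, 10);     // pow(int, int)     -> pow(2.0, 10.0)
//     double __b = std::hypot(3, 4.0f); // hypot(int, float) -> hypot(3.0, 4.0)
//     double __c = std::fma(1, 2, 3.5); // fma(int, int, double), all as doubles
//     return __a + __b + __c;
//   }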
// We need to define these overloads in exactly the namespace our standard
// library uses (including the right inline namespace), otherwise they won't be
// picked up by other functions in the standard library (e.g. functions in
// <complex>). Thus the ugliness below.
#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_BEGIN_NAMESPACE_STD
#else
namespace std {
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif
#endif

// Pull the new overloads we defined above into namespace std.
using ::acos;
using ::acosh;
using ::asin;
using ::asinh;
using ::atan;
using ::atan2;
using ::atanh;
using ::cbrt;
using ::ceil;
using ::copysign;
using ::cos;
using ::cosh;
using ::erf;
using ::erfc;
using ::exp;
using ::exp2;
using ::expm1;
using ::fabs;
using ::fdim;
using ::floor;
using ::fma;
using ::fmax;
using ::fmin;
using ::fmod;
using ::fpclassify;
using ::frexp;
using ::hypot;
using ::ilogb;
using ::isfinite;
using ::isgreater;
using ::isgreaterequal;
using ::isless;
using ::islessequal;
using ::islessgreater;
using ::isnormal;
using ::isunordered;
using ::ldexp;
using ::lgamma;
using ::llrint;
using ::llround;
using ::log;
using ::log10;
using ::log1p;
using ::log2;
using ::logb;
using ::lrint;
using ::lround;
using ::nearbyint;
using ::nextafter;
using ::pow;
using ::remainder;
using ::remquo;
using ::rint;
using ::round;
using ::scalbln;
using ::scalbn;
using ::signbit;
using ::sin;
using ::sinh;
using ::sqrt;
using ::tan;
using ::tanh;
using ::tgamma;
using ::trunc;

// Well this is fun: We need to pull these symbols in for libc++, but we can't
// pull them in with libstdc++, because its ::isinf and ::isnan are different
// than its std::isinf and std::isnan.
#ifndef __GLIBCXX__
using ::isinf;
using ::isnan;
#endif

// Finally, pull the "foobarf" functions that CUDA defines in its headers into
// namespace std.
using ::acosf;
using ::acoshf;
using ::asinf;
using ::asinhf;
using ::atan2f;
using ::atanf;
using ::atanhf;
using ::cbrtf;
using ::ceilf;
using ::copysignf;
using ::cosf;
using ::coshf;
using ::erfcf;
using ::erff;
using ::exp2f;
using ::expf;
using ::expm1f;
using ::fabsf;
using ::fdimf;
using ::floorf;
using ::fmaf;
using ::fmaxf;
using ::fminf;
using ::fmodf;
using ::frexpf;
using ::hypotf;
using ::ilogbf;
using ::ldexpf;
using ::lgammaf;
using ::llrintf;
using ::llroundf;
using ::log10f;
using ::log1pf;
using ::log2f;
using ::logbf;
using ::logf;
using ::lrintf;
using ::lroundf;
using ::modff;
using ::nearbyintf;
using ::nextafterf;
using ::powf;
using ::remainderf;
using ::remquof;
using ::rintf;
using ::roundf;
using ::scalblnf;
using ::scalbnf;
using ::sinf;
using ::sinhf;
using ::sqrtf;
using ::tanf;
using ::tanhf;
using ::tgammaf;
using ::truncf;

#ifdef _LIBCPP_END_NAMESPACE_STD
_LIBCPP_END_NAMESPACE_STD
#else
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_END_NAMESPACE_VERSION
#endif
} // namespace std
#endif

#endif // __OPENMP_NVPTX__

#undef __DEVICE__

#endif