Path: blob/master/thirdparty/embree/common/math/math_sycl.h
9912 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "../sys/platform.h"6#include "../sys/intrinsics.h"7#include "constants.h"8#include <cmath>910namespace embree11{12__forceinline bool isvalid ( const float& v ) {13return (v > -FLT_LARGE) & (v < +FLT_LARGE);14}1516__forceinline int cast_f2i(float f) {17return __builtin_bit_cast(int,f);18}1920__forceinline float cast_i2f(int i) {21return __builtin_bit_cast(float,i);22}2324__forceinline int toInt (const float& a) { return int(a); }25__forceinline float toFloat(const int& a) { return float(a); }2627__forceinline float asFloat(const int a) { return __builtin_bit_cast(float,a); }28__forceinline int asInt (const float a) { return __builtin_bit_cast(int,a); }2930//__forceinline bool finite ( const float x ) { return _finite(x) != 0; }31__forceinline float sign ( const float x ) { return x<0?-1.0f:1.0f; }32__forceinline float sqr ( const float x ) { return x*x; }3334__forceinline float rcp ( const float x ) {35return sycl::native::recip(x);36}3738__forceinline float signmsk(const float a) { return asFloat(asInt(a) & 0x80000000); }39//__forceinline float signmsk ( const float x ) {40// return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(0x80000000))));41//}42//__forceinline float xorf( const float x, const float y ) {43// return _mm_cvtss_f32(_mm_xor_ps(_mm_set_ss(x),_mm_set_ss(y)));44//}45//__forceinline float andf( const float x, const unsigned y ) {46// return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(y))));47//}4849__forceinline float rsqrt( const float x ) {50return sycl::rsqrt(x);51}5253//__forceinline float nextafter(float x, float y) { if ((x<y) == (x>0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); }54//__forceinline double nextafter(double x, double y) { return _nextafter(x, y); }55//__forceinline int roundf(float f) { return (int)(f + 0.5f); }5657__forceinline float abs ( const float x ) { return sycl::fabs(x); }58__forceinline float acos ( const float x ) { return sycl::acos(x); }59__forceinline float asin ( const float x ) { return sycl::asin(x); }60__forceinline float atan ( const float x ) { return sycl::atan(x); }61__forceinline float atan2( const float y, const float x ) { return sycl::atan2(y, x); }62__forceinline float cos ( const float x ) { return sycl::cos(x); }63__forceinline float cosh ( const float x ) { return sycl::cosh(x); }64__forceinline float exp ( const float x ) { return sycl::exp(x); }65__forceinline float fmod ( const float x, const float y ) { return sycl::fmod(x, y); }66__forceinline float log ( const float x ) { return sycl::log(x); }67__forceinline float log10( const float x ) { return sycl::log10(x); }68__forceinline float pow ( const float x, const float y ) { return sycl::pow(x, y); }69__forceinline float sin ( const float x ) { return sycl::sin(x); }70__forceinline float sinh ( const float x ) { return sycl::sinh(x); }71__forceinline float sqrt ( const float x ) { return sycl::sqrt(x); }72__forceinline float tan ( const float x ) { return sycl::tan(x); }73__forceinline float tanh ( const float x ) { return sycl::tanh(x); }74__forceinline float floor( const float x ) { return sycl::floor(x); }75__forceinline float ceil ( const float x ) { return sycl::ceil(x); }76__forceinline float frac ( const float x ) { return x-floor(x); }7778//__forceinline double abs ( const double x ) { return ::fabs(x); }79//__forceinline double sign ( const double x ) { return x<0?-1.0:1.0; }80//__forceinline double acos ( const double x ) { return ::acos (x); }81//__forceinline double asin ( const double x ) { return ::asin (x); }82//__forceinline double atan ( const double x ) { return ::atan (x); }83//__forceinline double atan2( const double y, const double x ) { return ::atan2(y, x); }84//__forceinline double cos ( const double x ) { return ::cos (x); }85//__forceinline double cosh ( const double x ) { return ::cosh (x); }86//__forceinline double exp ( const double x ) { return ::exp (x); }87//__forceinline double fmod ( const double x, const double y ) { return ::fmod (x, y); }88//__forceinline double log ( const double x ) { return ::log (x); }89//__forceinline double log10( const double x ) { return ::log10(x); }90//__forceinline double pow ( const double x, const double y ) { return ::pow (x, y); }91//__forceinline double rcp ( const double x ) { return 1.0/x; }92//__forceinline double rsqrt( const double x ) { return 1.0/::sqrt(x); }93//__forceinline double sin ( const double x ) { return ::sin (x); }94//__forceinline double sinh ( const double x ) { return ::sinh (x); }95//__forceinline double sqr ( const double x ) { return x*x; }96//__forceinline double sqrt ( const double x ) { return ::sqrt (x); }97//__forceinline double tan ( const double x ) { return ::tan (x); }98//__forceinline double tanh ( const double x ) { return ::tanh (x); }99//__forceinline double floor( const double x ) { return ::floor (x); }100//__forceinline double ceil ( const double x ) { return ::ceil (x); }101102/*103#if defined(__SSE4_1__)104__forceinline float mini(float a, float b) {105const __m128i ai = _mm_castps_si128(_mm_set_ss(a));106const __m128i bi = _mm_castps_si128(_mm_set_ss(b));107const __m128i ci = _mm_min_epi32(ai,bi);108return _mm_cvtss_f32(_mm_castsi128_ps(ci));109}110#endif111112#if defined(__SSE4_1__)113__forceinline float maxi(float a, float b) {114const __m128i ai = _mm_castps_si128(_mm_set_ss(a));115const __m128i bi = _mm_castps_si128(_mm_set_ss(b));116const __m128i ci = _mm_max_epi32(ai,bi);117return _mm_cvtss_f32(_mm_castsi128_ps(ci));118}119#endif120*/121122template<typename T>123__forceinline T twice(const T& a) { return a+a; }124125__forceinline int min(int a, int b) { return sycl::min(a,b); }126__forceinline unsigned min(unsigned a, unsigned b) { return sycl::min(a,b); }127__forceinline int64_t min(int64_t a, int64_t b) { return sycl::min(a,b); }128__forceinline float min(float a, float b) { return sycl::fmin(a,b); }129__forceinline double min(double a, double b) { return sycl::fmin(a,b); }130#if defined(__X86_64__)131__forceinline size_t min(size_t a, size_t b) { return sycl::min(a,b); }132#endif133134template<typename T> __forceinline T min(const T& a, const T& b, const T& c) { return min(min(a,b),c); }135template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); }136template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e) { return min(min(min(a,b),min(c,d)),e); }137138// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c) { return mini(mini(a,b),c); }139// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { return mini(mini(a,b),mini(c,d)); }140// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { return mini(mini(mini(a,b),mini(c,d)),e); }141142__forceinline int max(int a, int b) { return sycl::max(a,b); }143__forceinline unsigned max(unsigned a, unsigned b) { return sycl::max(a,b); }144__forceinline int64_t max(int64_t a, int64_t b) { return sycl::max(a,b); }145__forceinline float max(float a, float b) { return sycl::fmax(a,b); }146__forceinline double max(double a, double b) { return sycl::fmax(a,b); }147#if defined(__X86_64__)148__forceinline size_t max(size_t a, size_t b) { return sycl::max(a,b); }149#endif150151template<typename T> __forceinline T max(const T& a, const T& b, const T& c) { return max(max(a,b),c); }152template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); }153template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e) { return max(max(max(a,b),max(c,d)),e); }154155// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c) { return maxi(maxi(a,b),c); }156// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { return maxi(maxi(a,b),maxi(c,d)); }157// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { return maxi(maxi(maxi(a,b),maxi(c,d)),e); }158159template<typename T> __forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one)) { return max(min(x,upper),lower); }160template<typename T> __forceinline T clampz(const T& x, const T& upper) { return max(T(zero), min(x,upper)); }161162template<typename T> __forceinline T deg2rad ( const T& x ) { return x * T(1.74532925199432957692e-2f); }163template<typename T> __forceinline T rad2deg ( const T& x ) { return x * T(5.72957795130823208768e1f); }164template<typename T> __forceinline T sin2cos ( const T& x ) { return sqrt(max(T(zero),T(one)-x*x)); }165template<typename T> __forceinline T cos2sin ( const T& x ) { return sin2cos(x); }166167__forceinline float madd ( const float a, const float b, const float c) { return +sycl::fma(+a,b,+c); }168__forceinline float msub ( const float a, const float b, const float c) { return +sycl::fma(+a,b,-c); }169__forceinline float nmadd ( const float a, const float b, const float c) { return +sycl::fma(-a,b,+c); }170__forceinline float nmsub ( const float a, const float b, const float c) { return -sycl::fma(+a,b,+c); }171172/*! random functions */173/*174template<typename T> T random() { return T(0); }175template<> __forceinline int random() { return int(rand()); }176template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 16); }177template<> __forceinline float random() { return rand()/float(RAND_MAX); }178template<> __forceinline double random() { return rand()/double(RAND_MAX); }179*/180181/*! selects */182__forceinline bool select(bool s, bool t , bool f) { return s ? t : f; }183__forceinline int select(bool s, int t, int f) { return s ? t : f; }184__forceinline float select(bool s, float t, float f) { return s ? t : f; }185186__forceinline bool none(bool s) { return !s; }187__forceinline bool all (bool s) { return s; }188__forceinline bool any (bool s) { return s; }189190__forceinline unsigned movemask (bool s) { return (unsigned)s; }191192__forceinline float lerp(const float v0, const float v1, const float t) {193return madd(1.0f-t,v0,t*v1);194}195196template<typename T>197__forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) {198return madd((1.0f-u),madd((1.0f-v),T(x0),v*T(x2)),u*madd((1.0f-v),T(x1),v*T(x3)));199}200201/*! exchange */202template<typename T> __forceinline void xchg ( T& a, T& b ) { const T tmp = a; a = b; b = tmp; }203204/* load/store */205template<typename Ty> struct mem;206207template<> struct mem<float> {208static __forceinline float load (bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }209static __forceinline float loadu(bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }210211static __forceinline void store (bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }212static __forceinline void storeu(bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }213};214215/*! bit reverse operation */216template<class T>217__forceinline T bitReverse(const T& vin)218{219T v = vin;220v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);221v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);222v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);223v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);224v = ( v >> 16 ) | ( v << 16);225return v;226}227228/*! bit interleave operation */229template<class T>230__forceinline T bitInterleave(const T& xin, const T& yin, const T& zin)231{232T x = xin, y = yin, z = zin;233x = (x | (x << 16)) & 0x030000FF;234x = (x | (x << 8)) & 0x0300F00F;235x = (x | (x << 4)) & 0x030C30C3;236x = (x | (x << 2)) & 0x09249249;237238y = (y | (y << 16)) & 0x030000FF;239y = (y | (y << 8)) & 0x0300F00F;240y = (y | (y << 4)) & 0x030C30C3;241y = (y | (y << 2)) & 0x09249249;242243z = (z | (z << 16)) & 0x030000FF;244z = (z | (z << 8)) & 0x0300F00F;245z = (z | (z << 4)) & 0x030C30C3;246z = (z | (z << 2)) & 0x09249249;247248return x | (y << 1) | (z << 2);249}250251/*! bit interleave operation for 64bit data types*/252template<class T>253__forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin){254T x = xin & 0x1fffff;255T y = yin & 0x1fffff;256T z = zin & 0x1fffff;257258x = (x | x << 32) & 0x1f00000000ffff;259x = (x | x << 16) & 0x1f0000ff0000ff;260x = (x | x << 8) & 0x100f00f00f00f00f;261x = (x | x << 4) & 0x10c30c30c30c30c3;262x = (x | x << 2) & 0x1249249249249249;263264y = (y | y << 32) & 0x1f00000000ffff;265y = (y | y << 16) & 0x1f0000ff0000ff;266y = (y | y << 8) & 0x100f00f00f00f00f;267y = (y | y << 4) & 0x10c30c30c30c30c3;268y = (y | y << 2) & 0x1249249249249249;269270z = (z | z << 32) & 0x1f00000000ffff;271z = (z | z << 16) & 0x1f0000ff0000ff;272z = (z | z << 8) & 0x100f00f00f00f00f;273z = (z | z << 4) & 0x10c30c30c30c30c3;274z = (z | z << 2) & 0x1249249249249249;275276return x | (y << 1) | (z << 2);277}278}279280281