Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/common/math/emath.h
9912 views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
#pragma once
5
6
#include "../sys/platform.h"
7
#include "../sys/intrinsics.h"
8
#include "constants.h"
9
#include <cmath>
10
11
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
12
# include "math_sycl.h"
13
#else
14
15
#if defined(__ARM_NEON)
16
#include "../simd/arm/emulation.h"
17
#else
18
#include <emmintrin.h>
19
#include <xmmintrin.h>
20
#include <immintrin.h>
21
#endif
22
23
#if defined(__WIN32__)
24
#if defined(_MSC_VER) && (_MSC_VER <= 1700)
25
namespace std
26
{
27
__forceinline bool isinf ( const float x ) { return _finite(x) == 0; }
28
__forceinline bool isnan ( const float x ) { return _isnan(x) != 0; }
29
__forceinline bool isfinite (const float x) { return _finite(x) != 0; }
30
}
31
#endif
32
#endif
33
34
namespace embree
35
{
36
/*! Checks that v lies strictly inside the open interval
 *  (-FLT_LARGE, +FLT_LARGE). A NaN fails both comparisons and is
 *  therefore reported as invalid. */
__forceinline bool isvalid ( const float& v )
{
  const bool aboveLower = v > -FLT_LARGE;
  const bool belowUpper = v < +FLT_LARGE;
  return aboveLower & belowUpper;
}
39
40
/*! Returns the bit pattern of the float f as an int (no numeric
 *  conversion takes place). */
__forceinline int cast_f2i(float f) {
  union { float f; int i; } v; v.f = f; return v.i;
}

/*! Returns the float whose bit pattern is the int i (no numeric
 *  conversion takes place). */
__forceinline float cast_i2f(int i) {
  union { float f; int i; } v; v.i = i; return v.f;
}

/*! Numeric conversions: toInt truncates towards zero, toFloat converts
 *  the integer value to float. */
__forceinline int   toInt  (const float& a) { return int(a); }
__forceinline float toFloat(const int&   a) { return float(a); }

/*! Bit-level reinterpretation of a float as int and vice versa.
 *  These go through the union based helpers instead of dereferencing a
 *  casted pointer: reading a float object through an int* (or the other
 *  way round) violates the strict aliasing rule and may be miscompiled
 *  by optimizing compilers. */
__forceinline int   asInt  (const float& a) { return cast_f2i(a); }
__forceinline float asFloat(const int&   a) { return cast_i2f(a); }
53
54
#if defined(__WIN32__)
/*! finite test for Windows builds, implemented with the CRT's _finite */
__forceinline bool finite ( const float x )
{
  return 0 != _finite(x);
}
#endif

/*! Returns -1 for x < 0 and +1 for everything else; inputs that do not
 *  compare less than zero (0, -0, NaN) therefore yield +1. */
__forceinline float sign ( const float x )
{
  if (x < 0) return -1.0f;
  return 1.0f;
}

/*! Returns the square of x. */
__forceinline float sqr ( const float x )
{
  const float squared = x*x;
  return squared;
}
60
61
/*! Approximate reciprocal 1/x of a single float. A hardware reciprocal
 *  estimate is taken first and then refined: two refinement steps on
 *  AArch64, one step of the form est*(2 - est*x) on x86. */
__forceinline float rcp ( const float x )
{
#if defined(__aarch64__)
  // Move scalar to lane 0 of a vector register; only lane 0 is read back.
  __m128 vx;
  vx[0] = x;
  float32x4_t est = vrecpeq_f32(vx);
  est = vmulq_f32(vrecpsq_f32(vx, est), est);
  est = vmulq_f32(vrecpsq_f32(vx, est), est);
  return est[0];
#else

  const __m128 vx = _mm_set_ss(x);

#if defined(__AVX512VL__)
  const __m128 est = _mm_rcp14_ss(_mm_set_ss(0.0f),vx);
#else
  const __m128 est = _mm_rcp_ss(vx);
#endif

  // correction factor (2 - est*x) of the refinement step
#if defined(__AVX2__)
  const __m128 corr = _mm_fnmadd_ss(est, vx, _mm_set_ss(2.0f));
#else
  const __m128 corr = _mm_sub_ss(_mm_set_ss(2.0f), _mm_mul_ss(est, vx));
#endif

  return _mm_cvtss_f32(_mm_mul_ss(est,corr));

#endif //defined(__aarch64__)
}
89
90
/*! Keeps only the sign bit of x: the result is -0.0f when the sign bit
 *  of x is set and +0.0f otherwise. */
__forceinline float signmsk ( const float x ) {
#if defined(__aarch64__)
  // FP and Neon shares same vector register in arm64
  __m128 value;
  __m128i signBit;
  value[0] = x;
  signBit[0] = 0x80000000;
  value = _mm_and_ps(value, vreinterpretq_f32_s32(signBit));
  return value[0];
#else
  const __m128 value   = _mm_set_ss(x);
  const __m128 signBit = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
  return _mm_cvtss_f32(_mm_and_ps(value,signBit));
#endif
}
103
/*! Bitwise XOR of the bit patterns of x and y, returned as a float. */
__forceinline float xorf( const float x, const float y ) {
#if defined(__aarch64__)
  // FP and Neon shares same vector register in arm64
  __m128 lhs;
  __m128 rhs;
  lhs[0] = x;
  rhs[0] = y;
  lhs = _mm_xor_ps(lhs, rhs);
  return lhs[0];
#else
  const __m128 lhs = _mm_set_ss(x);
  const __m128 rhs = _mm_set_ss(y);
  return _mm_cvtss_f32(_mm_xor_ps(lhs,rhs));
#endif
}
116
/*! Bitwise AND of the bit pattern of x with the 32-bit mask y,
 *  returned as a float. */
__forceinline float andf( const float x, const unsigned y ) {
#if defined(__aarch64__)
  // FP and Neon shares same vector register in arm64
  __m128 value;
  __m128i mask;
  value[0] = x;
  mask[0] = y;
  value = _mm_and_ps(value, vreinterpretq_f32_s32(mask));
  return value[0];
#else
  const __m128 value = _mm_set_ss(x);
  const __m128 mask  = _mm_castsi128_ps(_mm_set1_epi32(y));
  return _mm_cvtss_f32(_mm_and_ps(value,mask));
#endif
}
129
/*! Approximate reciprocal square root 1/sqrt(x) of a single float. A
 *  hardware estimate r is refined afterwards; the x86 path evaluates
 *  1.5*r + (-0.5*x*r)*(r*r), the AArch64 path applies two vrsqrts
 *  based refinement steps. */
__forceinline float rsqrt( const float x )
{
#if defined(__aarch64__)
  // FP and Neon shares same vector register in arm64
  __m128 vx;
  vx[0] = x;
  __m128 est = _mm_rsqrt_ps(vx);
  est = vmulq_f32(est, vrsqrtsq_f32(vmulq_f32(vx, est), est));
  est = vmulq_f32(est, vrsqrtsq_f32(vmulq_f32(vx, est), est));
  return est[0];
#else

  const __m128 vx = _mm_set_ss(x);
#if defined(__AVX512VL__)
  const __m128 est = _mm_rsqrt14_ss(_mm_set_ss(0.0f),vx);
#else
  const __m128 est = _mm_rsqrt_ss(vx);
#endif
  const __m128 threeHalvesEst = _mm_mul_ss(_mm_set_ss(1.5f), est);
  const __m128 negHalfXEst    = _mm_mul_ss(_mm_mul_ss(vx, _mm_set_ss(-0.5f)), est);
  const __m128 estSquared     = _mm_mul_ss(est, est);
  const __m128 refined        = _mm_add_ss(threeHalvesEst, _mm_mul_ss(negHalfXEst, estSquared));
  return _mm_cvtss_f32(refined);
#endif
}
152
153
#if defined(__WIN32__) && defined(_MSC_VER) && (_MSC_VER <= 1700)
/*! Replacement for nextafterf on old MSVC: returns the representable
 *  float that follows x in the direction of y. The step is performed on
 *  the bit pattern of x, since the magnitude of a finite float grows
 *  with its integer representation. (Scaling x by a constant factor, as
 *  done previously, moves by far more than one representable value and
 *  never leaves zero.) */
__forceinline float nextafter(float x, float y)
{
  if (x != x || y != y) return x + y;   // NaN in, NaN out
  if (x == y) return y;                 // already there
  if (x == 0.0f) {
    // leaving zero: smallest positive subnormal, signed towards y
    const float tiny = cast_i2f(1);
    return y > 0.0f ? tiny : -tiny;
  }
  const int  bits         = cast_f2i(x);
  const bool awayFromZero = (x < y) == (x > 0.0f);
  return cast_i2f(awayFromZero ? bits + 1 : bits - 1);
}

/*! Double precision variant, forwarded to the CRT's _nextafter. */
__forceinline double nextafter(double x, double y) { return _nextafter(x, y); }

/*! Rounds f to the nearest integer, halfway cases away from zero.
 *  Negative inputs need the offset subtracted, because the cast to int
 *  truncates towards zero. */
__forceinline int roundf(float f) { return f < 0.0f ? (int)(f - 0.5f) : (int)(f + 0.5f); }
#else
/*! Next representable value after x in the direction of y, forwarded to
 *  the C math library. */
__forceinline float nextafter(float x, float y) { return ::nextafterf(x, y); }
__forceinline double nextafter(double x, double y) { return ::nextafter(x, y); }
#endif
161
162
/*! Single precision math functions. Each one forwards to the float
 *  variant of the corresponding C math library routine. */
__forceinline float abs  ( const float x ) {
  return ::fabsf(x);
}
__forceinline float acos ( const float x ) {
  return ::acosf(x);
}
__forceinline float asin ( const float x ) {
  return ::asinf(x);
}
__forceinline float atan ( const float x ) {
  return ::atanf(x);
}
__forceinline float atan2( const float y, const float x ) {
  return ::atan2f(y, x);
}
__forceinline float cos  ( const float x ) {
  return ::cosf(x);
}
__forceinline float cosh ( const float x ) {
  return ::coshf(x);
}
__forceinline float exp  ( const float x ) {
  return ::expf(x);
}
__forceinline float fmod ( const float x, const float y ) {
  return ::fmodf(x, y);
}
__forceinline float log  ( const float x ) {
  return ::logf(x);
}
__forceinline float log10( const float x ) {
  return ::log10f(x);
}
__forceinline float pow  ( const float x, const float y ) {
  return ::powf(x, y);
}
__forceinline float sin  ( const float x ) {
  return ::sinf(x);
}
__forceinline float sinh ( const float x ) {
  return ::sinhf(x);
}
__forceinline float sqrt ( const float x ) {
  return ::sqrtf(x);
}
__forceinline float tan  ( const float x ) {
  return ::tanf(x);
}
__forceinline float tanh ( const float x ) {
  return ::tanhf(x);
}
__forceinline float floor( const float x ) {
  return ::floorf(x);
}
__forceinline float ceil ( const float x ) {
  return ::ceilf(x);
}

/*! Fractional part of x, computed as x - floor(x). */
__forceinline float frac ( const float x ) {
  const float whole = floor(x);
  return x-whole;
}
182
183
/*! Double precision math functions. Apart from sign, rcp, rsqrt and
 *  sqr, each one forwards to the C math library routine of the same
 *  purpose. */
__forceinline double abs  ( const double x ) {
  return ::fabs(x);
}

/*! Returns -1 for x < 0 and +1 for everything else. */
__forceinline double sign ( const double x ) {
  if (x < 0) return -1.0;
  return 1.0;
}
__forceinline double acos ( const double x ) {
  return ::acos(x);
}
__forceinline double asin ( const double x ) {
  return ::asin(x);
}
__forceinline double atan ( const double x ) {
  return ::atan(x);
}
__forceinline double atan2( const double y, const double x ) {
  return ::atan2(y, x);
}
__forceinline double cos  ( const double x ) {
  return ::cos(x);
}
__forceinline double cosh ( const double x ) {
  return ::cosh(x);
}
__forceinline double exp  ( const double x ) {
  return ::exp(x);
}
__forceinline double fmod ( const double x, const double y ) {
  return ::fmod(x, y);
}
__forceinline double log  ( const double x ) {
  return ::log(x);
}
__forceinline double log10( const double x ) {
  return ::log10(x);
}
__forceinline double pow  ( const double x, const double y ) {
  return ::pow(x, y);
}

/*! Reciprocal, computed with a plain division. */
__forceinline double rcp  ( const double x ) {
  return 1.0/x;
}

/*! Reciprocal square root, computed as 1/sqrt(x). */
__forceinline double rsqrt( const double x ) {
  return 1.0/::sqrt(x);
}
__forceinline double sin  ( const double x ) {
  return ::sin(x);
}
__forceinline double sinh ( const double x ) {
  return ::sinh(x);
}

/*! Returns the square of x. */
__forceinline double sqr  ( const double x ) {
  return x*x;
}
__forceinline double sqrt ( const double x ) {
  return ::sqrt(x);
}
__forceinline double tan  ( const double x ) {
  return ::tan(x);
}
__forceinline double tanh ( const double x ) {
  return ::tanh(x);
}
__forceinline double floor( const double x ) {
  return ::floor(x);
}
__forceinline double ceil ( const double x ) {
  return ::ceil(x);
}
206
207
#if defined(__aarch64__)
/*! Minimum of a and b, evaluated in lane 0 of a vector register with
 *  _mm_min_ps. */
__forceinline float mini(float a, float b) {
  // FP and Neon shares same vector register in arm64
  __m128 va;
  __m128 vb;
  va[0] = a;
  vb[0] = b;
  va = _mm_min_ps(va, vb);
  return va[0];
}
#elif defined(__SSE4_1__)
/*! Minimum of a and b, selected by comparing their raw bit patterns as
 *  signed 32-bit integers (_mm_min_epi32).
 *  NOTE(review): the integer order matches the float order only when
 *  both inputs are non-negative - presumably guaranteed by callers;
 *  confirm. */
__forceinline float mini(float a, float b) {
  const __m128i aBits = _mm_castps_si128(_mm_set_ss(a));
  const __m128i bBits = _mm_castps_si128(_mm_set_ss(b));
  const __m128i lower = _mm_min_epi32(aBits,bBits);
  return _mm_cvtss_f32(_mm_castsi128_ps(lower));
}
#endif
225
226
#if defined(__aarch64__)
/*! Maximum of a and b, evaluated in lane 0 of a vector register with
 *  _mm_max_ps. */
__forceinline float maxi(float a, float b) {
  // FP and Neon shares same vector register in arm64
  __m128 va;
  __m128 vb;
  va[0] = a;
  vb[0] = b;
  va = _mm_max_ps(va, vb);
  return va[0];
}
#elif defined(__SSE4_1__)
/*! Maximum of a and b, selected by comparing their raw bit patterns as
 *  signed 32-bit integers (_mm_max_epi32).
 *  NOTE(review): the integer order matches the float order only when
 *  both inputs are non-negative - presumably guaranteed by callers;
 *  confirm. */
__forceinline float maxi(float a, float b) {
  const __m128i aBits  = _mm_castps_si128(_mm_set_ss(a));
  const __m128i bBits  = _mm_castps_si128(_mm_set_ss(b));
  const __m128i higher = _mm_max_epi32(aBits,bBits);
  return _mm_cvtss_f32(_mm_castsi128_ps(higher));
}
#endif
244
245
/*! Returns a added to itself, i.e. two times a for arithmetic types. */
template<typename T>
__forceinline T twice(const T& a)
{
  return a+a;
}
247
248
/*! Two-argument minimum for the scalar types. The first argument is
 *  returned only when it compares less than the second one; in every
 *  other case (including comparisons involving NaN) the second argument
 *  is returned. */
__forceinline      int min(     int a,      int b) { if (a < b) return a; return b; }
__forceinline unsigned min(unsigned a, unsigned b) { if (a < b) return a; return b; }
__forceinline  int64_t min( int64_t a,  int64_t b) { if (a < b) return a; return b; }
__forceinline    float min(   float a,    float b) { if (a < b) return a; return b; }
__forceinline   double min(  double a,   double b) { if (a < b) return a; return b; }
#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
__forceinline   size_t min(  size_t a,   size_t b) { if (a < b) return a; return b; }
#endif
#if defined(__EMSCRIPTEN__)
__forceinline     long min(    long a,     long b) { if (a < b) return a; return b; }
#endif

/*! Minimum of three, four and five values, built from the two-argument
 *  min that is available for T. */
template<typename T> __forceinline T min(const T& a, const T& b, const T& c)
{
  return min(min(a,b),c);
}
template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d)
{
  return min(min(a,b),min(c,d));
}
template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e)
{
  return min(min(min(a,b),min(c,d)),e);
}
263
264
/*! mini of three, four and five values, built from the two-argument
 *  mini that is available for T. */
template<typename T> __forceinline T mini(const T& a, const T& b, const T& c)
{
  return mini(mini(a,b),c);
}
template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d)
{
  return mini(mini(a,b),mini(c,d));
}
template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e)
{
  return mini(mini(mini(a,b),mini(c,d)),e);
}
267
268
/*! Two-argument maximum for the scalar types. The second argument is
 *  returned only when the first compares less than it; in every other
 *  case (including comparisons involving NaN) the first argument is
 *  returned. */
__forceinline      int max(     int a,      int b) { if (a < b) return b; return a; }
__forceinline unsigned max(unsigned a, unsigned b) { if (a < b) return b; return a; }
__forceinline  int64_t max( int64_t a,  int64_t b) { if (a < b) return b; return a; }
__forceinline    float max(   float a,    float b) { if (a < b) return b; return a; }
__forceinline   double max(  double a,   double b) { if (a < b) return b; return a; }
#if defined(__64BIT__) || defined(__EMSCRIPTEN__)
__forceinline   size_t max(  size_t a,   size_t b) { if (a < b) return b; return a; }
#endif
#if defined(__EMSCRIPTEN__)
__forceinline     long max(    long a,     long b) { if (a < b) return b; return a; }
#endif

/*! Maximum of three, four and five values, built from the two-argument
 *  max that is available for T. */
template<typename T> __forceinline T max(const T& a, const T& b, const T& c)
{
  return max(max(a,b),c);
}
template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d)
{
  return max(max(a,b),max(c,d));
}
template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e)
{
  return max(max(max(a,b),max(c,d)),e);
}
283
284
/*! maxi of three, four and five values, built from the two-argument
 *  maxi that is available for T. */
template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c)
{
  return maxi(maxi(a,b),c);
}
template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d)
{
  return maxi(maxi(a,b),maxi(c,d));
}
template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e)
{
  return maxi(maxi(maxi(a,b),maxi(c,d)),e);
}
287
288
#if defined(__MACOSX__)
/*! min/max overloads for ssize_t, only provided on macOS builds. */
__forceinline ssize_t min(ssize_t a, ssize_t b) { if (a < b) return a; return b; }
__forceinline ssize_t max(ssize_t a, ssize_t b) { if (a < b) return b; return a; }
#endif
292
293
#if defined(__MACOSX__) && !defined(__INTEL_COMPILER)
/*! sincosf for macOS builds, forwarded to __sincosf. */
__forceinline void sincosf(float x, float *sin, float *cos)
{
  __sincosf(x,sin,cos);
}
#endif

#if defined(__WIN32__) || defined(__FreeBSD__)
/*! sincosf for Windows and FreeBSD builds: writes sinf(x) to *s first
 *  and cosf(x) to *c afterwards. */
__forceinline void sincosf(float x, float *s, float *c)
{
  *s = sinf(x);
  *c = cosf(x);
}
#endif
304
305
/*! Limits x to the range [lower, upper]; the bounds default to T(zero)
 *  and T(one). The upper bound is applied first, then the lower one. */
template<typename T> __forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one))
{
  return max(min(x,upper),lower);
}

/*! Limits x to the range [T(zero), upper]. */
template<typename T> __forceinline T clampz(const T& x, const T& upper)
{
  return max(T(zero), min(x,upper));
}
307
308
/*! Converts an angle from degrees to radians by multiplying with the
 *  single precision constant for pi/180. */
template<typename T> __forceinline T deg2rad ( const T& x )
{
  const T radiansPerDegree = T(1.74532925199432957692e-2f);
  return x * radiansPerDegree;
}

/*! Converts an angle from radians to degrees by multiplying with the
 *  single precision constant for 180/pi. */
template<typename T> __forceinline T rad2deg ( const T& x )
{
  const T degreesPerRadian = T(5.72957795130823208768e1f);
  return x * degreesPerRadian;
}
310
/*! Converts a sine into the matching non-negative cosine (or the other
 *  way round) as sqrt(1 - x*x); the radicand is limited to zero from
 *  below before the square root is taken. */
template<typename T> __forceinline T sin2cos ( const T& x )
{
  return sqrt(max(T(zero),
                  T(one)-x*x));
}

/*! Same computation as sin2cos. */
template<typename T> __forceinline T cos2sin ( const T& x )
{
  return sin2cos(x);
}
312
313
/*! Scalar multiply-add family:
 *    madd  =   a*b + c
 *    msub  =   a*b - c
 *    nmadd = -(a*b) + c
 *    nmsub = -(a*b) - c
 *  With AVX2 the fused scalar intrinsics are used; on AArch64 with clang
 *  the plain expressions are compiled under "fp contract(fast)"; all
 *  other targets evaluate the plain expressions. */
#if defined(__AVX2__)
__forceinline float madd  ( const float a, const float b, const float c)
{
  const __m128 va = _mm_set_ss(a), vb = _mm_set_ss(b), vc = _mm_set_ss(c);
  return _mm_cvtss_f32(_mm_fmadd_ss(va,vb,vc));
}
__forceinline float msub  ( const float a, const float b, const float c)
{
  const __m128 va = _mm_set_ss(a), vb = _mm_set_ss(b), vc = _mm_set_ss(c);
  return _mm_cvtss_f32(_mm_fmsub_ss(va,vb,vc));
}
__forceinline float nmadd ( const float a, const float b, const float c)
{
  const __m128 va = _mm_set_ss(a), vb = _mm_set_ss(b), vc = _mm_set_ss(c);
  return _mm_cvtss_f32(_mm_fnmadd_ss(va,vb,vc));
}
__forceinline float nmsub ( const float a, const float b, const float c)
{
  const __m128 va = _mm_set_ss(a), vb = _mm_set_ss(b), vc = _mm_set_ss(c);
  return _mm_cvtss_f32(_mm_fnmsub_ss(va,vb,vc));
}

#elif defined (__aarch64__) && defined(__clang__)
#pragma clang fp contract(fast)
__forceinline float madd  ( const float a, const float b, const float c)
{
  return a*b + c;
}
__forceinline float msub  ( const float a, const float b, const float c)
{
  return a*b - c;
}
__forceinline float nmadd ( const float a, const float b, const float c)
{
  return c - a*b;
}
__forceinline float nmsub ( const float a, const float b, const float c)
{
  return -(c + a*b);
}
#pragma clang fp contract(on)

#else
__forceinline float madd  ( const float a, const float b, const float c)
{
  return a*b+c;
}
__forceinline float msub  ( const float a, const float b, const float c)
{
  return a*b-c;
}
__forceinline float nmadd ( const float a, const float b, const float c)
{
  return -a*b+c;
}
__forceinline float nmsub ( const float a, const float b, const float c)
{
  return -a*b-c;
}
#endif
333
334
/*! random functions */

/*! Generic fallback: types without a specialization below always get
 *  T(0). */
template<typename T> T random()
{
  return T(0);
}

#if defined(_WIN32)
/*! On Windows several rand() results are combined, shifted by 0, 8 and
 *  16 bits and XORed together. */
template<> __forceinline int random()
{
  return int(rand()) ^ (int(rand()) << 8) ^ (int(rand()) << 16);
}
template<> __forceinline uint32_t random()
{
  return uint32_t(rand()) ^ (uint32_t(rand()) << 8) ^ (uint32_t(rand()) << 16);
}
#else
/*! Elsewhere int uses a single rand() result and uint32_t XORs two of
 *  them, the second one shifted by 16 bits. */
template<> __forceinline int random()
{
  return int(rand());
}
template<> __forceinline uint32_t random()
{
  return uint32_t(rand()) ^ (uint32_t(rand()) << 16);
}
#endif

/*! Floating point variants: rand() divided by RAND_MAX. */
template<> __forceinline float random()
{
  return rand()/float(RAND_MAX);
}
template<> __forceinline double random()
{
  return rand()/double(RAND_MAX);
}
345
346
#if defined(_WIN32)
/*! Stand-in for POSIX drand48 on Windows, built on rand(). The result
 *  lies in [0,1): dividing by RAND_MAX+1 keeps 1.0 itself excluded, as
 *  the POSIX function does, so that scaling the result by a count never
 *  yields the count itself. */
__forceinline double drand48() {
  return double(rand())/(double(RAND_MAX)+1.0);
}

/*! Stand-in for POSIX srand48 on Windows; forwards the seed to srand.
 *  srand takes an unsigned int, hence the explicit conversion. */
__forceinline void srand48(long seed) {
  srand((unsigned int)seed);
}
#endif
355
356
/*! selects: return t when s is true and f otherwise */
__forceinline  bool select(bool s,  bool t,  bool f) { if (s) return t; return f; }
__forceinline   int select(bool s,   int t,   int f) { if (s) return t; return f; }
__forceinline float select(bool s, float t, float f) { if (s) return t; return f; }

/*! mask reductions for a single bool */
__forceinline bool none(bool s) { return s == false; }
__forceinline bool all (bool s) { return s; }
__forceinline bool any (bool s) { return s; }

/*! converts the bool to an unsigned value: 1 for true, 0 for false */
__forceinline unsigned movemask (bool s) { return static_cast<unsigned>(s); }
366
367
__forceinline float lerp(const float v0, const float v1, const float t) {
368
return madd(1.0f-t,v0,t*v1);
369
}
370
371
/*! Bilinear interpolation of the four values x0..x3 with weights u and
 *  v, evaluated with madd as
 *    (1-u)*((1-v)*x0 + v*x2) + u*((1-v)*x1 + v*x3). */
template<typename T>
__forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v)
{
  return madd((1.0f-u),
              madd((1.0f-v),T(x0),v*T(x2)),
              u*madd((1.0f-v),T(x1),v*T(x3)));
}
375
376
/*! exchange: swaps the values of a and b through a temporary copy */
template<typename T> __forceinline void xchg ( T& a, T& b )
{
  const T previousA = a;
  a = b;
  b = previousA;
}
378
379
/* load/store */
template<typename Ty> struct mem;

/*! Masked load/store of a single float. Loads return 0.0f when the mask
 *  is false, stores leave memory untouched when the mask is false. The
 *  aligned and unaligned variants are implemented identically. */
template<> struct mem<float>
{
  static __forceinline float load (bool mask, const void* ptr) {
    if (!mask) return 0.0f;
    return *static_cast<const float*>(ptr);
  }
  static __forceinline float loadu(bool mask, const void* ptr) {
    if (!mask) return 0.0f;
    return *static_cast<const float*>(ptr);
  }

  static __forceinline void store (bool mask, void* ptr, const float v) {
    if (!mask) return;
    *static_cast<float*>(ptr) = v;
  }
  static __forceinline void storeu(bool mask, void* ptr, const float v) {
    if (!mask) return;
    *static_cast<float*>(ptr) = v;
  }
};
389
390
/*! bit reverse operation
 *
 *  Reverses the order of the low 32 bits of vin by swapping neighbouring
 *  bits, then bit pairs, nibbles, bytes and finally the two 16-bit
 *  halves. Every step masks the value that was shifted right, so that a
 *  sign extending shift (signed T) cannot leak copies of the sign bit
 *  into the result; this includes the final half swap. */
template<class T>
__forceinline T bitReverse(const T& vin)
{
  T v = vin;
  v = ((v >>  1) & 0x55555555) | ((v & 0x55555555) <<  1);
  v = ((v >>  2) & 0x33333333) | ((v & 0x33333333) <<  2);
  v = ((v >>  4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) <<  4);
  v = ((v >>  8) & 0x00FF00FF) | ((v & 0x00FF00FF) <<  8);
  v = ((v >> 16) & 0x0000FFFF) | ((v & 0x0000FFFF) << 16);
  return v;
}
402
403
/*! bit interleave operation
 *
 *  Interleaves the low 10 bits of x, y and z: bit i of x ends up at bit
 *  3*i of the result, bit i of y at bit 3*i+1 and bit i of z at bit
 *  3*i+2. */
template<class T>
__forceinline T bitInterleave(const T& xin, const T& yin, const T& zin)
{
  // moves bit i of the input (i = 0..9) to bit 3*i, all other bits are cleared
  auto spread = [](const T& in) -> T {
    T bits = in;
    bits = (bits | (bits << 16)) & 0x030000FF;
    bits = (bits | (bits <<  8)) & 0x0300F00F;
    bits = (bits | (bits <<  4)) & 0x030C30C3;
    bits = (bits | (bits <<  2)) & 0x09249249;
    return bits;
  };

  const T x = spread(xin);
  const T y = spread(yin);
  const T z = spread(zin);

  return x | (y << 1) | (z << 2);
}
425
426
#if defined(__AVX2__) && !defined(__aarch64__)

/*! Specialization for unsigned int that uses pdep to deposit the bits
 *  of each input directly at every third bit position (offsets 0, 1 and
 *  2 for x, y and z) and ORs the three results together. */
template<>
__forceinline unsigned int bitInterleave(const unsigned int &xi, const unsigned int& yi, const unsigned int& zi)
{
  const unsigned int xSpread = pdep(xi,0x49249249 /* 0b01001001001001001001001001001001 */ );
  const unsigned int ySpread = pdep(yi,0x92492492 /* 0b10010010010010010010010010010010 */);
  const unsigned int zSpread = pdep(zi,0x24924924 /* 0b00100100100100100100100100100100 */);
  const unsigned int interleaved = xSpread | ySpread | zSpread;
  return interleaved;
}

#endif
438
439
/*! bit interleave operation for 64bit data types
 *
 *  Interleaves the low 21 bits of x, y and z: bit i of x ends up at bit
 *  3*i of the result, bit i of y at bit 3*i+1 and bit i of z at bit
 *  3*i+2. Higher input bits are masked off first. */
template<class T>
__forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin){
  // keeps the low 21 bits of the input and moves bit i to bit 3*i
  auto spread = [](const T& in) -> T {
    T bits = in & 0x1fffff;
    bits = (bits | bits << 32) & 0x1f00000000ffff;
    bits = (bits | bits << 16) & 0x1f0000ff0000ff;
    bits = (bits | bits <<  8) & 0x100f00f00f00f00f;
    bits = (bits | bits <<  4) & 0x10c30c30c30c30c3;
    bits = (bits | bits <<  2) & 0x1249249249249249;
    return bits;
  };

  const T x = spread(xin);
  const T y = spread(yin);
  const T z = spread(zin);

  return x | (y << 1) | (z << 2);
}
466
}
467
468
#endif
469
470