// Source: GitHub repository godotengine/godot
// Path: blob/master/thirdparty/embree/common/math/vec2fa.h
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
#pragma once
5
6
#include "../sys/alloc.h"
7
#include "emath.h"
8
9
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
10
# include "vec2fa_sycl.h"
11
#else
12
13
#include "../simd/sse.h"
14
15
namespace embree
16
{
17
////////////////////////////////////////////////////////////////////////////////
18
/// SSE Vec2fa Type
19
////////////////////////////////////////////////////////////////////////////////
20
21
/// 2D single-precision vector stored in one 16-byte SSE register.
///
/// Lanes 0 and 1 hold x and y. Lanes 2 and 3 (az, aw) are padding: their
/// contents depend on how the value was produced, and the 2D-aware helpers
/// in this file (operator[], ==, !=, reductions, dot) ignore them.
struct __aligned(16) Vec2fa
{
  ALIGNED_STRUCT_(16);

  typedef float Scalar;
  enum { N = 2 };
  union {
    __m128 m128;                  // whole register view
    struct { float x,y,az,aw; };  // per-lane view; az/aw are padding
  };

  ////////////////////////////////////////////////////////////////////////////////
  /// Constructors, Assignment & Cast Operators
  ////////////////////////////////////////////////////////////////////////////////

  /// Default construction leaves every lane uninitialized.
  __forceinline Vec2fa( ) {}

  /// Wraps an existing register without modification.
  __forceinline Vec2fa( const __m128 a ) : m128(a) {}

  /// Converts from Vec2<float>. All four lanes are written (x, y, y, y), the
  /// same layout the (x, y) constructor produces, so that later whole-register
  /// operations never read indeterminate padding lanes.
  __forceinline Vec2fa ( const Vec2<float>& other )
    : m128(_mm_set_ps(other.y, other.y, other.y, other.x)) {}

  /// Assigns x and y from a Vec2<float>; the padding lanes keep their previous contents.
  __forceinline Vec2fa& operator =( const Vec2<float>& other ) { x = other.x; y = other.y; return *this; }

  /// Copies the whole register, padding lanes included.
  __forceinline Vec2fa ( const Vec2fa& other ) { m128 = other.m128; }
  __forceinline Vec2fa& operator =( const Vec2fa& other ) { m128 = other.m128; return *this; }

  /// Broadcasts one scalar to all four lanes.
  __forceinline explicit Vec2fa( const float a ) : m128(_mm_set1_ps(a)) {}

  /// Builds (x, y) with y replicated into both padding lanes.
  __forceinline Vec2fa( const float x, const float y) : m128(_mm_set_ps(y, y, y, x)) {}

  /// Converts four 32-bit integers to floats lane by lane.
  __forceinline explicit Vec2fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}

  /// Implicit access to the underlying register so values can be passed to intrinsics.
  __forceinline operator const __m128&() const { return m128; }
  __forceinline operator       __m128&()       { return m128; }

  ////////////////////////////////////////////////////////////////////////////////
  /// Loads and Stores
  ////////////////////////////////////////////////////////////////////////////////

  /// Aligned load of four floats from a; lanes 2 and 3 are cleared to zero.
  static __forceinline Vec2fa load( const void* const a ) {
    const __m128 xy_mask = _mm_castsi128_ps(_mm_set_epi32(0, 0, -1, -1));
    return Vec2fa(_mm_and_ps(_mm_load_ps(static_cast<const float*>(a)), xy_mask));
  }

  /// Unaligned load of four floats from a; lanes 2 and 3 are cleared to zero.
  static __forceinline Vec2fa loadu( const void* const a ) {
    const __m128 xy_mask = _mm_castsi128_ps(_mm_set_epi32(0, 0, -1, -1));
    return Vec2fa(_mm_and_ps(_mm_loadu_ps(static_cast<const float*>(a)), xy_mask));
  }

  /// Unaligned store of all four lanes (16 bytes) to ptr.
  static __forceinline void storeu ( void* ptr, const Vec2fa& v ) {
    _mm_storeu_ps(static_cast<float*>(ptr), v.m128);
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec2fa( ZeroTy   ) : m128(_mm_setzero_ps()) {}
  __forceinline Vec2fa( OneTy    ) : m128(_mm_set1_ps(1.0f)) {}
  __forceinline Vec2fa( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
  __forceinline Vec2fa( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}

  ////////////////////////////////////////////////////////////////////////////////
  /// Array Access
  ////////////////////////////////////////////////////////////////////////////////

  /// Component access; only indices 0 (x) and 1 (y) are valid.
  __forceinline const float& operator []( const size_t index ) const { assert(index < 2); return (&x)[index]; }
  __forceinline       float& operator []( const size_t index )       { assert(index < 2); return (&x)[index]; }
};
85
86
////////////////////////////////////////////////////////////////////////////////
87
/// Unary Operators
88
////////////////////////////////////////////////////////////////////////////////
89
90
/// Unary plus: yields an unchanged copy of the operand.
__forceinline Vec2fa operator +( const Vec2fa& a )
{
  return a;
}
91
/// Unary minus: flips the sign bit of every lane.
__forceinline Vec2fa operator -( const Vec2fa& a )
{
  const __m128 sign_bits = _mm_castsi128_ps(_mm_set1_epi32(0x80000000));
  return Vec2fa(_mm_xor_ps(a.m128, sign_bits));
}
95
/// Lane-wise absolute value, obtained by clearing the sign bit of every lane.
__forceinline Vec2fa abs ( const Vec2fa& a )
{
  const __m128 magnitude_bits = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff));
  return Vec2fa(_mm_and_ps(a.m128, magnitude_bits));
}
99
/// Lane-wise sign: -1 where the lane compares less than zero, +1 otherwise
/// (so zero lanes yield +1).
__forceinline Vec2fa sign ( const Vec2fa& a )
{
  const Vec2fa plus_one  = Vec2fa(one);
  const Vec2fa minus_one = -Vec2fa(one);
  const __m128 is_negative = _mm_cmplt_ps(a, Vec2fa(zero));
  return blendv_ps(plus_one, minus_one, is_negative);
}
102
103
/// Lane-wise approximate reciprocal: a hardware estimate that is then refined
/// (two refinement steps on aarch64, one Newton-Raphson step elsewhere).
__forceinline Vec2fa rcp ( const Vec2fa& a )
{
#if defined(__aarch64__)
  __m128 estimate = _mm_rcp_ps(a.m128);
  // Two identical refinement steps built on the NEON reciprocal-step intrinsic.
  for (int step = 0; step < 2; ++step)
    estimate = vmulq_f32(vrecpsq_f32(a.m128, estimate), estimate);
  return Vec2fa(estimate);
#else

  // Initial estimate; AVX-512VL offers the 14-bit variant.
#if defined(__AVX512VL__)
  const Vec2fa estimate = _mm_rcp14_ps(a.m128);
#else
  const Vec2fa estimate = _mm_rcp_ps(a.m128);
#endif

#if defined(__AVX2__)
  // residual = 1 - a * estimate (very close to zero)
  const Vec2fa residual = _mm_fnmadd_ps(a, estimate, vfloat4(1.0f));
  // refined  = estimate + estimate * residual
  const Vec2fa refined = _mm_fmadd_ps(estimate, residual, estimate);
#else
  // residual = 1 - a * estimate (very close to zero)
  const Vec2fa residual = _mm_sub_ps(vfloat4(1.0f), _mm_mul_ps(a, estimate));
  // refined  = estimate + estimate * residual
  const Vec2fa refined = _mm_add_ps(estimate, _mm_mul_ps(estimate, residual));
#endif

  return refined;
#endif //defined(__aarch64__)
}
128
129
/// Lane-wise square root.
__forceinline Vec2fa sqrt ( const Vec2fa& a )
{
  return Vec2fa(_mm_sqrt_ps(a.m128));
}

/// Lane-wise square (a * a).
__forceinline Vec2fa sqr ( const Vec2fa& a )
{
  return Vec2fa(_mm_mul_ps(a.m128, a.m128));
}
131
132
/// Lane-wise approximate reciprocal square root: a hardware estimate that is
/// then refined (two steps on aarch64, one Newton-Raphson step elsewhere).
__forceinline Vec2fa rsqrt( const Vec2fa& a )
{
#if defined(__aarch64__)
  __m128 estimate = _mm_rsqrt_ps(a.m128);
  // Two identical refinement steps built on the NEON rsqrt-step intrinsic.
  for (int step = 0; step < 2; ++step)
    estimate = vmulq_f32(estimate, vrsqrtsq_f32(vmulq_f32(a.m128, estimate), estimate));
  return estimate;
#else

  // Initial estimate; AVX-512VL offers the 14-bit variant.
#if defined(__AVX512VL__)
  const __m128 r = _mm_rsqrt14_ps(a.m128);
#else
  const __m128 r = _mm_rsqrt_ps(a.m128);
#endif

  // One Newton-Raphson step: 1.5*r + (-0.5*a*r) * (r*r)
  const __m128 scaled_estimate = _mm_mul_ps(_mm_set1_ps(1.5f), r);
  const __m128 neg_half_a_r    = _mm_mul_ps(_mm_mul_ps(a, _mm_set1_ps(-0.5f)), r);
  const __m128 r_squared       = _mm_mul_ps(r, r);
  return _mm_add_ps(scaled_estimate, _mm_mul_ps(neg_half_a_r, r_squared));

#endif
}
150
151
/// Replaces every lane whose magnitude is below min_rcp_input with
/// min_rcp_input itself; all other lanes pass through unchanged.
__forceinline Vec2fa zero_fix(const Vec2fa& a)
{
  const __m128 threshold = _mm_set1_ps(min_rcp_input);
  const __m128 too_small = _mm_cmplt_ps(abs(a).m128, threshold);
  return blendv_ps(a, threshold, too_small);
}
154
/// Reciprocal of a after zero_fix() has replaced near-zero lanes.
__forceinline Vec2fa rcp_safe(const Vec2fa& a)
{
  const Vec2fa guarded = zero_fix(a);
  return rcp(guarded);
}
157
/// Natural logarithm of x and y, computed per component with logf.
__forceinline Vec2fa log ( const Vec2fa& a )
{
  const float log_x = logf(a.x);
  const float log_y = logf(a.y);
  return Vec2fa(log_x, log_y);
}
160
161
/// Exponential of x and y, computed per component with expf.
__forceinline Vec2fa exp ( const Vec2fa& a )
{
  const float exp_x = expf(a.x);
  const float exp_y = expf(a.y);
  return Vec2fa(exp_x, exp_y);
}
164
165
////////////////////////////////////////////////////////////////////////////////
166
/// Binary Operators
167
////////////////////////////////////////////////////////////////////////////////
168
169
/// Lane-wise sum.
__forceinline Vec2fa operator +( const Vec2fa& a, const Vec2fa& b )
{
  return Vec2fa(_mm_add_ps(a.m128, b.m128));
}

/// Lane-wise difference.
__forceinline Vec2fa operator -( const Vec2fa& a, const Vec2fa& b )
{
  return Vec2fa(_mm_sub_ps(a.m128, b.m128));
}

/// Lane-wise product.
__forceinline Vec2fa operator *( const Vec2fa& a, const Vec2fa& b )
{
  return Vec2fa(_mm_mul_ps(a.m128, b.m128));
}

/// Vector times scalar: the scalar is broadcast to all lanes first.
__forceinline Vec2fa operator *( const Vec2fa& a, const float b )
{
  const Vec2fa splat(b);
  return a * splat;
}

/// Scalar times vector: the scalar is broadcast to all lanes first.
__forceinline Vec2fa operator *( const float a, const Vec2fa& b )
{
  const Vec2fa splat(a);
  return splat * b;
}

/// Lane-wise quotient.
__forceinline Vec2fa operator /( const Vec2fa& a, const Vec2fa& b )
{
  return Vec2fa(_mm_div_ps(a.m128, b.m128));
}

/// Vector divided by a scalar broadcast to all lanes.
__forceinline Vec2fa operator /( const Vec2fa& a, const float b )
{
  const __m128 divisor = _mm_set1_ps(b);
  return Vec2fa(_mm_div_ps(a.m128, divisor));
}

/// Scalar (broadcast to all lanes) divided by a vector.
__forceinline Vec2fa operator /( const float a, const Vec2fa& b )
{
  const __m128 dividend = _mm_set1_ps(a);
  return Vec2fa(_mm_div_ps(dividend, b.m128));
}
177
178
/// Lane-wise minimum (operand order is kept as a, b for _mm_min_ps).
__forceinline Vec2fa min( const Vec2fa& a, const Vec2fa& b )
{
  return Vec2fa(_mm_min_ps(a.m128, b.m128));
}

/// Lane-wise maximum (operand order is kept as a, b for _mm_max_ps).
__forceinline Vec2fa max( const Vec2fa& a, const Vec2fa& b )
{
  return Vec2fa(_mm_max_ps(a.m128, b.m128));
}
180
181
#if defined(__aarch64__) || defined(__SSE4_1__)
/// Minimum computed by comparing the lanes' bit patterns as signed 32-bit
/// integers instead of as floats.
/// NOTE(review): presumably intended for non-negative inputs, where integer
/// and float ordering agree - confirm against callers.
__forceinline Vec2fa mini(const Vec2fa& a, const Vec2fa& b)
{
  const vint4 lhs_bits = _mm_castps_si128(a);
  const vint4 rhs_bits = _mm_castps_si128(b);
  const vint4 smaller  = _mm_min_epi32(lhs_bits, rhs_bits);
  return _mm_castsi128_ps(smaller);
}
#endif
189
190
#if defined(__aarch64__) || defined(__SSE4_1__)
/// Maximum computed by comparing the lanes' bit patterns as signed 32-bit
/// integers instead of as floats.
/// NOTE(review): presumably intended for non-negative inputs, where integer
/// and float ordering agree - confirm against callers.
__forceinline Vec2fa maxi(const Vec2fa& a, const Vec2fa& b)
{
  const vint4 lhs_bits = _mm_castps_si128(a);
  const vint4 rhs_bits = _mm_castps_si128(b);
  const vint4 larger   = _mm_max_epi32(lhs_bits, rhs_bits);
  return _mm_castsi128_ps(larger);
}
#endif
198
199
/// Raises x and y to the power b, computed per component with powf.
__forceinline Vec2fa pow ( const Vec2fa& a, const float& b )
{
  const float pow_x = powf(a.x, b);
  const float pow_y = powf(a.y, b);
  return Vec2fa(pow_x, pow_y);
}
202
203
////////////////////////////////////////////////////////////////////////////////
204
/// Ternary Operators
205
////////////////////////////////////////////////////////////////////////////////
206
207
#if defined(__AVX2__)
208
__forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fmadd_ps(a,b,c); }
209
__forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fmsub_ps(a,b,c); }
210
__forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fnmadd_ps(a,b,c); }
211
__forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return _mm_fnmsub_ps(a,b,c); }
212
#else
213
__forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return a*b+c; }
214
__forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return a*b-c; }
215
__forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return -a*b+c;}
216
__forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return -a*b-c; }
217
#endif
218
219
// Scalar-first overloads: the scalar is broadcast to a vector and the call is
// forwarded to the matching all-vector overload.
__forceinline Vec2fa madd ( const float a, const Vec2fa& b, const Vec2fa& c)
{
  const Vec2fa splat(a);
  return madd(splat, b, c);
}
__forceinline Vec2fa msub ( const float a, const Vec2fa& b, const Vec2fa& c)
{
  const Vec2fa splat(a);
  return msub(splat, b, c);
}
__forceinline Vec2fa nmadd ( const float a, const Vec2fa& b, const Vec2fa& c)
{
  const Vec2fa splat(a);
  return nmadd(splat, b, c);
}
__forceinline Vec2fa nmsub ( const float a, const Vec2fa& b, const Vec2fa& c)
{
  const Vec2fa splat(a);
  return nmsub(splat, b, c);
}
223
224
////////////////////////////////////////////////////////////////////////////////
225
/// Assignment Operators
226
////////////////////////////////////////////////////////////////////////////////
227
228
// Compound assignments: each applies the matching binary operator, stores the
// result in a and returns a reference to a.
__forceinline Vec2fa& operator +=( Vec2fa& a, const Vec2fa& b )
{
  a = a + b;
  return a;
}
__forceinline Vec2fa& operator -=( Vec2fa& a, const Vec2fa& b )
{
  a = a - b;
  return a;
}
__forceinline Vec2fa& operator *=( Vec2fa& a, const Vec2fa& b )
{
  a = a * b;
  return a;
}
__forceinline Vec2fa& operator *=( Vec2fa& a, const float b )
{
  a = a * b;
  return a;
}
__forceinline Vec2fa& operator /=( Vec2fa& a, const Vec2fa& b )
{
  a = a / b;
  return a;
}
__forceinline Vec2fa& operator /=( Vec2fa& a, const float b )
{
  a = a / b;
  return a;
}
234
235
////////////////////////////////////////////////////////////////////////////////
236
/// Reductions
237
////////////////////////////////////////////////////////////////////////////////
238
239
// Horizontal reductions over the two meaningful components (x and y only).

/// x + y
__forceinline float reduce_add(const Vec2fa& v)
{
  return v.x + v.y;
}

/// x * y
__forceinline float reduce_mul(const Vec2fa& v)
{
  return v.x * v.y;
}

/// Smaller of x and y.
__forceinline float reduce_min(const Vec2fa& v)
{
  return min(v.x, v.y);
}

/// Larger of x and y.
__forceinline float reduce_max(const Vec2fa& v)
{
  return max(v.x, v.y);
}
243
244
////////////////////////////////////////////////////////////////////////////////
245
/// Comparison Operators
246
////////////////////////////////////////////////////////////////////////////////
247
248
/// True when both x and y compare equal; the padding lanes are masked out.
__forceinline bool operator ==( const Vec2fa& a, const Vec2fa& b )
{
  const int xy_bits    = 3;  // movemask bits of lanes 0 and 1
  const int equal_bits = _mm_movemask_ps(_mm_cmpeq_ps(a.m128, b.m128));
  return (equal_bits & xy_bits) == xy_bits;
}

/// True when x or y compares unequal; the padding lanes are masked out.
__forceinline bool operator !=( const Vec2fa& a, const Vec2fa& b )
{
  const int xy_bits      = 3;  // movemask bits of lanes 0 and 1
  const int differ_bits  = _mm_movemask_ps(_mm_cmpneq_ps(a.m128, b.m128));
  return (differ_bits & xy_bits) != 0;
}
250
251
////////////////////////////////////////////////////////////////////////////////
252
/// Euclidean Space Operators
253
////////////////////////////////////////////////////////////////////////////////
254
255
#if defined(__SSE4_1__)
256
__forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) {
257
return _mm_cvtss_f32(_mm_dp_ps(a,b,0x3F));
258
}
259
#else
260
__forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) {
261
return reduce_add(a*b);
262
}
263
#endif
264
265
/// Returns the vector (-a.y, a.x).
__forceinline Vec2fa cross ( const Vec2fa& a )
{
  const float out_x = -a.y;
  const float out_y = a.x;
  return Vec2fa(out_x, out_y);
}
268
269
/// Squared Euclidean length: dot(a, a).
__forceinline float sqr_length ( const Vec2fa& a )
{
  return dot(a, a);
}

/// Reciprocal of the length, via the scalar rsqrt of dot(a, a).
__forceinline float rcp_length ( const Vec2fa& a )
{
  const float len_sq = dot(a, a);
  return rsqrt(len_sq);
}

/// Reciprocal of the squared length, via the scalar rcp of dot(a, a).
__forceinline float rcp_length2( const Vec2fa& a )
{
  const float len_sq = dot(a, a);
  return rcp(len_sq);
}

/// Euclidean length, via the scalar sqrt of dot(a, a).
__forceinline float length ( const Vec2fa& a )
{
  const float len_sq = dot(a, a);
  return sqrt(len_sq);
}

/// a scaled by the reciprocal of its length.
__forceinline Vec2fa normalize( const Vec2fa& a )
{
  const float inv_len = rsqrt(dot(a, a));
  return a * inv_len;
}

/// Euclidean distance between a and b.
__forceinline float distance ( const Vec2fa& a, const Vec2fa& b )
{
  const Vec2fa delta = a - b;
  return length(delta);
}
275
276
////////////////////////////////////////////////////////////////////////////////
277
/// Select
278
////////////////////////////////////////////////////////////////////////////////
279
280
/// Returns t when s is true and f otherwise, implemented as a blend with an
/// all-ones or all-zeros lane mask.
__forceinline Vec2fa select( bool s, const Vec2fa& t, const Vec2fa& f )
{
  __m128 lane_mask;
  if (s) {
    // Comparing a register with itself for equality sets every bit.
    lane_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_setzero_si128(), _mm_setzero_si128()));
  } else {
    lane_mask = _mm_setzero_ps();
  }
  return blendv_ps(f, t, lane_mask);
}
284
285
/// Linear interpolation (1 - t) * v0 + t * v1, evaluated through madd.
__forceinline Vec2fa lerp(const Vec2fa& v0, const Vec2fa& v1, const float t)
{
  const float  start_weight = 1.0f - t;
  const Vec2fa end_term     = t * v1;
  return madd(start_weight, v0, end_term);
}
288
289
/// Index of the component with the larger magnitude: 0 when |x| > |y|,
/// otherwise 1 (ties resolve to 1).
__forceinline int maxDim ( const Vec2fa& a )
{
  const Vec2fa magnitude = abs(a);
  return (magnitude.x > magnitude.y) ? 0 : 1;
}
295
296
////////////////////////////////////////////////////////////////////////////////
297
/// Rounding Functions
298
////////////////////////////////////////////////////////////////////////////////
299
300
#if defined(__aarch64__)
301
//__forceinline Vec2fa trunc(const Vec2fa& a) { return vrndq_f32(a); }
302
__forceinline Vec2fa floor(const Vec2fa& a) { return vrndmq_f32(a); }
303
__forceinline Vec2fa ceil (const Vec2fa& a) { return vrndpq_f32(a); }
304
#elif defined (__SSE4_1__)
305
//__forceinline Vec2fa trunc( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_NEAREST_INT); }
306
__forceinline Vec2fa floor( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_NEG_INF ); }
307
__forceinline Vec2fa ceil ( const Vec2fa& a ) { return _mm_round_ps(a, _MM_FROUND_TO_POS_INF ); }
308
#else
309
//__forceinline Vec2fa trunc( const Vec2fa& a ) { return Vec2fa(truncf(a.x),truncf(a.y),truncf(a.z)); }
310
__forceinline Vec2fa floor( const Vec2fa& a ) { return Vec2fa(floorf(a.x),floorf(a.y)); }
311
__forceinline Vec2fa ceil ( const Vec2fa& a ) { return Vec2fa(ceilf (a.x),ceilf (a.y)); }
312
#endif
313
314
////////////////////////////////////////////////////////////////////////////////
315
/// Output Operators
316
////////////////////////////////////////////////////////////////////////////////
317
318
/// Writes the vector as "(x, y)"; the padding lanes are not printed.
__forceinline embree_ostream operator<<(embree_ostream cout, const Vec2fa& a)
{
  return cout << "("
              << a.x << ", " << a.y
              << ")";
}
321
322
/// Alias of Vec2fa.
using Vec2fa_t = Vec2fa;
323
}
324
325
#endif
326
327