Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/common/math/vec3fa_sycl.h
9912 views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
#pragma once
5
6
#include "../sys/alloc.h"
7
#include "emath.h"
8
#include "../simd/sse.h"
9
10
namespace embree
11
{
12
////////////////////////////////////////////////////////////////////////////////
13
/// SYCL Vec3fa Type
14
////////////////////////////////////////////////////////////////////////////////
15
16
/*! Three-component float vector stored in a 16-byte aligned struct.
 *  A fourth float (do_not_use) occupies the remaining slot; none of the
 *  members below read or write it. */
struct __aligned(16) Vec3fa
{
  //ALIGNED_STRUCT_(16);

  using Scalar = float;
  enum { N = 3 };
  struct { float x,y,z, do_not_use; };

  ////////////////////////////////////////////////////////////////////////////////
  /// Constructors, Assignment & Cast Operators
  ////////////////////////////////////////////////////////////////////////////////

  /*! Default construction leaves all components uninitialized. */
  __forceinline Vec3fa( ) {}
  //__forceinline Vec3fa( const __m128 a ) : m128(a) {}
  //__forceinline explicit Vec3fa(const vfloat4& a) : x(a[0]), y(a[1]), z(a[2]) {}

  /*! Implicit conversion from the generic Vec3<float>. */
  __forceinline Vec3fa ( const Vec3<float>& other )
    : x(other.x), y(other.y), z(other.z) {}
  //__forceinline Vec3fa& operator =( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; return *this; }

  /*! Copy construction transfers x, y and z only. */
  __forceinline Vec3fa ( const Vec3fa& other )
    : x(other.x), y(other.y), z(other.z) {}

  /*! Copy assignment transfers x, y and z only. */
  __forceinline Vec3fa& operator =( const Vec3fa& other ) {
    x = other.x;
    y = other.y;
    z = other.z;
    return *this;
  }

  /*! Broadcasts one scalar to x, y and z. */
  __forceinline explicit Vec3fa( const float a ) : x(a), y(a), z(a) {}

  /*! Builds the vector from three individual components. */
  __forceinline Vec3fa( const float x, const float y, const float z) : x(x), y(y), z(z) {}

  /*! Converts each integer component of a Vec3ia to float. */
  __forceinline explicit Vec3fa( const Vec3ia& a )
    : x(static_cast<float>(a.x)), y(static_cast<float>(a.y)), z(static_cast<float>(a.z)) {}

  //__forceinline operator const __m128&() const { return m128; }
  //__forceinline operator       __m128&()       { return m128; }

  /*! Converts to vfloat4 with the fourth lane set to 0.0f. */
  __forceinline operator vfloat4() const { return vfloat4(x,y,z,0.0f); } // FIXME: we should not need this!!

  //friend __forceinline Vec3fa copy_a( const Vec3fa& a, const Vec3fa& b ) { Vec3fa c = a; c.a = b.a; return c; }

  ////////////////////////////////////////////////////////////////////////////////
  /// Loads and Stores
  ////////////////////////////////////////////////////////////////////////////////

  /*! Reads three consecutive floats starting at a. */
  static __forceinline Vec3fa load( const void* const a ) {
    const float* const src = static_cast<const float*>(a);
    return Vec3fa(src[0],src[1],src[2]);
  }

  /*! Same element-wise read as load(). */
  static __forceinline Vec3fa loadu( const void* const a ) {
    const float* const src = static_cast<const float*>(a);
    return Vec3fa(src[0],src[1],src[2]);
  }

  /*! Writes x, y and z to three consecutive floats starting at a. */
  static __forceinline void storeu ( void* a, const Vec3fa& v ) {
    float* const dst = static_cast<float*>(a);
    dst[0] = v.x;
    dst[1] = v.y;
    dst[2] = v.z;
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec3fa( ZeroTy   ) : x(0.0f), y(0.0f), z(0.0f) {}
  __forceinline Vec3fa( OneTy    ) : x(1.0f), y(1.0f), z(1.0f) {}
  __forceinline Vec3fa( PosInfTy ) : x(+INFINITY), y(+INFINITY), z(+INFINITY) {}
  __forceinline Vec3fa( NegInfTy ) : x(-INFINITY), y(-INFINITY), z(-INFINITY) {}

  ////////////////////////////////////////////////////////////////////////////////
  /// Array Access
  ////////////////////////////////////////////////////////////////////////////////

  /*! Indexed read access; index 0/1/2 addresses x/y/z (asserted < 3). */
  __forceinline const float& operator []( const size_t index ) const {
    assert(index < 3);
    return (&x)[index];
  }

  /*! Indexed write access; index 0/1/2 addresses x/y/z (asserted < 3). */
  __forceinline float& operator []( const size_t index ) {
    assert(index < 3);
    return (&x)[index];
  }
};
84
85
////////////////////////////////////////////////////////////////////////////////
86
/// Unary Operators
87
////////////////////////////////////////////////////////////////////////////////
88
89
/*! Unary plus: returns a copy of the operand. */
__forceinline Vec3fa operator +( const Vec3fa& a ) {
  return a;
}

/*! Unary minus: negates x, y and z. */
__forceinline Vec3fa operator -( const Vec3fa& a ) {
  return Vec3fa(-a.x, -a.y, -a.z);
}

/*! Applies sycl::fabs to each component. */
__forceinline Vec3fa abs( const Vec3fa& a ) {
  return Vec3fa(sycl::fabs(a.x),
                sycl::fabs(a.y),
                sycl::fabs(a.z));
}

/*! Applies sycl::sign to each component. */
__forceinline Vec3fa sign( const Vec3fa& a ) {
  return Vec3fa(sycl::sign(a.x),
                sycl::sign(a.y),
                sycl::sign(a.z));
}
93
94
//__forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
95
/*! Component-wise reciprocal computed with sycl::native::recip. */
__forceinline Vec3fa rcp( const Vec3fa& a ) {
  return Vec3fa(sycl::native::recip(a.x),
                sycl::native::recip(a.y),
                sycl::native::recip(a.z));
}

/*! Component-wise square root (sycl::sqrt). */
__forceinline Vec3fa sqrt( const Vec3fa& a ) {
  return Vec3fa(sycl::sqrt(a.x),
                sycl::sqrt(a.y),
                sycl::sqrt(a.z));
}

/*! Component-wise square. */
__forceinline Vec3fa sqr( const Vec3fa& a ) {
  return Vec3fa(a.x*a.x, a.y*a.y, a.z*a.z);
}

/*! Component-wise reciprocal square root (sycl::rsqrt). */
__forceinline Vec3fa rsqrt( const Vec3fa& a ) {
  return Vec3fa(sycl::rsqrt(a.x),
                sycl::rsqrt(a.y),
                sycl::rsqrt(a.z));
}
100
101
/*! Replaces every component whose magnitude is below min_rcp_input with
 *  min_rcp_input; other components pass through unchanged. */
__forceinline Vec3fa zero_fix(const Vec3fa& a) {
  float fx = a.x;
  float fy = a.y;
  float fz = a.z;
  if (sycl::fabs(a.x) < min_rcp_input) fx = min_rcp_input;
  if (sycl::fabs(a.y) < min_rcp_input) fy = min_rcp_input;
  if (sycl::fabs(a.z) < min_rcp_input) fz = min_rcp_input;
  return Vec3fa(fx,fy,fz);
}
107
/*! Reciprocal of a after passing it through zero_fix(). */
__forceinline Vec3fa rcp_safe(const Vec3fa& a) {
  const Vec3fa fixed = zero_fix(a);
  return rcp(fixed);
}
110
/*! Component-wise sycl::log. */
__forceinline Vec3fa log( const Vec3fa& a ) {
  return Vec3fa(sycl::log(a.x),
                sycl::log(a.y),
                sycl::log(a.z));
}

/*! Component-wise sycl::exp. */
__forceinline Vec3fa exp( const Vec3fa& a ) {
  return Vec3fa(sycl::exp(a.x),
                sycl::exp(a.y),
                sycl::exp(a.z));
}
117
118
////////////////////////////////////////////////////////////////////////////////
119
/// Binary Operators
120
////////////////////////////////////////////////////////////////////////////////
121
122
/*! Component-wise sum. */
__forceinline Vec3fa operator +( const Vec3fa& a, const Vec3fa& b ) {
  return Vec3fa(a.x+b.x, a.y+b.y, a.z+b.z);
}

/*! Component-wise difference. */
__forceinline Vec3fa operator -( const Vec3fa& a, const Vec3fa& b ) {
  return Vec3fa(a.x-b.x, a.y-b.y, a.z-b.z);
}

/*! Component-wise product. */
__forceinline Vec3fa operator *( const Vec3fa& a, const Vec3fa& b ) {
  return Vec3fa(a.x*b.x, a.y*b.y, a.z*b.z);
}

/*! Vector times scalar: the scalar is broadcast, then multiplied component-wise. */
__forceinline Vec3fa operator *( const Vec3fa& a, const float b ) {
  return a * Vec3fa(b);
}

/*! Scalar times vector: the scalar is broadcast, then multiplied component-wise. */
__forceinline Vec3fa operator *( const float a, const Vec3fa& b ) {
  return Vec3fa(a) * b;
}

/*! Component-wise quotient. */
__forceinline Vec3fa operator /( const Vec3fa& a, const Vec3fa& b ) {
  return Vec3fa(a.x/b.x, a.y/b.y, a.z/b.z);
}

/*! Divides every component by the scalar b. */
__forceinline Vec3fa operator /( const Vec3fa& a, const float b ) {
  return Vec3fa(a.x/b, a.y/b, a.z/b);
}

/*! Divides the scalar a by every component of b. */
__forceinline Vec3fa operator /( const float a, const Vec3fa& b ) {
  return Vec3fa(a/b.x, a/b.y, a/b.z);
}
130
131
/*! Component-wise minimum via sycl::fmin. */
__forceinline Vec3fa min( const Vec3fa& a, const Vec3fa& b ) {
  return Vec3fa(sycl::fmin(a.x,b.x),
                sycl::fmin(a.y,b.y),
                sycl::fmin(a.z,b.z));
}

/*! Component-wise maximum via sycl::fmax. */
__forceinline Vec3fa max( const Vec3fa& a, const Vec3fa& b ) {
  return Vec3fa(sycl::fmax(a.x,b.x),
                sycl::fmax(a.y,b.y),
                sycl::fmax(a.z,b.z));
}
137
138
/*
139
#if defined(__SSE4_1__)
140
__forceinline Vec3fa mini(const Vec3fa& a, const Vec3fa& b) {
141
const vint4 ai = _mm_castps_si128(a);
142
const vint4 bi = _mm_castps_si128(b);
143
const vint4 ci = _mm_min_epi32(ai,bi);
144
return _mm_castsi128_ps(ci);
145
}
146
#endif
147
148
#if defined(__SSE4_1__)
149
__forceinline Vec3fa maxi(const Vec3fa& a, const Vec3fa& b) {
150
const vint4 ai = _mm_castps_si128(a);
151
const vint4 bi = _mm_castps_si128(b);
152
const vint4 ci = _mm_max_epi32(ai,bi);
153
return _mm_castsi128_ps(ci);
154
}
155
#endif
156
*/
157
/*! Raises each component of a to the power b using powf. */
__forceinline Vec3fa pow( const Vec3fa& a, const float& b ) {
  return Vec3fa(powf(a.x,b),
                powf(a.y,b),
                powf(a.z,b));
}
160
161
////////////////////////////////////////////////////////////////////////////////
162
/// Ternary Operators
163
////////////////////////////////////////////////////////////////////////////////
164
165
/*! Component-wise madd: forwards each (a,b,c) component triple to the scalar madd. */
__forceinline Vec3fa madd( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) {
  return Vec3fa(madd(a.x,b.x,c.x),
                madd(a.y,b.y,c.y),
                madd(a.z,b.z,c.z));
}

/*! Component-wise msub: forwards each component triple to the scalar msub. */
__forceinline Vec3fa msub( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) {
  return Vec3fa(msub(a.x,b.x,c.x),
                msub(a.y,b.y,c.y),
                msub(a.z,b.z,c.z));
}

/*! Component-wise nmadd: forwards each component triple to the scalar nmadd. */
__forceinline Vec3fa nmadd( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) {
  return Vec3fa(nmadd(a.x,b.x,c.x),
                nmadd(a.y,b.y,c.y),
                nmadd(a.z,b.z,c.z));
}

/*! Component-wise nmsub: forwards each component triple to the scalar nmsub. */
__forceinline Vec3fa nmsub( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) {
  return Vec3fa(nmsub(a.x,b.x,c.x),
                nmsub(a.y,b.y,c.y),
                nmsub(a.z,b.z,c.z));
}

/*! Scalar-first overloads: the scalar a is broadcast to a Vec3fa and the
 *  vector overload above is invoked. */
__forceinline Vec3fa madd( const float a, const Vec3fa& b, const Vec3fa& c) {
  return madd(Vec3fa(a),b,c);
}

__forceinline Vec3fa msub( const float a, const Vec3fa& b, const Vec3fa& c) {
  return msub(Vec3fa(a),b,c);
}

__forceinline Vec3fa nmadd( const float a, const Vec3fa& b, const Vec3fa& c) {
  return nmadd(Vec3fa(a),b,c);
}

__forceinline Vec3fa nmsub( const float a, const Vec3fa& b, const Vec3fa& c) {
  return nmsub(Vec3fa(a),b,c);
}
174
175
////////////////////////////////////////////////////////////////////////////////
176
/// Assignment Operators
177
////////////////////////////////////////////////////////////////////////////////
178
179
/*! Compound assignments: each one evaluates the matching binary operator,
 *  stores the result in a, and returns a reference to a. */
__forceinline Vec3fa& operator +=( Vec3fa& a, const Vec3fa& b ) {
  a = a + b;
  return a;
}

__forceinline Vec3fa& operator -=( Vec3fa& a, const Vec3fa& b ) {
  a = a - b;
  return a;
}

__forceinline Vec3fa& operator *=( Vec3fa& a, const Vec3fa& b ) {
  a = a * b;
  return a;
}

__forceinline Vec3fa& operator *=( Vec3fa& a, const float b ) {
  a = a * b;
  return a;
}

__forceinline Vec3fa& operator /=( Vec3fa& a, const Vec3fa& b ) {
  a = a / b;
  return a;
}

__forceinline Vec3fa& operator /=( Vec3fa& a, const float b ) {
  a = a / b;
  return a;
}
185
186
////////////////////////////////////////////////////////////////////////////////
187
/// Reductions
188
////////////////////////////////////////////////////////////////////////////////
189
190
/*! Sum of x, y and z. */
__forceinline float reduce_add(const Vec3fa& v) {
  return v.x+v.y+v.z;
}

/*! Product of x, y and z. */
__forceinline float reduce_mul(const Vec3fa& v) {
  return v.x*v.y*v.z;
}

/*! Smallest of x, y and z (nested sycl::fmin). */
__forceinline float reduce_min(const Vec3fa& v) {
  const float min_xy = sycl::fmin(v.x,v.y);
  return sycl::fmin(min_xy,v.z);
}

/*! Largest of x, y and z (nested sycl::fmax). */
__forceinline float reduce_max(const Vec3fa& v) {
  const float max_xy = sycl::fmax(v.x,v.y);
  return sycl::fmax(max_xy,v.z);
}
194
195
////////////////////////////////////////////////////////////////////////////////
196
/// Comparison Operators
197
////////////////////////////////////////////////////////////////////////////////
198
199
/*! True when x, y and z all compare equal. */
__forceinline bool operator ==( const Vec3fa& a, const Vec3fa& b ) {
  return a.x == b.x && a.y == b.y && a.z == b.z;
}

/*! True when at least one of x, y, z differs. */
__forceinline bool operator !=( const Vec3fa& a, const Vec3fa& b ) {
  return a.x != b.x || a.y != b.y || a.z != b.z;
}

/*! Per-component comparison masks: each returns a Vec3ba holding the result
 *  of the named comparison for x, y and z. */
__forceinline Vec3ba eq_mask( const Vec3fa& a, const Vec3fa& b ) {
  return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z);
}

__forceinline Vec3ba neq_mask(const Vec3fa& a, const Vec3fa& b ) {
  return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z);
}

__forceinline Vec3ba lt_mask( const Vec3fa& a, const Vec3fa& b ) {
  return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z);
}

__forceinline Vec3ba le_mask( const Vec3fa& a, const Vec3fa& b ) {
  return Vec3ba(a.x <= b.x, a.y <= b.y, a.z <= b.z);
}

__forceinline Vec3ba gt_mask( const Vec3fa& a, const Vec3fa& b ) {
  return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z);
}

__forceinline Vec3ba ge_mask( const Vec3fa& a, const Vec3fa& b ) {
  return Vec3ba(a.x >= b.x, a.y >= b.y, a.z >= b.z);
}
208
209
/*! True when x, y and z all lie strictly between -FLT_LARGE and +FLT_LARGE. */
__forceinline bool isvalid( const Vec3fa& v ) {
  return all(gt_mask(v,Vec3fa(-FLT_LARGE))
           & lt_mask(v,Vec3fa(+FLT_LARGE)));
}

/*! True when x, y and z all lie within [-FLT_MAX, +FLT_MAX]. */
__forceinline bool is_finite( const Vec3fa& a ) {
  return all(ge_mask(a,Vec3fa(-FLT_MAX))
           & le_mask(a,Vec3fa(+FLT_MAX)));
}
216
217
////////////////////////////////////////////////////////////////////////////////
218
/// Euclidean Space Operators
219
////////////////////////////////////////////////////////////////////////////////
220
221
/*! Dot product: sum of the component-wise products. */
__forceinline float dot( const Vec3fa& a, const Vec3fa& b ) {
  const Vec3fa products = a*b;
  return reduce_add(products);
}

/*! Cross product a x b, each component computed through msub. */
__forceinline Vec3fa cross( const Vec3fa& a, const Vec3fa& b ) {
  return Vec3fa(msub(a.y,b.z,a.z*b.y),
                msub(a.z,b.x,a.x*b.z),
                msub(a.x,b.y,a.y*b.x));
}
228
229
/*! Squared length: dot(a,a). */
__forceinline float sqr_length( const Vec3fa& a ) {
  return dot(a,a);
}

/*! rsqrt applied to dot(a,a). */
__forceinline float rcp_length( const Vec3fa& a ) {
  return rsqrt(dot(a,a));
}

/*! rcp applied to dot(a,a). */
__forceinline float rcp_length2( const Vec3fa& a ) {
  return rcp(dot(a,a));
}

/*! sqrt applied to dot(a,a). */
__forceinline float length( const Vec3fa& a ) {
  return sqrt(dot(a,a));
}

/*! Scales a by rsqrt(dot(a,a)); no special handling for a zero vector. */
__forceinline Vec3fa normalize( const Vec3fa& a ) {
  return a*rsqrt(dot(a,a));
}

/*! Length of the difference a-b. */
__forceinline float distance( const Vec3fa& a, const Vec3fa& b ) {
  return length(a-b);
}

/*! Evaluates d.x*(d.y+d.z) + d.y*d.z through the scalar madd. */
__forceinline float halfArea( const Vec3fa& d ) {
  return madd(d.x,(d.y+d.z),d.y*d.z);
}

/*! Twice halfArea(d). */
__forceinline float area( const Vec3fa& d ) {
  return 2.0f*halfArea(d);
}
237
238
/*! Like normalize(), but returns a unchanged when dot(a,a) is exactly 0.0f. */
__forceinline Vec3fa normalize_safe( const Vec3fa& a ) {
  const float len2 = dot(a,a);
  if (unlikely(len2 == 0.0f))
    return a;
  return a*rsqrt(len2);
}
241
242
/*! differentiated normalization */
243
__forceinline Vec3fa dnormalize(const Vec3fa& p, const Vec3fa& dp)
244
{
245
const float pp = dot(p,p);
246
const float pdp = dot(p,dp);
247
return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
248
}
249
250
////////////////////////////////////////////////////////////////////////////////
251
/// Select
252
////////////////////////////////////////////////////////////////////////////////
253
254
/*! Whole-vector select: yields the components of t when s is true, of f otherwise. */
__forceinline Vec3fa select( bool s, const Vec3fa& t, const Vec3fa& f ) {
  if (s)
    return Vec3fa(t.x, t.y, t.z);
  return Vec3fa(f.x, f.y, f.z);
}

/*! Per-component select: each component comes from t where the matching
 *  mask component of s is set, from f otherwise. */
__forceinline Vec3fa select( const Vec3ba& s, const Vec3fa& t, const Vec3fa& f ) {
  const float rx = s.x ? t.x : f.x;
  const float ry = s.y ? t.y : f.y;
  const float rz = s.z ? t.z : f.z;
  return Vec3fa(rx, ry, rz);
}
261
262
/*! Linear interpolation evaluated as madd(1-t, v0, t*v1). */
__forceinline Vec3fa lerp(const Vec3fa& v0, const Vec3fa& v1, const float t) {
  const float one_minus_t = 1.0f-t;
  return madd(one_minus_t,v0,t*v1);
}
265
266
/*! Index (0, 1 or 2) selected by comparing the absolute components of a:
 *  0 when |x| > |y| and |x| > |z|, 1 when |x| <= |y| and |y| > |z|, else 2. */
__forceinline int maxDim ( const Vec3fa& a )
{
  const Vec3fa mag = abs(a);
  if (mag.x > mag.y)
    return (mag.x > mag.z) ? 0 : 2;
  return (mag.y > mag.z) ? 1 : 2;
}
275
276
////////////////////////////////////////////////////////////////////////////////
277
/// Rounding Functions
278
////////////////////////////////////////////////////////////////////////////////
279
280
/*! Component-wise sycl::trunc. */
__forceinline Vec3fa trunc( const Vec3fa& a ) {
  return Vec3fa(sycl::trunc(a.x),
                sycl::trunc(a.y),
                sycl::trunc(a.z));
}

/*! Component-wise sycl::floor. */
__forceinline Vec3fa floor( const Vec3fa& a ) {
  return Vec3fa(sycl::floor(a.x),
                sycl::floor(a.y),
                sycl::floor(a.z));
}

/*! Component-wise sycl::ceil. */
__forceinline Vec3fa ceil( const Vec3fa& a ) {
  return Vec3fa(sycl::ceil (a.x),
                sycl::ceil (a.y),
                sycl::ceil (a.z));
}
283
284
////////////////////////////////////////////////////////////////////////////////
285
/// Output Operators
286
////////////////////////////////////////////////////////////////////////////////
287
288
/*! Streams the vector as "(x, y, z)". */
inline embree_ostream operator<<(embree_ostream cout, const Vec3fa& a) {
  return cout << "("
              << a.x << ", "
              << a.y << ", "
              << a.z << ")";
}
291
292
__forceinline Vec2fa::Vec2fa(const Vec3fa& a)
293
: x(a.x), y(a.y) {}
294
295
__forceinline Vec3ia::Vec3ia( const Vec3fa& a )
296
: x((int)a.x), y((int)a.y), z((int)a.z) {}
297
298
typedef Vec3fa Vec3fa_t;
299
300
301
302
////////////////////////////////////////////////////////////////////////////////
303
/// SYCL Vec3fx Type
304
////////////////////////////////////////////////////////////////////////////////
305
306
/*! Three-component float vector with an extra fourth slot, stored in a
 *  16-byte aligned struct. The fourth slot is a union that can be viewed as
 *  int (a), unsigned (u) or float (w). */
struct __aligned(16) Vec3fx
{
  //ALIGNED_STRUCT_(16);

  typedef float Scalar;
  enum { N = 3 };
  struct { float x,y,z; union { int a; unsigned u; float w; }; };

  ////////////////////////////////////////////////////////////////////////////////
  /// Constructors, Assignment & Cast Operators
  ////////////////////////////////////////////////////////////////////////////////

  /*! Default construction leaves all four slots uninitialized. */
  __forceinline Vec3fx( ) {}
  //__forceinline Vec3fx( const __m128 a ) : m128(a) {}

  /*! Copies the four lanes of a vfloat4 into x, y, z, w. */
  __forceinline explicit Vec3fx(const vfloat4& a) : x(a[0]), y(a[1]), z(a[2]), w(a[3]) {}

  /*! Conversion from Vec3fa; w is set to 0.0f. */
  __forceinline explicit Vec3fx(const Vec3fa& v) : x(v.x), y(v.y), z(v.z), w(0.0f) {}

  /*! Conversion to Vec3fa; the fourth slot is dropped. */
  __forceinline operator Vec3fa() const { return Vec3fa(x,y,z); }

  /*! Conversion from the generic Vec3<float>. w is zeroed, matching the
   *  Vec3fa and Vec3ia conversions, so that the four-lane operators and
   *  operator vfloat4() never read an uninitialized value. */
  __forceinline explicit Vec3fx ( const Vec3<float>& other ) : x(other.x), y(other.y), z(other.z), w(0.0f) {}
  //__forceinline Vec3fx& operator =( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; return *this; }

  //__forceinline Vec3fx ( const Vec3fx& other ) { *(sycl::float4*)this = *(const sycl::float4*)&other; }
  //__forceinline Vec3fx& operator =( const Vec3fx& other ) { *(sycl::float4*)this = *(const sycl::float4*)&other; return *this; }

  /*! Broadcasts one scalar to x, y, z and w. */
  __forceinline explicit Vec3fx( const float a ) : x(a), y(a), z(a), w(a) {}

  /*! Three-component construction. NOTE(review): w is set to z here rather
   *  than to 0.0f - confirm this is intended. */
  __forceinline Vec3fx( const float x, const float y, const float z) : x(x), y(y), z(z), w(z) {}

  /*! Vec3fa plus an explicit fourth slot, stored through the union member
   *  matching the argument type (int -> a, unsigned -> u, float -> w). */
  __forceinline Vec3fx( const Vec3fa& other, const int      a1) : x(other.x), y(other.y), z(other.z), a(a1) {}
  __forceinline Vec3fx( const Vec3fa& other, const unsigned a1) : x(other.x), y(other.y), z(other.z), u(a1) {}
  __forceinline Vec3fx( const Vec3fa& other, const float    w1) : x(other.x), y(other.y), z(other.z), w(w1) {}

  //__forceinline Vec3fx( const float x, const float y, const float z, const int      a) : x(x), y(y), z(z), a(a) {} // not working properly!
  //__forceinline Vec3fx( const float x, const float y, const float z, const unsigned a) : x(x), y(y), z(z), u(a) {} // not working properly!

  /*! Four-component construction. */
  __forceinline Vec3fx( const float x, const float y, const float z, const float w) : x(x), y(y), z(z), w(w) {}

  /*! Converts each integer component of a Vec3ia to float; w is set to 0.0f. */
  __forceinline explicit Vec3fx( const Vec3ia& a ) : x((float)a.x), y((float)a.y), z((float)a.z), w(0.0f) {}

  //__forceinline operator const __m128&() const { return m128; }
  //__forceinline operator       __m128&()       { return m128; }

  /*! Converts to vfloat4 with lanes (x, y, z, w). */
  __forceinline operator vfloat4() const { return vfloat4(x,y,z,w); }

  //friend __forceinline Vec3fx copy_a( const Vec3fx& a, const Vec3fx& b ) { Vec3fx c = a; c.a = b.a; return c; }

  ////////////////////////////////////////////////////////////////////////////////
  /// Loads and Stores
  ////////////////////////////////////////////////////////////////////////////////

  /*! Reads four consecutive floats starting at a. */
  static __forceinline Vec3fx load( const void* const a ) {
    const float* ptr = (const float*)a;
    return Vec3fx(ptr[0],ptr[1],ptr[2],ptr[3]);
  }

  /*! Same element-wise read as load(). */
  static __forceinline Vec3fx loadu( const void* const a ) {
    const float* ptr = (const float*)a;
    return Vec3fx(ptr[0],ptr[1],ptr[2],ptr[3]);
  }

  /*! Writes x, y, z and w to four consecutive floats starting at a. */
  static __forceinline void storeu ( void* a, const Vec3fx& v ) {
    float* ptr = (float*)a;
    ptr[0] = v.x; ptr[1] = v.y; ptr[2] = v.z; ptr[3] = v.w;
  }

  ////////////////////////////////////////////////////////////////////////////////
  /// Constants
  ////////////////////////////////////////////////////////////////////////////////

  __forceinline Vec3fx( ZeroTy   ) : x(0.0f), y(0.0f), z(0.0f), w(0.0f) {}
  __forceinline Vec3fx( OneTy    ) : x(1.0f), y(1.0f), z(1.0f), w(1.0f) {}
  __forceinline Vec3fx( PosInfTy ) : x(+INFINITY), y(+INFINITY), z(+INFINITY), w(+INFINITY) {}
  __forceinline Vec3fx( NegInfTy ) : x(-INFINITY), y(-INFINITY), z(-INFINITY), w(-INFINITY) {}

  ////////////////////////////////////////////////////////////////////////////////
  /// Array Access
  ////////////////////////////////////////////////////////////////////////////////

  /*! Indexed access to x/y/z; the index is asserted to be < 3, so the
   *  fourth slot is not reachable through this operator. */
  __forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
  __forceinline       float& operator []( const size_t index )       { assert(index < 3); return (&x)[index]; }
};
385
386
////////////////////////////////////////////////////////////////////////////////
387
/// Unary Operators
388
////////////////////////////////////////////////////////////////////////////////
389
390
/*! Unary plus: returns a copy of the operand. */
__forceinline Vec3fx operator +( const Vec3fx& a ) {
  return a;
}

/*! Unary minus: negates x, y, z and w. */
__forceinline Vec3fx operator -( const Vec3fx& a ) {
  return Vec3fx(-a.x, -a.y, -a.z, -a.w);
}

/*! Applies sycl::fabs to x, y, z and w. */
__forceinline Vec3fx abs( const Vec3fx& a ) {
  return Vec3fx(sycl::fabs(a.x),
                sycl::fabs(a.y),
                sycl::fabs(a.z),
                sycl::fabs(a.w));
}
393
/*! Applies sycl::sign to x, y, z and w. The fourth lane is computed from
 *  a.w (it previously reused a.z), in line with the other four-lane unary
 *  operators of Vec3fx. */
__forceinline Vec3fx sign ( const Vec3fx& a ) { return Vec3fx(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z),sycl::sign(a.w)); }
394
395
//__forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
396
/*! Four-lane reciprocal computed with sycl::native::recip. */
__forceinline Vec3fx rcp( const Vec3fx& a ) {
  return Vec3fx(sycl::native::recip(a.x),
                sycl::native::recip(a.y),
                sycl::native::recip(a.z),
                sycl::native::recip(a.w));
}

/*! Four-lane square root (sycl::sqrt). */
__forceinline Vec3fx sqrt( const Vec3fx& a ) {
  return Vec3fx(sycl::sqrt(a.x),
                sycl::sqrt(a.y),
                sycl::sqrt(a.z),
                sycl::sqrt(a.w));
}

/*! Four-lane square. */
__forceinline Vec3fx sqr( const Vec3fx& a ) {
  return Vec3fx(a.x*a.x, a.y*a.y, a.z*a.z, a.w*a.w);
}

/*! Four-lane reciprocal square root (sycl::rsqrt). */
__forceinline Vec3fx rsqrt( const Vec3fx& a ) {
  return Vec3fx(sycl::rsqrt(a.x),
                sycl::rsqrt(a.y),
                sycl::rsqrt(a.z),
                sycl::rsqrt(a.w));
}
401
402
/*! Replaces each of x, y, z whose magnitude is below min_rcp_input with
 *  min_rcp_input. The result is built with the three-argument constructor,
 *  so a.w is not carried over. */
__forceinline Vec3fx zero_fix(const Vec3fx& a) {
  float fx = a.x;
  float fy = a.y;
  float fz = a.z;
  if (sycl::fabs(a.x) < min_rcp_input) fx = min_rcp_input;
  if (sycl::fabs(a.y) < min_rcp_input) fy = min_rcp_input;
  if (sycl::fabs(a.z) < min_rcp_input) fz = min_rcp_input;
  return Vec3fx(fx,fy,fz);
}
408
/*! Reciprocal of a after passing it through zero_fix(). */
__forceinline Vec3fx rcp_safe(const Vec3fx& a) {
  const Vec3fx fixed = zero_fix(a);
  return rcp(fixed);
}
411
/*! sycl::log of x, y and z; built with the three-argument constructor. */
__forceinline Vec3fx log( const Vec3fx& a ) {
  return Vec3fx(sycl::log(a.x),
                sycl::log(a.y),
                sycl::log(a.z));
}

/*! sycl::exp of x, y and z; built with the three-argument constructor. */
__forceinline Vec3fx exp( const Vec3fx& a ) {
  return Vec3fx(sycl::exp(a.x),
                sycl::exp(a.y),
                sycl::exp(a.z));
}
418
419
////////////////////////////////////////////////////////////////////////////////
420
/// Binary Operators
421
////////////////////////////////////////////////////////////////////////////////
422
423
/*! Four-lane sum. */
__forceinline Vec3fx operator +( const Vec3fx& a, const Vec3fx& b ) {
  return Vec3fx(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w);
}

/*! Four-lane difference. */
__forceinline Vec3fx operator -( const Vec3fx& a, const Vec3fx& b ) {
  return Vec3fx(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
}

/*! Four-lane product. */
__forceinline Vec3fx operator *( const Vec3fx& a, const Vec3fx& b ) {
  return Vec3fx(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w);
}

/*! Vector times scalar: the scalar is broadcast to all four lanes first. */
__forceinline Vec3fx operator *( const Vec3fx& a, const float b ) {
  return a * Vec3fx(b);
}

/*! Scalar times vector: the scalar is broadcast to all four lanes first. */
__forceinline Vec3fx operator *( const float a, const Vec3fx& b ) {
  return Vec3fx(a) * b;
}

/*! Four-lane quotient. */
__forceinline Vec3fx operator /( const Vec3fx& a, const Vec3fx& b ) {
  return Vec3fx(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w);
}

/*! Divides all four lanes by the scalar b. */
__forceinline Vec3fx operator /( const Vec3fx& a, const float b ) {
  return Vec3fx(a.x/b, a.y/b, a.z/b, a.w/b);
}

/*! Divides the scalar a by each of the four lanes of b. */
__forceinline Vec3fx operator /( const float a, const Vec3fx& b ) {
  return Vec3fx(a/b.x, a/b.y, a/b.z, a/b.w);
}
431
432
/*! Four-lane minimum via sycl::fmin. */
__forceinline Vec3fx min( const Vec3fx& a, const Vec3fx& b ) {
  return Vec3fx(sycl::fmin(a.x,b.x),
                sycl::fmin(a.y,b.y),
                sycl::fmin(a.z,b.z),
                sycl::fmin(a.w,b.w));
}

/*! Four-lane maximum via sycl::fmax. */
__forceinline Vec3fx max( const Vec3fx& a, const Vec3fx& b ) {
  return Vec3fx(sycl::fmax(a.x,b.x),
                sycl::fmax(a.y,b.y),
                sycl::fmax(a.z,b.z),
                sycl::fmax(a.w,b.w));
}
438
439
/*
440
#if defined(__SSE4_1__)
441
__forceinline Vec3fx mini(const Vec3fx& a, const Vec3fx& b) {
442
const vint4 ai = _mm_castps_si128(a);
443
const vint4 bi = _mm_castps_si128(b);
444
const vint4 ci = _mm_min_epi32(ai,bi);
445
return _mm_castsi128_ps(ci);
446
}
447
#endif
448
449
#if defined(__SSE4_1__)
450
__forceinline Vec3fx maxi(const Vec3fx& a, const Vec3fx& b) {
451
const vint4 ai = _mm_castps_si128(a);
452
const vint4 bi = _mm_castps_si128(b);
453
const vint4 ci = _mm_max_epi32(ai,bi);
454
return _mm_castsi128_ps(ci);
455
}
456
#endif
457
458
__forceinline Vec3fx pow ( const Vec3fx& a, const float& b ) {
459
return Vec3fx(powf(a.x,b),powf(a.y,b),powf(a.z,b));
460
}
461
*/
462
463
////////////////////////////////////////////////////////////////////////////////
464
/// Ternary Operators
465
////////////////////////////////////////////////////////////////////////////////
466
467
/*! Four-lane madd: forwards each (a,b,c) lane triple to the scalar madd. */
__forceinline Vec3fx madd( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) {
  return Vec3fx(madd(a.x,b.x,c.x),
                madd(a.y,b.y,c.y),
                madd(a.z,b.z,c.z),
                madd(a.w,b.w,c.w));
}

/*! Four-lane msub: forwards each lane triple to the scalar msub. */
__forceinline Vec3fx msub( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) {
  return Vec3fx(msub(a.x,b.x,c.x),
                msub(a.y,b.y,c.y),
                msub(a.z,b.z,c.z),
                msub(a.w,b.w,c.w));
}

/*! Four-lane nmadd: forwards each lane triple to the scalar nmadd. */
__forceinline Vec3fx nmadd( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) {
  return Vec3fx(nmadd(a.x,b.x,c.x),
                nmadd(a.y,b.y,c.y),
                nmadd(a.z,b.z,c.z),
                nmadd(a.w,b.w,c.w));
}

/*! Four-lane nmsub: forwards each lane triple to the scalar nmsub. */
__forceinline Vec3fx nmsub( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) {
  return Vec3fx(nmsub(a.x,b.x,c.x),
                nmsub(a.y,b.y,c.y),
                nmsub(a.z,b.z,c.z),
                nmsub(a.w,b.w,c.w));
}

/*! Scalar-first overloads: the scalar a is broadcast to a Vec3fx and the
 *  vector overload above is invoked. */
__forceinline Vec3fx madd( const float a, const Vec3fx& b, const Vec3fx& c) {
  return madd(Vec3fx(a),b,c);
}

__forceinline Vec3fx msub( const float a, const Vec3fx& b, const Vec3fx& c) {
  return msub(Vec3fx(a),b,c);
}

__forceinline Vec3fx nmadd( const float a, const Vec3fx& b, const Vec3fx& c) {
  return nmadd(Vec3fx(a),b,c);
}

__forceinline Vec3fx nmsub( const float a, const Vec3fx& b, const Vec3fx& c) {
  return nmsub(Vec3fx(a),b,c);
}
476
477
////////////////////////////////////////////////////////////////////////////////
478
/// Assignment Operators
479
////////////////////////////////////////////////////////////////////////////////
480
481
/*! Compound assignments: each one evaluates the matching binary operator,
 *  stores the result in a, and returns a reference to a. */
__forceinline Vec3fx& operator +=( Vec3fx& a, const Vec3fx& b ) {
  a = a + b;
  return a;
}

__forceinline Vec3fx& operator -=( Vec3fx& a, const Vec3fx& b ) {
  a = a - b;
  return a;
}

__forceinline Vec3fx& operator *=( Vec3fx& a, const Vec3fx& b ) {
  a = a * b;
  return a;
}

__forceinline Vec3fx& operator *=( Vec3fx& a, const float b ) {
  a = a * b;
  return a;
}

__forceinline Vec3fx& operator /=( Vec3fx& a, const Vec3fx& b ) {
  a = a / b;
  return a;
}

__forceinline Vec3fx& operator /=( Vec3fx& a, const float b ) {
  a = a / b;
  return a;
}
487
488
////////////////////////////////////////////////////////////////////////////////
489
/// Reductions
490
////////////////////////////////////////////////////////////////////////////////
491
492
/*! Sum of x, y and z (w is not included). */
__forceinline float reduce_add(const Vec3fx& v) {
  return v.x+v.y+v.z;
}

/*! Product of x, y and z (w is not included). */
__forceinline float reduce_mul(const Vec3fx& v) {
  return v.x*v.y*v.z;
}

/*! Smallest of x, y and z (w is not included). */
__forceinline float reduce_min(const Vec3fx& v) {
  const float min_xy = sycl::fmin(v.x,v.y);
  return sycl::fmin(min_xy,v.z);
}

/*! Largest of x, y and z (w is not included). */
__forceinline float reduce_max(const Vec3fx& v) {
  const float max_xy = sycl::fmax(v.x,v.y);
  return sycl::fmax(max_xy,v.z);
}
496
497
////////////////////////////////////////////////////////////////////////////////
498
/// Comparison Operators
499
////////////////////////////////////////////////////////////////////////////////
500
501
/*! True when x, y and z all compare equal; w is not compared. */
__forceinline bool operator ==( const Vec3fx& a, const Vec3fx& b ) {
  return a.x == b.x && a.y == b.y && a.z == b.z;
}

/*! True when at least one of x, y, z differs; w is not compared. */
__forceinline bool operator !=( const Vec3fx& a, const Vec3fx& b ) {
  return a.x != b.x || a.y != b.y || a.z != b.z;
}

/*! Per-component comparison masks over x, y and z: each returns a Vec3ba
 *  holding the result of the named comparison. */
__forceinline Vec3ba eq_mask( const Vec3fx& a, const Vec3fx& b ) {
  return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z);
}

__forceinline Vec3ba neq_mask(const Vec3fx& a, const Vec3fx& b ) {
  return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z);
}

__forceinline Vec3ba lt_mask( const Vec3fx& a, const Vec3fx& b ) {
  return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z);
}

__forceinline Vec3ba le_mask( const Vec3fx& a, const Vec3fx& b ) {
  return Vec3ba(a.x <= b.x, a.y <= b.y, a.z <= b.z);
}

__forceinline Vec3ba gt_mask( const Vec3fx& a, const Vec3fx& b ) {
  return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z);
}

__forceinline Vec3ba ge_mask( const Vec3fx& a, const Vec3fx& b ) {
  return Vec3ba(a.x >= b.x, a.y >= b.y, a.z >= b.z);
}
510
511
/*! True when x, y and z all lie strictly between -FLT_LARGE and +FLT_LARGE. */
__forceinline bool isvalid( const Vec3fx& v ) {
  return all(gt_mask(v,Vec3fx(-FLT_LARGE))
           & lt_mask(v,Vec3fx(+FLT_LARGE)));
}

/*! True when x, y and z all lie within [-FLT_MAX, +FLT_MAX]. */
__forceinline bool is_finite( const Vec3fx& a ) {
  return all(ge_mask(a,Vec3fx(-FLT_MAX))
           & le_mask(a,Vec3fx(+FLT_MAX)));
}
518
519
/*! True when x, y, z and w all lie within [-FLT_LARGE, +FLT_LARGE].
 *  The range tests are combined with the non-short-circuit '&' operator. */
__forceinline bool isvalid4 ( const Vec3fx& v ) {
  const bool x_ok = (v.x >= -FLT_LARGE) & (v.x <= +FLT_LARGE);
  const bool y_ok = (v.y >= -FLT_LARGE) & (v.y <= +FLT_LARGE);
  const bool z_ok = (v.z >= -FLT_LARGE) & (v.z <= +FLT_LARGE);
  const bool w_ok = (v.w >= -FLT_LARGE) & (v.w <= +FLT_LARGE);
  return x_ok & y_ok & z_ok & w_ok;
}
526
527
/*! True when x, y, z and w all lie within [-FLT_MAX, +FLT_MAX].
 *  The range tests are combined with the non-short-circuit '&' operator. */
__forceinline bool is_finite4 ( const Vec3fx& v ) {
  const bool x_ok = (v.x >= -FLT_MAX) & (v.x <= +FLT_MAX);
  const bool y_ok = (v.y >= -FLT_MAX) & (v.y <= +FLT_MAX);
  const bool z_ok = (v.z >= -FLT_MAX) & (v.z <= +FLT_MAX);
  const bool w_ok = (v.w >= -FLT_MAX) & (v.w <= +FLT_MAX);
  return x_ok & y_ok & z_ok & w_ok;
}
534
535
////////////////////////////////////////////////////////////////////////////////
536
/// Euclidean Space Operators
537
////////////////////////////////////////////////////////////////////////////////
538
539
/*! Dot product over x, y and z (reduce_add of the lane-wise product). */
__forceinline float dot( const Vec3fx& a, const Vec3fx& b ) {
  const Vec3fx products = a*b;
  return reduce_add(products);
}

/*! Cross product a x b, each component computed through msub; the result
 *  is built with the three-argument constructor. */
__forceinline Vec3fx cross( const Vec3fx& a, const Vec3fx& b ) {
  return Vec3fx(msub(a.y,b.z,a.z*b.y),
                msub(a.z,b.x,a.x*b.z),
                msub(a.x,b.y,a.y*b.x));
}
546
547
/*! Squared length: dot(a,a). */
__forceinline float sqr_length( const Vec3fx& a ) {
  return dot(a,a);
}

/*! rsqrt applied to dot(a,a). */
__forceinline float rcp_length( const Vec3fx& a ) {
  return rsqrt(dot(a,a));
}

/*! rcp applied to dot(a,a). */
__forceinline float rcp_length2( const Vec3fx& a ) {
  return rcp(dot(a,a));
}

/*! sqrt applied to dot(a,a). */
__forceinline float length( const Vec3fx& a ) {
  return sqrt(dot(a,a));
}

/*! Scales a by rsqrt(dot(a,a)); no special handling for a zero vector. */
__forceinline Vec3fx normalize( const Vec3fx& a ) {
  return a*rsqrt(dot(a,a));
}

/*! Length of the difference a-b. */
__forceinline float distance( const Vec3fx& a, const Vec3fx& b ) {
  return length(a-b);
}

/*! Evaluates d.x*(d.y+d.z) + d.y*d.z through the scalar madd. */
__forceinline float halfArea( const Vec3fx& d ) {
  return madd(d.x,(d.y+d.z),d.y*d.z);
}

/*! Twice halfArea(d). */
__forceinline float area( const Vec3fx& d ) {
  return 2.0f*halfArea(d);
}
555
556
/*! Like normalize(), but returns a unchanged when dot(a,a) is exactly 0.0f. */
__forceinline Vec3fx normalize_safe( const Vec3fx& a ) {
  const float len2 = dot(a,a);
  if (unlikely(len2 == 0.0f))
    return a;
  return a*rsqrt(len2);
}
559
560
/*! differentiated normalization */
561
__forceinline Vec3fx dnormalize(const Vec3fx& p, const Vec3fx& dp)
562
{
563
const float pp = dot(p,p);
564
const float pdp = dot(p,dp);
565
return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
566
}
567
568
////////////////////////////////////////////////////////////////////////////////
569
/// Select
570
////////////////////////////////////////////////////////////////////////////////
571
572
/*! Whole-vector select: yields all four lanes of t when s is true, of f otherwise. */
__forceinline Vec3fx select( bool s, const Vec3fx& t, const Vec3fx& f ) {
  if (s)
    return Vec3fx(t.x, t.y, t.z, t.w);
  return Vec3fx(f.x, f.y, f.z, f.w);
}

/*! Per-component select over x, y and z: each comes from t where the
 *  matching mask component of s is set, from f otherwise. The result is
 *  built with the three-argument constructor. */
__forceinline Vec3fx select( const Vec3ba& s, const Vec3fx& t, const Vec3fx& f ) {
  const float rx = s.x ? t.x : f.x;
  const float ry = s.y ? t.y : f.y;
  const float rz = s.z ? t.z : f.z;
  return Vec3fx(rx, ry, rz);
}
579
580
/*! Linear interpolation evaluated as madd(1-t, v0, t*v1). */
__forceinline Vec3fx lerp(const Vec3fx& v0, const Vec3fx& v1, const float t) {
  const float one_minus_t = 1.0f-t;
  return madd(one_minus_t,v0,t*v1);
}
583
584
/*! Index (0, 1 or 2) selected by comparing the absolute x/y/z of a:
 *  0 when |x| > |y| and |x| > |z|, 1 when |x| <= |y| and |y| > |z|, else 2. */
__forceinline int maxDim ( const Vec3fx& a )
{
  const Vec3fx mag = abs(a);
  if (mag.x > mag.y)
    return (mag.x > mag.z) ? 0 : 2;
  return (mag.y > mag.z) ? 1 : 2;
}
593
594
////////////////////////////////////////////////////////////////////////////////
595
/// Rounding Functions
596
////////////////////////////////////////////////////////////////////////////////
597
598
/*! Four-lane sycl::trunc. */
__forceinline Vec3fx trunc( const Vec3fx& a ) {
  return Vec3fx(sycl::trunc(a.x),
                sycl::trunc(a.y),
                sycl::trunc(a.z),
                sycl::trunc(a.w));
}

/*! Four-lane sycl::floor. */
__forceinline Vec3fx floor( const Vec3fx& a ) {
  return Vec3fx(sycl::floor(a.x),
                sycl::floor(a.y),
                sycl::floor(a.z),
                sycl::floor(a.w));
}

/*! Four-lane sycl::ceil. */
__forceinline Vec3fx ceil( const Vec3fx& a ) {
  return Vec3fx(sycl::ceil (a.x),
                sycl::ceil (a.y),
                sycl::ceil (a.z),
                sycl::ceil (a.w));
}
601
602
////////////////////////////////////////////////////////////////////////////////
603
/// Output Operators
604
////////////////////////////////////////////////////////////////////////////////
605
606
/*! Streams all four lanes of the vector in parentheses. */
inline embree_ostream operator<<(embree_ostream cout, const Vec3fx& a) {
  return cout << "("
              << a.x << ", "
              << a.y << ", "
              << a.z << ","
              << a.w << ")";
}
609
610
/*! Alias for Vec3fx. */
using Vec3ff = Vec3fx;
611
612
//__forceinline Vec2fa::Vec2fa(const Vec3fx& a)
613
// : x(a.x), y(a.y) {}
614
615
//__forceinline Vec3ia::Vec3ia( const Vec3fx& a )
616
// : x((int)a.x), y((int)a.y), z((int)a.z) {}
617
618
}
619
620
#if __SYCL_COMPILER_VERSION >= 20210801
namespace sycl {

  // Declare embree::Vec3fa (and its const-qualified form) device copyable.
  // NOTE(review): presumably required because Vec3fa provides its own copy
  // constructor and copy assignment - confirm against the SYCL specification.
  template<>
  struct is_device_copyable<embree::Vec3fa> : std::true_type {};

  template<>
  struct is_device_copyable<const embree::Vec3fa> : std::true_type {};

}
#endif
626