Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/kernels/common/ray.h
9905 views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
#pragma once
5
6
#include "default.h"
7
#include "instance_stack.h"
8
9
// FIXME: if ray gets separated into ray* and hit, uload4 needs to be adjusted
10
11
namespace embree
12
{
13
/* Ray structure for K rays */
14
template<int K>
15
struct RayK
16
{
17
/* Default construction does nothing */
18
__forceinline RayK() {}
19
20
/* Constructs a ray from origin, direction, and ray segment. Near
21
* has to be smaller than far */
22
__forceinline RayK(const Vec3vf<K>& org, const Vec3vf<K>& dir,
23
const vfloat<K>& tnear = zero, const vfloat<K>& tfar = inf,
24
const vfloat<K>& time = zero, const vint<K>& mask = -1, const vint<K>& id = 0, const vint<K>& flags = 0)
25
: org(org), dir(dir), _tnear(tnear), tfar(tfar), _time(time), mask(mask), id(id), flags(flags) {}
26
27
/* Returns the size of the ray */
28
static __forceinline size_t size() { return K; }
29
30
/* Calculates if this is a valid ray that does not cause issues during traversal */
31
__forceinline vbool<K> valid() const
32
{
33
const vbool<K> vx = (abs(org.x) <= vfloat<K>(FLT_LARGE)) & (abs(dir.x) <= vfloat<K>(FLT_LARGE));
34
const vbool<K> vy = (abs(org.y) <= vfloat<K>(FLT_LARGE)) & (abs(dir.y) <= vfloat<K>(FLT_LARGE));
35
const vbool<K> vz = (abs(org.z) <= vfloat<K>(FLT_LARGE)) & (abs(dir.z) <= vfloat<K>(FLT_LARGE));
36
const vbool<K> vn = abs(tnear()) <= vfloat<K>(inf);
37
const vbool<K> vf = abs(tfar) <= vfloat<K>(inf);
38
return vx & vy & vz & vn & vf;
39
}
40
41
__forceinline void get(RayK<1>* ray) const;
42
__forceinline void get(size_t i, RayK<1>& ray) const;
43
__forceinline void set(const RayK<1>* ray);
44
__forceinline void set(size_t i, const RayK<1>& ray);
45
46
__forceinline void copy(size_t dest, size_t source);
47
48
__forceinline vint<K> octant() const
49
{
50
return select(dir.x < 0.0f, vint<K>(1), vint<K>(zero)) |
51
select(dir.y < 0.0f, vint<K>(2), vint<K>(zero)) |
52
select(dir.z < 0.0f, vint<K>(4), vint<K>(zero));
53
}
54
55
/* Ray data */
56
Vec3vf<K> org; // ray origin
57
vfloat<K> _tnear; // start of ray segment
58
Vec3vf<K> dir; // ray direction
59
vfloat<K> _time; // time of this ray for motion blur
60
vfloat<K> tfar; // end of ray segment
61
vint<K> mask; // used to mask out objects during traversal
62
vint<K> id;
63
vint<K> flags;
64
65
__forceinline vfloat<K>& tnear() { return _tnear; }
66
__forceinline vfloat<K>& time() { return _time; }
67
__forceinline const vfloat<K>& tnear() const { return _tnear; }
68
__forceinline const vfloat<K>& time() const { return _time; }
69
};
70
71
/* Ray+hit structure for K rays */
72
template<int K>
73
struct RayHitK : RayK<K>
74
{
75
using RayK<K>::org;
76
using RayK<K>::_tnear;
77
using RayK<K>::dir;
78
using RayK<K>::_time;
79
using RayK<K>::tfar;
80
using RayK<K>::mask;
81
using RayK<K>::id;
82
using RayK<K>::flags;
83
84
using RayK<K>::tnear;
85
using RayK<K>::time;
86
87
/* Default construction does nothing */
88
__forceinline RayHitK() {}
89
90
/* Constructs a ray from origin, direction, and ray segment. Near
91
* has to be smaller than far */
92
__forceinline RayHitK(const Vec3vf<K>& org, const Vec3vf<K>& dir,
93
const vfloat<K>& tnear = zero, const vfloat<K>& tfar = inf,
94
const vfloat<K>& time = zero, const vint<K>& mask = -1, const vint<K>& id = 0, const vint<K>& flags = 0)
95
: RayK<K>(org, dir, tnear, tfar, time, mask, id, flags),
96
geomID(RTC_INVALID_GEOMETRY_ID)
97
{
98
for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
99
instID[l] = RTC_INVALID_GEOMETRY_ID;
100
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
101
instPrimID[l] = RTC_INVALID_GEOMETRY_ID;
102
#endif
103
}
104
}
105
106
__forceinline RayHitK(const RayK<K>& ray)
107
: RayK<K>(ray),
108
geomID(RTC_INVALID_GEOMETRY_ID)
109
{
110
for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
111
instID[l] = RTC_INVALID_GEOMETRY_ID;
112
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
113
instPrimID[l] = RTC_INVALID_GEOMETRY_ID;
114
#endif
115
}
116
}
117
118
__forceinline RayHitK<K>& operator =(const RayK<K>& ray)
119
{
120
org = ray.org;
121
_tnear = ray._tnear;
122
dir = ray.dir;
123
_time = ray._time;
124
tfar = ray.tfar;
125
mask = ray.mask;
126
id = ray.id;
127
flags = ray.flags;
128
129
geomID = RTC_INVALID_GEOMETRY_ID;
130
for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
131
instID[l] = RTC_INVALID_GEOMETRY_ID;
132
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
133
instPrimID[l] = RTC_INVALID_GEOMETRY_ID;
134
#endif
135
}
136
137
return *this;
138
}
139
140
/* Calculates if the hit is valid */
141
__forceinline void verifyHit(const vbool<K>& valid0) const
142
{
143
vbool<K> valid = valid0 & geomID != vuint<K>(RTC_INVALID_GEOMETRY_ID);
144
const vbool<K> vt = (abs(tfar) <= vfloat<K>(FLT_LARGE)) | (tfar == vfloat<K>(neg_inf));
145
const vbool<K> vu = (abs(u) <= vfloat<K>(FLT_LARGE));
146
const vbool<K> vv = (abs(v) <= vfloat<K>(FLT_LARGE));
147
const vbool<K> vnx = abs(Ng.x) <= vfloat<K>(FLT_LARGE);
148
const vbool<K> vny = abs(Ng.y) <= vfloat<K>(FLT_LARGE);
149
const vbool<K> vnz = abs(Ng.z) <= vfloat<K>(FLT_LARGE);
150
if (any(valid & !vt)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid t");
151
if (any(valid & !vu)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid u");
152
if (any(valid & !vv)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid v");
153
if (any(valid & !vnx)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.x");
154
if (any(valid & !vny)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.y");
155
if (any(valid & !vnz)) throw_RTCError(RTC_ERROR_UNKNOWN,"invalid Ng.z");
156
}
157
158
__forceinline void get(RayHitK<1>* ray) const;
159
__forceinline void get(size_t i, RayHitK<1>& ray) const;
160
__forceinline void set(const RayHitK<1>* ray);
161
__forceinline void set(size_t i, const RayHitK<1>& ray);
162
163
__forceinline void copy(size_t dest, size_t source);
164
165
/* Hit data */
166
Vec3vf<K> Ng; // geometry normal
167
vfloat<K> u; // barycentric u coordinate of hit
168
vfloat<K> v; // barycentric v coordinate of hit
169
vuint<K> primID; // primitive ID
170
vuint<K> geomID; // geometry ID
171
vuint<K> instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
172
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
173
vuint<K> instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance prim ID
174
#endif
175
};
176
177
/* Specialization for a single ray */
178
template<>
179
struct RayK<1>
180
{
181
/* Default construction does nothing */
182
__forceinline RayK() {}
183
184
/* Constructs a ray from origin, direction, and ray segment. Near
185
* has to be smaller than far */
186
__forceinline RayK(const Vec3fa& org, const Vec3fa& dir, float tnear = zero, float tfar = inf, float time = zero, int mask = -1, int id = 0, int flags = 0)
187
: org(org,tnear), dir(dir,time), tfar(tfar), mask(mask), id(id), flags(flags) {}
188
189
/* Calculates if this is a valid ray that does not cause issues during traversal */
190
__forceinline bool valid() const {
191
return all(le_mask(abs(Vec3fa(org)), Vec3fa(FLT_LARGE)) & le_mask(abs(Vec3fa(dir)), Vec3fa(FLT_LARGE))) && abs(tnear()) <= float(inf) && abs(tfar) <= float(inf);
192
}
193
194
/* checks if occlusion ray is done */
195
__forceinline bool occluded() const {
196
return tfar < 0.0f;
197
}
198
199
/* Ray data */
200
Vec3ff org; // 3 floats for ray origin, 1 float for tnear
201
//float tnear; // start of ray segment
202
Vec3ff dir; // 3 floats for ray direction, 1 float for time
203
// float time;
204
float tfar; // end of ray segment
205
int mask; // used to mask out objects during traversal
206
int id; // ray ID
207
int flags; // ray flags
208
209
__forceinline float& tnear() { return org.w; };
210
__forceinline const float& tnear() const { return org.w; };
211
212
__forceinline float& time() { return dir.w; };
213
__forceinline const float& time() const { return dir.w; };
214
215
};
216
217
template<>
218
struct RayHitK<1> : RayK<1>
219
{
220
/* Default construction does nothing */
221
__forceinline RayHitK() {}
222
223
/* Constructs a ray from origin, direction, and ray segment. Near
224
* has to be smaller than far */
225
__forceinline RayHitK(const Vec3fa& org, const Vec3fa& dir, float tnear = zero, float tfar = inf, float time = zero, int mask = -1, int id = 0, int flags = 0)
226
: RayK<1>(org, dir, tnear, tfar, time, mask, id, flags),
227
geomID(RTC_INVALID_GEOMETRY_ID) {}
228
229
__forceinline RayHitK(const RayK<1>& ray)
230
: RayK<1>(ray),
231
geomID(RTC_INVALID_GEOMETRY_ID) {}
232
233
__forceinline RayHitK<1>& operator =(const RayK<1>& ray)
234
{
235
org = ray.org;
236
dir = ray.dir;
237
tfar = ray.tfar;
238
mask = ray.mask;
239
id = ray.id;
240
flags = ray.flags;
241
242
geomID = RTC_INVALID_GEOMETRY_ID;
243
244
return *this;
245
}
246
247
/* Calculates if the hit is valid */
248
__forceinline void verifyHit() const
249
{
250
if (geomID == RTC_INVALID_GEOMETRY_ID) return;
251
const bool vt = (abs(tfar) <= FLT_LARGE) || (tfar == float(neg_inf));
252
const bool vu = (abs(u) <= FLT_LARGE);
253
const bool vv = (abs(u) <= FLT_LARGE);
254
const bool vnx = abs(Ng.x) <= FLT_LARGE;
255
const bool vny = abs(Ng.y) <= FLT_LARGE;
256
const bool vnz = abs(Ng.z) <= FLT_LARGE;
257
if (!vt) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid t");
258
if (!vu) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid u");
259
if (!vv) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid v");
260
if (!vnx) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.x");
261
if (!vny) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.y");
262
if (!vnz) throw_RTCError(RTC_ERROR_UNKNOWN, "invalid Ng.z");
263
}
264
265
/* Hit data */
266
Vec3f Ng; // not normalized geometry normal
267
float u; // barycentric u coordinate of hit
268
float v; // barycentric v coordinate of hit
269
unsigned int primID; // primitive ID
270
unsigned int geomID; // geometry ID
271
unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
272
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
273
unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance primitive ID
274
#endif
275
};
276
277
/* Converts ray packet to single rays */
278
template<int K>
279
__forceinline void RayK<K>::get(RayK<1>* ray) const
280
{
281
for (size_t i = 0; i < K; i++) // FIXME: use SIMD transpose
282
{
283
ray[i].org.x = org.x[i]; ray[i].org.y = org.y[i]; ray[i].org.z = org.z[i]; ray[i].tnear() = tnear()[i];
284
ray[i].dir.x = dir.x[i]; ray[i].dir.y = dir.y[i]; ray[i].dir.z = dir.z[i]; ray[i].time() = time()[i];
285
ray[i].tfar = tfar[i]; ray[i].mask = mask[i]; ray[i].id = id[i]; ray[i].flags = flags[i];
286
}
287
}
288
289
template<int K>
290
__forceinline void RayHitK<K>::get(RayHitK<1>* ray) const
291
{
292
// FIXME: use SIMD transpose
293
for (size_t i = 0; i < K; i++)
294
get(i, ray[i]);
295
}
296
297
/* Extracts a single ray out of a ray packet*/
298
template<int K>
299
__forceinline void RayK<K>::get(size_t i, RayK<1>& ray) const
300
{
301
ray.org.x = org.x[i]; ray.org.y = org.y[i]; ray.org.z = org.z[i]; ray.tnear() = tnear()[i];
302
ray.dir.x = dir.x[i]; ray.dir.y = dir.y[i]; ray.dir.z = dir.z[i]; ray.time() = time()[i];
303
ray.tfar = tfar[i]; ray.mask = mask[i]; ray.id = id[i]; ray.flags = flags[i];
304
}
305
306
template<int K>
307
__forceinline void RayHitK<K>::get(size_t i, RayHitK<1>& ray) const
308
{
309
ray.org.x = org.x[i]; ray.org.y = org.y[i]; ray.org.z = org.z[i]; ray.tnear() = tnear()[i];
310
ray.dir.x = dir.x[i]; ray.dir.y = dir.y[i]; ray.dir.z = dir.z[i]; ray.tfar = tfar[i]; ray.time() = time()[i];
311
ray.mask = mask[i]; ray.id = id[i]; ray.flags = flags[i];
312
ray.Ng.x = Ng.x[i]; ray.Ng.y = Ng.y[i]; ray.Ng.z = Ng.z[i];
313
ray.u = u[i]; ray.v = v[i];
314
ray.primID = primID[i]; ray.geomID = geomID[i];
315
316
instance_id_stack::copy_VU<K>(instID, ray.instID, i);
317
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
318
instance_id_stack::copy_VU<K>(instPrimID, ray.instPrimID, i);
319
#endif
320
}
321
322
/* Converts single rays to ray packet */
323
template<int K>
324
__forceinline void RayK<K>::set(const RayK<1>* ray)
325
{
326
// FIXME: use SIMD transpose
327
for (size_t i = 0; i < K; i++)
328
set(i, ray[i]);
329
}
330
331
template<int K>
332
__forceinline void RayHitK<K>::set(const RayHitK<1>* ray)
333
{
334
// FIXME: use SIMD transpose
335
for (size_t i = 0; i < K; i++)
336
set(i, ray[i]);
337
}
338
339
/* inserts a single ray into a ray packet element */
340
template<int K>
341
__forceinline void RayK<K>::set(size_t i, const RayK<1>& ray)
342
{
343
org.x[i] = ray.org.x; org.y[i] = ray.org.y; org.z[i] = ray.org.z; tnear()[i] = ray.tnear();
344
dir.x[i] = ray.dir.x; dir.y[i] = ray.dir.y; dir.z[i] = ray.dir.z; time()[i] = ray.time();
345
tfar[i] = ray.tfar; mask[i] = ray.mask; id[i] = ray.id; flags[i] = ray.flags;
346
}
347
348
template<int K>
349
__forceinline void RayHitK<K>::set(size_t i, const RayHitK<1>& ray)
350
{
351
org.x[i] = ray.org.x; org.y[i] = ray.org.y; org.z[i] = ray.org.z; tnear()[i] = ray.tnear();
352
dir.x[i] = ray.dir.x; dir.y[i] = ray.dir.y; dir.z[i] = ray.dir.z; time()[i] = ray.time();
353
tfar[i] = ray.tfar; mask[i] = ray.mask; id[i] = ray.id; flags[i] = ray.flags;
354
Ng.x[i] = ray.Ng.x; Ng.y[i] = ray.Ng.y; Ng.z[i] = ray.Ng.z;
355
u[i] = ray.u; v[i] = ray.v;
356
primID[i] = ray.primID; geomID[i] = ray.geomID;
357
358
instance_id_stack::copy_UV<K>(ray.instID, instID, i);
359
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
360
instance_id_stack::copy_UV<K>(ray.instPrimID, instPrimID, i);
361
#endif
362
}
363
364
/* copies a ray packet element into another element*/
365
template<int K>
366
__forceinline void RayK<K>::copy(size_t dest, size_t source)
367
{
368
org.x[dest] = org.x[source]; org.y[dest] = org.y[source]; org.z[dest] = org.z[source]; tnear()[dest] = tnear()[source];
369
dir.x[dest] = dir.x[source]; dir.y[dest] = dir.y[source]; dir.z[dest] = dir.z[source]; time()[dest] = time()[source];
370
tfar [dest] = tfar[source]; mask[dest] = mask[source]; id[dest] = id[source]; flags[dest] = flags[source];
371
}
372
373
template<int K>
374
__forceinline void RayHitK<K>::copy(size_t dest, size_t source)
375
{
376
org.x[dest] = org.x[source]; org.y[dest] = org.y[source]; org.z[dest] = org.z[source]; tnear()[dest] = tnear()[source];
377
dir.x[dest] = dir.x[source]; dir.y[dest] = dir.y[source]; dir.z[dest] = dir.z[source]; time()[dest] = time()[source];
378
tfar [dest] = tfar[source]; mask[dest] = mask[source]; id[dest] = id[source]; flags[dest] = flags[source];
379
Ng.x[dest] = Ng.x[source]; Ng.y[dest] = Ng.y[source]; Ng.z[dest] = Ng.z[source];
380
u[dest] = u[source]; v[dest] = v[source];
381
primID[dest] = primID[source]; geomID[dest] = geomID[source];
382
383
instance_id_stack::copy_VV<K>(instID, instID, source, dest);
384
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
385
instance_id_stack::copy_VV<K>(instPrimID, instPrimID, source, dest);
386
#endif
387
}
388
389
/* Shortcuts */
390
typedef RayK<1> Ray;
391
typedef RayK<4> Ray4;
392
typedef RayK<8> Ray8;
393
typedef RayK<16> Ray16;
394
typedef RayK<VSIZEX> Rayx;
395
struct RayN;
396
397
typedef RayHitK<1> RayHit;
398
typedef RayHitK<4> RayHit4;
399
typedef RayHitK<8> RayHit8;
400
typedef RayHitK<16> RayHit16;
401
typedef RayHitK<VSIZEX> RayHitx;
402
struct RayHitN;
403
404
template<int K, bool intersect>
405
struct RayTypeHelper;
406
407
template<int K>
408
struct RayTypeHelper<K, true>
409
{
410
typedef RayHitK<K> Ty;
411
};
412
413
template<int K>
414
struct RayTypeHelper<K, false>
415
{
416
typedef RayK<K> Ty;
417
};
418
419
template<bool intersect>
420
using RayType = typename RayTypeHelper<1, intersect>::Ty;
421
422
template<int K, bool intersect>
423
using RayTypeK = typename RayTypeHelper<K, intersect>::Ty;
424
425
/* Outputs ray to stream */
426
template<int K>
427
__forceinline embree_ostream operator <<(embree_ostream cout, const RayK<K>& ray)
428
{
429
return cout << "{ " << embree_endl
430
<< " org = " << ray.org << embree_endl
431
<< " dir = " << ray.dir << embree_endl
432
<< " near = " << ray.tnear() << embree_endl
433
<< " far = " << ray.tfar << embree_endl
434
<< " time = " << ray.time() << embree_endl
435
<< " mask = " << ray.mask << embree_endl
436
<< " id = " << ray.id << embree_endl
437
<< " flags = " << ray.flags << embree_endl
438
<< "}";
439
}
440
441
template<int K>
442
__forceinline embree_ostream operator <<(embree_ostream cout, const RayHitK<K>& ray)
443
{
444
cout << "{ " << embree_endl
445
<< " org = " << ray.org << embree_endl
446
<< " dir = " << ray.dir << embree_endl
447
<< " near = " << ray.tnear() << embree_endl
448
<< " far = " << ray.tfar << embree_endl
449
<< " time = " << ray.time() << embree_endl
450
<< " mask = " << ray.mask << embree_endl
451
<< " id = " << ray.id << embree_endl
452
<< " flags = " << ray.flags << embree_endl
453
<< " Ng = " << ray.Ng
454
<< " u = " << ray.u << embree_endl
455
<< " v = " << ray.v << embree_endl
456
<< " primID = " << ray.primID << embree_endl
457
<< " geomID = " << ray.geomID << embree_endl
458
<< " instID =";
459
for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
460
{
461
cout << " " << ray.instID[l];
462
}
463
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
464
cout << " instPrimID =";
465
for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
466
{
467
cout << " " << ray.instPrimID[l];
468
}
469
#endif
470
cout << embree_endl;
471
return cout << "}";
472
}
473
474
struct RayStreamSOA
475
{
476
__forceinline RayStreamSOA(void* rays, size_t N)
477
: ptr((char*)rays), N(N) {}
478
479
/* ray data access functions */
480
__forceinline float* org_x(size_t offset = 0) { return (float*)&ptr[0*4*N+offset]; } // x coordinate of ray origin
481
__forceinline float* org_y(size_t offset = 0) { return (float*)&ptr[1*4*N+offset]; } // y coordinate of ray origin
482
__forceinline float* org_z(size_t offset = 0) { return (float*)&ptr[2*4*N+offset]; }; // z coordinate of ray origin
483
__forceinline float* tnear(size_t offset = 0) { return (float*)&ptr[3*4*N+offset]; }; // start of ray segment
484
485
__forceinline float* dir_x(size_t offset = 0) { return (float*)&ptr[4*4*N+offset]; }; // x coordinate of ray direction
486
__forceinline float* dir_y(size_t offset = 0) { return (float*)&ptr[5*4*N+offset]; }; // y coordinate of ray direction
487
__forceinline float* dir_z(size_t offset = 0) { return (float*)&ptr[6*4*N+offset]; }; // z coordinate of ray direction
488
__forceinline float* time (size_t offset = 0) { return (float*)&ptr[7*4*N+offset]; }; // time of this ray for motion blur
489
490
__forceinline float* tfar (size_t offset = 0) { return (float*)&ptr[8*4*N+offset]; }; // end of ray segment (set to hit distance)
491
__forceinline int* mask (size_t offset = 0) { return (int*)&ptr[9*4*N+offset]; }; // used to mask out objects during traversal (optional)
492
__forceinline int* id (size_t offset = 0) { return (int*)&ptr[10*4*N+offset]; }; // id
493
__forceinline int* flags(size_t offset = 0) { return (int*)&ptr[11*4*N+offset]; }; // flags
494
495
/* hit data access functions */
496
__forceinline float* Ng_x(size_t offset = 0) { return (float*)&ptr[12*4*N+offset]; }; // x coordinate of geometry normal
497
__forceinline float* Ng_y(size_t offset = 0) { return (float*)&ptr[13*4*N+offset]; }; // y coordinate of geometry normal
498
__forceinline float* Ng_z(size_t offset = 0) { return (float*)&ptr[14*4*N+offset]; }; // z coordinate of geometry normal
499
500
__forceinline float* u(size_t offset = 0) { return (float*)&ptr[15*4*N+offset]; }; // barycentric u coordinate of hit
501
__forceinline float* v(size_t offset = 0) { return (float*)&ptr[16*4*N+offset]; }; // barycentric v coordinate of hit
502
503
__forceinline unsigned int* primID(size_t offset = 0) { return (unsigned int*)&ptr[17*4*N+offset]; }; // primitive ID
504
__forceinline unsigned int* geomID(size_t offset = 0) { return (unsigned int*)&ptr[18*4*N+offset]; }; // geometry ID
505
__forceinline unsigned int* instID(size_t level, size_t offset = 0) { return (unsigned int*)&ptr[19*4*N+level*4*N+offset]; }; // instance ID
506
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
507
__forceinline unsigned int* instPrimID(size_t level, size_t offset = 0) { return (unsigned int*)&ptr[19*4*N+RTC_MAX_INSTANCE_LEVEL_COUNT*4*N+level*4*N+offset]; }; // instance primitive ID
508
#endif
509
510
__forceinline Ray getRayByOffset(size_t offset)
511
{
512
Ray ray;
513
ray.org.x = org_x(offset)[0];
514
ray.org.y = org_y(offset)[0];
515
ray.org.z = org_z(offset)[0];
516
ray.tnear() = tnear(offset)[0];
517
ray.dir.x = dir_x(offset)[0];
518
ray.dir.y = dir_y(offset)[0];
519
ray.dir.z = dir_z(offset)[0];
520
ray.time() = time(offset)[0];
521
ray.tfar = tfar(offset)[0];
522
ray.mask = mask(offset)[0];
523
ray.id = id(offset)[0];
524
ray.flags = flags(offset)[0];
525
return ray;
526
}
527
528
template<int K>
529
__forceinline RayK<K> getRayByOffset(size_t offset)
530
{
531
RayK<K> ray;
532
ray.org.x = vfloat<K>::loadu(org_x(offset));
533
ray.org.y = vfloat<K>::loadu(org_y(offset));
534
ray.org.z = vfloat<K>::loadu(org_z(offset));
535
ray.tnear = vfloat<K>::loadu(tnear(offset));
536
ray.dir.x = vfloat<K>::loadu(dir_x(offset));
537
ray.dir.y = vfloat<K>::loadu(dir_y(offset));
538
ray.dir.z = vfloat<K>::loadu(dir_z(offset));
539
ray.time = vfloat<K>::loadu(time(offset));
540
ray.tfar = vfloat<K>::loadu(tfar(offset));
541
ray.mask = vint<K>::loadu(mask(offset));
542
ray.id = vint<K>::loadu(id(offset));
543
ray.flags = vint<K>::loadu(flags(offset));
544
return ray;
545
}
546
547
template<int K>
548
__forceinline RayK<K> getRayByOffset(const vbool<K>& valid, size_t offset)
549
{
550
RayK<K> ray;
551
ray.org.x = vfloat<K>::loadu(valid, org_x(offset));
552
ray.org.y = vfloat<K>::loadu(valid, org_y(offset));
553
ray.org.z = vfloat<K>::loadu(valid, org_z(offset));
554
ray.tnear() = vfloat<K>::loadu(valid, tnear(offset));
555
ray.dir.x = vfloat<K>::loadu(valid, dir_x(offset));
556
ray.dir.y = vfloat<K>::loadu(valid, dir_y(offset));
557
ray.dir.z = vfloat<K>::loadu(valid, dir_z(offset));
558
ray.time() = vfloat<K>::loadu(valid, time(offset));
559
ray.tfar = vfloat<K>::loadu(valid, tfar(offset));
560
561
#if !defined(__AVX__)
562
/* SSE: some ray members must be loaded with scalar instructions to ensure that we don't cause memory faults,
563
because the SSE masked loads always access the entire vector */
564
if (unlikely(!all(valid)))
565
{
566
ray.mask = zero;
567
ray.id = zero;
568
ray.flags = zero;
569
570
for (size_t k = 0; k < K; k++)
571
{
572
if (likely(valid[k]))
573
{
574
ray.mask[k] = mask(offset)[k];
575
ray.id[k] = id(offset)[k];
576
ray.flags[k] = flags(offset)[k];
577
}
578
}
579
}
580
else
581
#endif
582
{
583
ray.mask = vint<K>::loadu(valid, mask(offset));
584
ray.id = vint<K>::loadu(valid, id(offset));
585
ray.flags = vint<K>::loadu(valid, flags(offset));
586
}
587
588
return ray;
589
}
590
591
template<int K>
592
__forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayHitK<K>& ray)
593
{
594
/*
595
* valid_i: stores which of the input rays exist (do not access nonexistent rays!)
596
* valid: stores which of the rays actually hit something.
597
*/
598
vbool<K> valid = valid_i;
599
valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
600
601
if (likely(any(valid)))
602
{
603
vfloat<K>::storeu(valid, tfar(offset), ray.tfar);
604
vfloat<K>::storeu(valid, Ng_x(offset), ray.Ng.x);
605
vfloat<K>::storeu(valid, Ng_y(offset), ray.Ng.y);
606
vfloat<K>::storeu(valid, Ng_z(offset), ray.Ng.z);
607
vfloat<K>::storeu(valid, u(offset), ray.u);
608
vfloat<K>::storeu(valid, v(offset), ray.v);
609
610
#if !defined(__AVX__)
611
/* SSE: some ray members must be stored with scalar instructions to ensure that we don't cause memory faults,
612
because the SSE masked stores always access the entire vector */
613
if (unlikely(!all(valid_i)))
614
{
615
for (size_t k = 0; k < K; k++)
616
{
617
if (likely(valid[k]))
618
{
619
primID(offset)[k] = ray.primID[k];
620
geomID(offset)[k] = ray.geomID[k];
621
622
instID(0, offset)[k] = ray.instID[0][k];
623
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
624
instPrimID(0, offset)[k] = ray.instPrimID[0][k];
625
#endif
626
#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
627
for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l) {
628
instID(l, offset)[k] = ray.instID[l][k];
629
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
630
instPrimID(l, offset)[k] = ray.instPrimID[l][k];
631
#endif
632
}
633
#endif
634
}
635
}
636
}
637
else
638
#endif
639
{
640
vuint<K>::storeu(valid, primID(offset), ray.primID);
641
vuint<K>::storeu(valid, geomID(offset), ray.geomID);
642
643
vuint<K>::storeu(valid, instID(0, offset), ray.instID[0]);
644
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
645
vuint<K>::storeu(valid, instPrimID(0, offset), ray.instPrimID[0]);
646
#endif
647
#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
648
for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) {
649
vuint<K>::storeu(valid, instID(l, offset), ray.instID[l]);
650
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
651
vuint<K>::storeu(valid, instPrimID(l, offset), ray.instPrimID[l]);
652
#endif
653
}
654
#endif
655
}
656
}
657
}
658
659
template<int K>
660
__forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayK<K>& ray)
661
{
662
vbool<K> valid = valid_i;
663
valid &= (ray.tfar < 0.0f);
664
665
if (likely(any(valid)))
666
vfloat<K>::storeu(valid, tfar(offset), ray.tfar);
667
}
668
669
__forceinline size_t getOctantByOffset(size_t offset)
670
{
671
const float dx = dir_x(offset)[0];
672
const float dy = dir_y(offset)[0];
673
const float dz = dir_z(offset)[0];
674
const size_t octantID = (dx < 0.0f ? 1 : 0) + (dy < 0.0f ? 2 : 0) + (dz < 0.0f ? 4 : 0);
675
return octantID;
676
}
677
678
__forceinline bool isValidByOffset(size_t offset)
679
{
680
const float nnear = tnear(offset)[0];
681
const float ffar = tfar(offset)[0];
682
return nnear <= ffar;
683
}
684
685
template<int K>
686
__forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)
687
{
688
RayK<K> ray;
689
690
#if defined(__AVX2__)
691
ray.org.x = vfloat<K>::template gather<1>(valid, org_x(), offset);
692
ray.org.y = vfloat<K>::template gather<1>(valid, org_y(), offset);
693
ray.org.z = vfloat<K>::template gather<1>(valid, org_z(), offset);
694
ray.tnear() = vfloat<K>::template gather<1>(valid, tnear(), offset);
695
ray.dir.x = vfloat<K>::template gather<1>(valid, dir_x(), offset);
696
ray.dir.y = vfloat<K>::template gather<1>(valid, dir_y(), offset);
697
ray.dir.z = vfloat<K>::template gather<1>(valid, dir_z(), offset);
698
ray.time() = vfloat<K>::template gather<1>(valid, time(), offset);
699
ray.tfar = vfloat<K>::template gather<1>(valid, tfar(), offset);
700
ray.mask = vint<K>::template gather<1>(valid, mask(), offset);
701
ray.id = vint<K>::template gather<1>(valid, id(), offset);
702
ray.flags = vint<K>::template gather<1>(valid, flags(), offset);
703
#else
704
ray.org = zero;
705
ray.tnear() = zero;
706
ray.dir = zero;
707
ray.time() = zero;
708
ray.tfar = zero;
709
ray.mask = zero;
710
ray.id = zero;
711
ray.flags = zero;
712
713
for (size_t k = 0; k < K; k++)
714
{
715
if (likely(valid[k]))
716
{
717
const size_t ofs = offset[k];
718
719
ray.org.x[k] = *org_x(ofs);
720
ray.org.y[k] = *org_y(ofs);
721
ray.org.z[k] = *org_z(ofs);
722
ray.tnear()[k] = *tnear(ofs);
723
ray.dir.x[k] = *dir_x(ofs);
724
ray.dir.y[k] = *dir_y(ofs);
725
ray.dir.z[k] = *dir_z(ofs);
726
ray.time()[k] = *time(ofs);
727
ray.tfar[k] = *tfar(ofs);
728
ray.mask[k] = *mask(ofs);
729
ray.id[k] = *id(ofs);
730
ray.flags[k] = *flags(ofs);
731
}
732
}
733
#endif
734
735
return ray;
736
}
737
738
template<int K>
739
__forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)
740
{
741
vbool<K> valid = valid_i;
742
valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
743
744
if (likely(any(valid)))
745
{
746
#if defined(__AVX512F__)
747
vfloat<K>::template scatter<1>(valid, tfar(), offset, ray.tfar);
748
vfloat<K>::template scatter<1>(valid, Ng_x(), offset, ray.Ng.x);
749
vfloat<K>::template scatter<1>(valid, Ng_y(), offset, ray.Ng.y);
750
vfloat<K>::template scatter<1>(valid, Ng_z(), offset, ray.Ng.z);
751
vfloat<K>::template scatter<1>(valid, u(), offset, ray.u);
752
vfloat<K>::template scatter<1>(valid, v(), offset, ray.v);
753
vuint<K>::template scatter<1>(valid, primID(), offset, ray.primID);
754
vuint<K>::template scatter<1>(valid, geomID(), offset, ray.geomID);
755
756
vuint<K>::template scatter<1>(valid, instID(0), offset, ray.instID[0]);
757
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
758
vuint<K>::template scatter<1>(valid, instPrimID(0), offset, ray.instPrimID[0]);
759
#endif
760
#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
761
for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) {
762
vuint<K>::template scatter<1>(valid, instID(l), offset, ray.instID[l]);
763
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
764
vuint<K>::template scatter<1>(valid, instPrimID(l), offset, ray.instPrimID[l]);
765
#endif
766
}
767
#endif
768
#else
769
size_t valid_bits = movemask(valid);
770
while (valid_bits != 0)
771
{
772
const size_t k = bscf(valid_bits);
773
const size_t ofs = offset[k];
774
775
*tfar(ofs) = ray.tfar[k];
776
777
*Ng_x(ofs) = ray.Ng.x[k];
778
*Ng_y(ofs) = ray.Ng.y[k];
779
*Ng_z(ofs) = ray.Ng.z[k];
780
*u(ofs) = ray.u[k];
781
*v(ofs) = ray.v[k];
782
*primID(ofs) = ray.primID[k];
783
*geomID(ofs) = ray.geomID[k];
784
785
*instID(0, ofs) = ray.instID[0][k];
786
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
787
*instPrimID(0, ofs) = ray.instPrimID[0][k];
788
#endif
789
#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
790
for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l) {
791
*instID(l, ofs) = ray.instID[l][k];
792
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
793
*instPrimID(l, ofs) = ray.instPrimID[l][k];
794
#endif
795
}
796
#endif
797
}
798
#endif
799
}
800
}
801
802
template<int K>
803
__forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)
804
{
805
vbool<K> valid = valid_i;
806
valid &= (ray.tfar < 0.0f);
807
808
if (likely(any(valid)))
809
{
810
#if defined(__AVX512F__)
811
vfloat<K>::template scatter<1>(valid, tfar(), offset, ray.tfar);
812
#else
813
size_t valid_bits = movemask(valid);
814
while (valid_bits != 0)
815
{
816
const size_t k = bscf(valid_bits);
817
const size_t ofs = offset[k];
818
819
*tfar(ofs) = ray.tfar[k];
820
}
821
#endif
822
}
823
}
824
825
char* __restrict__ ptr;
826
size_t N;
827
};
828
829
template<size_t MAX_K>
830
struct StackRayStreamSOA : public RayStreamSOA
831
{
832
__forceinline StackRayStreamSOA(size_t K)
833
: RayStreamSOA(data, K) { assert(K <= MAX_K); }
834
835
char data[MAX_K / 4 * sizeof(RayHit4)];
836
};
837
838
839
struct RayStreamSOP
840
{
841
template<class T>
842
__forceinline void init(T& t)
843
{
844
org_x = (float*)&t.org.x;
845
org_y = (float*)&t.org.y;
846
org_z = (float*)&t.org.z;
847
tnear = (float*)&t.tnear;
848
dir_x = (float*)&t.dir.x;
849
dir_y = (float*)&t.dir.y;
850
dir_z = (float*)&t.dir.z;
851
time = (float*)&t.time;
852
tfar = (float*)&t.tfar;
853
mask = (unsigned int*)&t.mask;
854
id = (unsigned int*)&t.id;
855
flags = (unsigned int*)&t.flags;
856
857
Ng_x = (float*)&t.Ng.x;
858
Ng_y = (float*)&t.Ng.y;
859
Ng_z = (float*)&t.Ng.z;
860
u = (float*)&t.u;
861
v = (float*)&t.v;
862
primID = (unsigned int*)&t.primID;
863
geomID = (unsigned int*)&t.geomID;
864
865
for (unsigned l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
866
instID[l] = (unsigned int*)&t.instID[l];
867
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
868
instPrimID[l] = (unsigned int*)&t.instPrimID[l];
869
#endif
870
}
871
}
872
873
__forceinline Ray getRayByOffset(size_t offset)
874
{
875
Ray ray;
876
ray.org.x = *(float* __restrict__)((char*)org_x + offset);
877
ray.org.y = *(float* __restrict__)((char*)org_y + offset);
878
ray.org.z = *(float* __restrict__)((char*)org_z + offset);
879
ray.dir.x = *(float* __restrict__)((char*)dir_x + offset);
880
ray.dir.y = *(float* __restrict__)((char*)dir_y + offset);
881
ray.dir.z = *(float* __restrict__)((char*)dir_z + offset);
882
ray.tfar = *(float* __restrict__)((char*)tfar + offset);
883
ray.tnear() = tnear ? *(float* __restrict__)((char*)tnear + offset) : 0.0f;
884
ray.time() = time ? *(float* __restrict__)((char*)time + offset) : 0.0f;
885
ray.mask = mask ? *(unsigned int* __restrict__)((char*)mask + offset) : -1;
886
ray.id = id ? *(unsigned int* __restrict__)((char*)id + offset) : -1;
887
ray.flags = flags ? *(unsigned int* __restrict__)((char*)flags + offset) : -1;
888
return ray;
889
}
890
891
template<int K>
892
__forceinline RayK<K> getRayByOffset(const vbool<K>& valid, size_t offset)
893
{
894
RayK<K> ray;
895
ray.org.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_x + offset));
896
ray.org.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_y + offset));
897
ray.org.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)org_z + offset));
898
ray.dir.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_x + offset));
899
ray.dir.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_y + offset));
900
ray.dir.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_z + offset));
901
ray.tfar = vfloat<K>::loadu(valid, (float* __restrict__)((char*)tfar + offset));
902
ray.tnear() = tnear ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)tnear + offset)) : 0.0f;
903
ray.time() = time ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)time + offset)) : 0.0f;
904
ray.mask = mask ? vint<K>::loadu(valid, (const void* __restrict__)((char*)mask + offset)) : -1;
905
ray.id = id ? vint<K>::loadu(valid, (const void* __restrict__)((char*)id + offset)) : -1;
906
ray.flags = flags ? vint<K>::loadu(valid, (const void* __restrict__)((char*)flags + offset)) : -1;
907
return ray;
908
}
909
910
template<int K>
911
__forceinline Vec3vf<K> getDirByOffset(const vbool<K>& valid, size_t offset)
912
{
913
Vec3vf<K> dir;
914
dir.x = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_x + offset));
915
dir.y = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_y + offset));
916
dir.z = vfloat<K>::loadu(valid, (float* __restrict__)((char*)dir_z + offset));
917
return dir;
918
}
919
920
__forceinline void setHitByOffset(size_t offset, const RayHit& ray)
921
{
922
if (ray.geomID != RTC_INVALID_GEOMETRY_ID)
923
{
924
*(float* __restrict__)((char*)tfar + offset) = ray.tfar;
925
926
if (likely(Ng_x)) *(float* __restrict__)((char*)Ng_x + offset) = ray.Ng.x;
927
if (likely(Ng_y)) *(float* __restrict__)((char*)Ng_y + offset) = ray.Ng.y;
928
if (likely(Ng_z)) *(float* __restrict__)((char*)Ng_z + offset) = ray.Ng.z;
929
*(float* __restrict__)((char*)u + offset) = ray.u;
930
*(float* __restrict__)((char*)v + offset) = ray.v;
931
*(unsigned int* __restrict__)((char*)geomID + offset) = ray.geomID;
932
*(unsigned int* __restrict__)((char*)primID + offset) = ray.primID;
933
934
if (likely(instID[0])) {
935
*(unsigned int* __restrict__)((char*)instID[0] + offset) = ray.instID[0];
936
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
937
*(unsigned int* __restrict__)((char*)instPrimID[0] + offset) = ray.instPrimID[0];
938
#endif
939
#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
940
for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID; ++l) {
941
*(unsigned int* __restrict__)((char*)instID[l] + offset) = ray.instID[l];
942
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
943
*(unsigned int* __restrict__)((char*)instPrimID[l] + offset) = ray.instPrimID[l];
944
#endif
945
}
946
#endif
947
}
948
}
949
}
950
951
__forceinline void setHitByOffset(size_t offset, const Ray& ray)
952
{
953
*(float* __restrict__)((char*)tfar + offset) = ray.tfar;
954
}
955
956
template<int K>
957
__forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayHitK<K>& ray)
958
{
959
vbool<K> valid = valid_i;
960
valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
961
962
if (likely(any(valid)))
963
{
964
vfloat<K>::storeu(valid, (float* __restrict__)((char*)tfar + offset), ray.tfar);
965
966
if (likely(Ng_x)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_x + offset), ray.Ng.x);
967
if (likely(Ng_y)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_y + offset), ray.Ng.y);
968
if (likely(Ng_z)) vfloat<K>::storeu(valid, (float* __restrict__)((char*)Ng_z + offset), ray.Ng.z);
969
vfloat<K>::storeu(valid, (float* __restrict__)((char*)u + offset), ray.u);
970
vfloat<K>::storeu(valid, (float* __restrict__)((char*)v + offset), ray.v);
971
vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)primID + offset), ray.primID);
972
vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)geomID + offset), ray.geomID);
973
974
if (likely(instID[0])) {
975
vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instID[0] + offset), ray.instID[0]);
976
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
977
vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instPrimID[0] + offset), ray.instPrimID[0]);
978
#endif
979
#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
980
for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) {
981
vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instID[l] + offset), ray.instID[l]);
982
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
983
vuint<K>::storeu(valid, (unsigned int* __restrict__)((char*)instPrimID[l] + offset), ray.instPrimID[l]);
984
#endif
985
}
986
#endif
987
}
988
}
989
}
990
991
template<int K>
992
__forceinline void setHitByOffset(const vbool<K>& valid_i, size_t offset, const RayK<K>& ray)
993
{
994
vbool<K> valid = valid_i;
995
valid &= (ray.tfar < 0.0f);
996
997
if (likely(any(valid)))
998
vfloat<K>::storeu(valid, (float* __restrict__)((char*)tfar + offset), ray.tfar);
999
}
1000
1001
__forceinline size_t getOctantByOffset(size_t offset)
1002
{
1003
const float dx = *(float* __restrict__)((char*)dir_x + offset);
1004
const float dy = *(float* __restrict__)((char*)dir_y + offset);
1005
const float dz = *(float* __restrict__)((char*)dir_z + offset);
1006
const size_t octantID = (dx < 0.0f ? 1 : 0) + (dy < 0.0f ? 2 : 0) + (dz < 0.0f ? 4 : 0);
1007
return octantID;
1008
}
1009
1010
__forceinline bool isValidByOffset(size_t offset)
1011
{
1012
const float nnear = tnear ? *(float* __restrict__)((char*)tnear + offset) : 0.0f;
1013
const float ffar = *(float* __restrict__)((char*)tfar + offset);
1014
return nnear <= ffar;
1015
}
1016
1017
template<int K>
1018
__forceinline vbool<K> isValidByOffset(const vbool<K>& valid, size_t offset)
1019
{
1020
const vfloat<K> nnear = tnear ? vfloat<K>::loadu(valid, (float* __restrict__)((char*)tnear + offset)) : 0.0f;
1021
const vfloat<K> ffar = vfloat<K>::loadu(valid, (float* __restrict__)((char*)tfar + offset));
1022
return nnear <= ffar;
1023
}
1024
1025
template<int K>
1026
__forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)
1027
{
1028
RayK<K> ray;
1029
1030
#if defined(__AVX2__)
1031
ray.org.x = vfloat<K>::template gather<1>(valid, org_x, offset);
1032
ray.org.y = vfloat<K>::template gather<1>(valid, org_y, offset);
1033
ray.org.z = vfloat<K>::template gather<1>(valid, org_z, offset);
1034
ray.dir.x = vfloat<K>::template gather<1>(valid, dir_x, offset);
1035
ray.dir.y = vfloat<K>::template gather<1>(valid, dir_y, offset);
1036
ray.dir.z = vfloat<K>::template gather<1>(valid, dir_z, offset);
1037
ray.tfar = vfloat<K>::template gather<1>(valid, tfar, offset);
1038
ray.tnear() = tnear ? vfloat<K>::template gather<1>(valid, tnear, offset) : vfloat<K>(zero);
1039
ray.time() = time ? vfloat<K>::template gather<1>(valid, time, offset) : vfloat<K>(zero);
1040
ray.mask = mask ? vint<K>::template gather<1>(valid, (int*)mask, offset) : vint<K>(-1);
1041
ray.id = id ? vint<K>::template gather<1>(valid, (int*)id, offset) : vint<K>(-1);
1042
ray.flags = flags ? vint<K>::template gather<1>(valid, (int*)flags, offset) : vint<K>(-1);
1043
#else
1044
ray.org = zero;
1045
ray.tnear() = zero;
1046
ray.dir = zero;
1047
ray.tfar = zero;
1048
ray.time() = zero;
1049
ray.mask = zero;
1050
ray.id = zero;
1051
ray.flags = zero;
1052
1053
for (size_t k = 0; k < K; k++)
1054
{
1055
if (likely(valid[k]))
1056
{
1057
const size_t ofs = offset[k];
1058
1059
ray.org.x[k] = *(float* __restrict__)((char*)org_x + ofs);
1060
ray.org.y[k] = *(float* __restrict__)((char*)org_y + ofs);
1061
ray.org.z[k] = *(float* __restrict__)((char*)org_z + ofs);
1062
ray.dir.x[k] = *(float* __restrict__)((char*)dir_x + ofs);
1063
ray.dir.y[k] = *(float* __restrict__)((char*)dir_y + ofs);
1064
ray.dir.z[k] = *(float* __restrict__)((char*)dir_z + ofs);
1065
ray.tfar[k] = *(float* __restrict__)((char*)tfar + ofs);
1066
ray.tnear()[k] = tnear ? *(float* __restrict__)((char*)tnear + ofs) : 0.0f;
1067
ray.time()[k] = time ? *(float* __restrict__)((char*)time + ofs) : 0.0f;
1068
ray.mask[k] = mask ? *(int* __restrict__)((char*)mask + ofs) : -1;
1069
ray.id[k] = id ? *(int* __restrict__)((char*)id + ofs) : -1;
1070
ray.flags[k] = flags ? *(int* __restrict__)((char*)flags + ofs) : -1;
1071
}
1072
}
1073
#endif
1074
1075
return ray;
1076
}
1077
1078
template<int K>
1079
__forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)
1080
{
1081
vbool<K> valid = valid_i;
1082
valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
1083
1084
if (likely(any(valid)))
1085
{
1086
#if defined(__AVX512F__)
1087
vfloat<K>::template scatter<1>(valid, tfar, offset, ray.tfar);
1088
1089
if (likely(Ng_x)) vfloat<K>::template scatter<1>(valid, Ng_x, offset, ray.Ng.x);
1090
if (likely(Ng_y)) vfloat<K>::template scatter<1>(valid, Ng_y, offset, ray.Ng.y);
1091
if (likely(Ng_z)) vfloat<K>::template scatter<1>(valid, Ng_z, offset, ray.Ng.z);
1092
vfloat<K>::template scatter<1>(valid, u, offset, ray.u);
1093
vfloat<K>::template scatter<1>(valid, v, offset, ray.v);
1094
vuint<K>::template scatter<1>(valid, (unsigned int*)geomID, offset, ray.geomID);
1095
vuint<K>::template scatter<1>(valid, (unsigned int*)primID, offset, ray.primID);
1096
1097
if (likely(instID[0])) {
1098
vuint<K>::template scatter<1>(valid, (unsigned int*)instID[0], offset, ray.instID[0]);
1099
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
1100
vuint<K>::template scatter<1>(valid, (unsigned int*)instPrimID[0], offset, ray.instPrimID[0]);
1101
#endif
1102
#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
1103
for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) {
1104
vuint<K>::template scatter<1>(valid, (unsigned int*)instID[l], offset, ray.instID[l]);
1105
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
1106
vuint<K>::template scatter<1>(valid, (unsigned int*)instPrimID[l], offset, ray.instPrimID[l]);
1107
#endif
1108
}
1109
#endif
1110
}
1111
#else
1112
size_t valid_bits = movemask(valid);
1113
while (valid_bits != 0)
1114
{
1115
const size_t k = bscf(valid_bits);
1116
const size_t ofs = offset[k];
1117
1118
*(float* __restrict__)((char*)tfar + ofs) = ray.tfar[k];
1119
1120
if (likely(Ng_x)) *(float* __restrict__)((char*)Ng_x + ofs) = ray.Ng.x[k];
1121
if (likely(Ng_y)) *(float* __restrict__)((char*)Ng_y + ofs) = ray.Ng.y[k];
1122
if (likely(Ng_z)) *(float* __restrict__)((char*)Ng_z + ofs) = ray.Ng.z[k];
1123
*(float* __restrict__)((char*)u + ofs) = ray.u[k];
1124
*(float* __restrict__)((char*)v + ofs) = ray.v[k];
1125
*(unsigned int* __restrict__)((char*)primID + ofs) = ray.primID[k];
1126
*(unsigned int* __restrict__)((char*)geomID + ofs) = ray.geomID[k];
1127
1128
if (likely(instID[0])) {
1129
*(unsigned int* __restrict__)((char*)instID[0] + ofs) = ray.instID[0][k];
1130
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
1131
*(unsigned int* __restrict__)((char*)instPrimID[0] + ofs) = ray.instPrimID[0][k];
1132
#endif
1133
#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
1134
for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && ray.instID[l-1][k] != RTC_INVALID_GEOMETRY_ID; ++l) {
1135
*(unsigned int* __restrict__)((char*)instID[l] + ofs) = ray.instID[l][k];
1136
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
1137
*(unsigned int* __restrict__)((char*)instPrimID[l] + ofs) = ray.instPrimID[l][k];
1138
#endif
1139
}
1140
#endif
1141
}
1142
}
1143
#endif
1144
}
1145
}
1146
1147
template<int K>
1148
__forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)
1149
{
1150
vbool<K> valid = valid_i;
1151
valid &= (ray.tfar < 0.0f);
1152
1153
if (likely(any(valid)))
1154
{
1155
#if defined(__AVX512F__)
1156
vfloat<K>::template scatter<1>(valid, tfar, offset, ray.tfar);
1157
#else
1158
size_t valid_bits = movemask(valid);
1159
while (valid_bits != 0)
1160
{
1161
const size_t k = bscf(valid_bits);
1162
const size_t ofs = offset[k];
1163
1164
*(float* __restrict__)((char*)tfar + ofs) = ray.tfar[k];
1165
}
1166
#endif
1167
}
1168
}
1169
1170
/* ray data */
1171
float* __restrict__ org_x; // x coordinate of ray origin
1172
float* __restrict__ org_y; // y coordinate of ray origin
1173
float* __restrict__ org_z; // z coordinate of ray origin
1174
float* __restrict__ tnear; // start of ray segment (optional)
1175
1176
float* __restrict__ dir_x; // x coordinate of ray direction
1177
float* __restrict__ dir_y; // y coordinate of ray direction
1178
float* __restrict__ dir_z; // z coordinate of ray direction
1179
float* __restrict__ time; // time of this ray for motion blur (optional)
1180
1181
float* __restrict__ tfar; // end of ray segment (set to hit distance)
1182
unsigned int* __restrict__ mask; // used to mask out objects during traversal (optional)
1183
unsigned int* __restrict__ id; // ray ID
1184
unsigned int* __restrict__ flags; // ray flags
1185
1186
/* hit data */
1187
float* __restrict__ Ng_x; // x coordinate of geometry normal (optional)
1188
float* __restrict__ Ng_y; // y coordinate of geometry normal (optional)
1189
float* __restrict__ Ng_z; // z coordinate of geometry normal (optional)
1190
1191
float* __restrict__ u; // barycentric u coordinate of hit
1192
float* __restrict__ v; // barycentric v coordinate of hit
1193
1194
unsigned int* __restrict__ primID; // primitive ID
1195
unsigned int* __restrict__ geomID; // geometry ID
1196
unsigned int* __restrict__ instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
1197
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
1198
unsigned int* __restrict__ instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance primitive ID (optional)
1199
#endif
1200
};
1201
1202
1203
struct RayStreamAOS
1204
{
1205
__forceinline RayStreamAOS(void* rays)
1206
: ptr((Ray*)rays) {}
1207
1208
__forceinline Ray& getRayByOffset(size_t offset)
1209
{
1210
return *(Ray*)((char*)ptr + offset);
1211
}
1212
1213
template<int K>
1214
__forceinline RayK<K> getRayByOffset(const vint<K>& offset);
1215
1216
template<int K>
1217
__forceinline RayK<K> getRayByOffset(const vbool<K>& valid, const vint<K>& offset)
1218
{
1219
const vint<K> valid_offset = select(valid, offset, vintx(zero));
1220
return getRayByOffset<K>(valid_offset);
1221
}
1222
1223
template<int K>
1224
__forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayHitK<K>& ray)
1225
{
1226
vbool<K> valid = valid_i;
1227
valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
1228
1229
if (likely(any(valid)))
1230
{
1231
#if defined(__AVX512F__)
1232
vfloat<K>::template scatter<1>(valid, &ptr->tfar, offset, ray.tfar);
1233
vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.x, offset, ray.Ng.x);
1234
vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.y, offset, ray.Ng.y);
1235
vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->Ng.z, offset, ray.Ng.z);
1236
vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->u, offset, ray.u);
1237
vfloat<K>::template scatter<1>(valid, &((RayHit*)ptr)->v, offset, ray.v);
1238
vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->primID, offset, ray.primID);
1239
vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->geomID, offset, ray.geomID);
1240
1241
vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instID[0], offset, ray.instID[0]);
1242
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
1243
vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instPrimID[0], offset, ray.instPrimID[0]);
1244
#endif
1245
#if (RTC_MAX_INSTANCE_LEVEL_COUNT > 1)
1246
for (unsigned l = 1; l < RTC_MAX_INSTANCE_LEVEL_COUNT && any(valid & (ray.instID[l-1] != RTC_INVALID_GEOMETRY_ID)); ++l) {
1247
vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instID[l], offset, ray.instID[l]);
1248
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
1249
vuint<K>::template scatter<1>(valid, (unsigned int*)&((RayHit*)ptr)->instPrimID[l], offset, ray.instPrimID[l]);
1250
#endif
1251
}
1252
#endif
1253
#else
1254
size_t valid_bits = movemask(valid);
1255
while (valid_bits != 0)
1256
{
1257
const size_t k = bscf(valid_bits);
1258
RayHit* __restrict__ ray_k = (RayHit*)((char*)ptr + offset[k]);
1259
ray_k->tfar = ray.tfar[k];
1260
ray_k->Ng.x = ray.Ng.x[k];
1261
ray_k->Ng.y = ray.Ng.y[k];
1262
ray_k->Ng.z = ray.Ng.z[k];
1263
ray_k->u = ray.u[k];
1264
ray_k->v = ray.v[k];
1265
ray_k->primID = ray.primID[k];
1266
ray_k->geomID = ray.geomID[k];
1267
1268
instance_id_stack::copy_VU<K>(ray.instID, ray_k->instID, k);
1269
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
1270
instance_id_stack::copy_VU<K>(ray.instPrimID, ray_k->instPrimID, k);
1271
#endif
1272
}
1273
#endif
1274
}
1275
}
1276
1277
template<int K>
1278
__forceinline void setHitByOffset(const vbool<K>& valid_i, const vint<K>& offset, const RayK<K>& ray)
1279
{
1280
vbool<K> valid = valid_i;
1281
valid &= (ray.tfar < 0.0f);
1282
1283
if (likely(any(valid)))
1284
{
1285
#if defined(__AVX512F__)
1286
vfloat<K>::template scatter<1>(valid, &ptr->tfar, offset, ray.tfar);
1287
#else
1288
size_t valid_bits = movemask(valid);
1289
while (valid_bits != 0)
1290
{
1291
const size_t k = bscf(valid_bits);
1292
Ray* __restrict__ ray_k = (Ray*)((char*)ptr + offset[k]);
1293
ray_k->tfar = ray.tfar[k];
1294
}
1295
#endif
1296
}
1297
}
1298
1299
Ray* __restrict__ ptr;
1300
};
1301
1302
template<>
1303
__forceinline Ray4 RayStreamAOS::getRayByOffset<4>(const vint4& offset)
1304
{
1305
Ray4 ray;
1306
1307
/* load and transpose: org.x, org.y, org.z, tnear */
1308
const vfloat4 a0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->org);
1309
const vfloat4 a1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->org);
1310
const vfloat4 a2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->org);
1311
const vfloat4 a3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->org);
1312
1313
transpose(a0,a1,a2,a3, ray.org.x, ray.org.y, ray.org.z, ray.tnear());
1314
1315
/* load and transpose: dir.x, dir.y, dir.z, time */
1316
const vfloat4 b0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->dir);
1317
const vfloat4 b1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->dir);
1318
const vfloat4 b2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->dir);
1319
const vfloat4 b3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->dir);
1320
1321
transpose(b0,b1,b2,b3, ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
1322
1323
/* load and transpose: tfar, mask, id, flags */
1324
const vfloat4 c0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->tfar);
1325
const vfloat4 c1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->tfar);
1326
const vfloat4 c2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->tfar);
1327
const vfloat4 c3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->tfar);
1328
1329
vfloat4 maskf, idf, flagsf;
1330
transpose(c0,c1,c2,c3, ray.tfar, maskf, idf, flagsf);
1331
ray.mask = asInt(maskf);
1332
ray.id = asInt(idf);
1333
ray.flags = asInt(flagsf);
1334
1335
return ray;
1336
}
1337
1338
#if defined(__AVX__)
1339
template<>
1340
__forceinline Ray8 RayStreamAOS::getRayByOffset<8>(const vint8& offset)
1341
{
1342
Ray8 ray;
1343
1344
/* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
1345
const vfloat8 ab0 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[0]))->org);
1346
const vfloat8 ab1 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[1]))->org);
1347
const vfloat8 ab2 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[2]))->org);
1348
const vfloat8 ab3 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[3]))->org);
1349
const vfloat8 ab4 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[4]))->org);
1350
const vfloat8 ab5 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[5]))->org);
1351
const vfloat8 ab6 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[6]))->org);
1352
const vfloat8 ab7 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[7]))->org);
1353
1354
transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7, ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
1355
1356
/* load and transpose: tfar, mask, id, flags */
1357
const vfloat4 c0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[0]))->tfar);
1358
const vfloat4 c1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[1]))->tfar);
1359
const vfloat4 c2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[2]))->tfar);
1360
const vfloat4 c3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[3]))->tfar);
1361
const vfloat4 c4 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[4]))->tfar);
1362
const vfloat4 c5 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[5]))->tfar);
1363
const vfloat4 c6 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[6]))->tfar);
1364
const vfloat4 c7 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[7]))->tfar);
1365
1366
vfloat8 maskf, idf, flagsf;
1367
transpose(c0,c1,c2,c3,c4,c5,c6,c7, ray.tfar, maskf, idf, flagsf);
1368
ray.mask = asInt(maskf);
1369
ray.id = asInt(idf);
1370
ray.flags = asInt(flagsf);
1371
1372
return ray;
1373
}
1374
#endif
1375
1376
#if defined(__AVX512F__)
1377
template<>
1378
__forceinline Ray16 RayStreamAOS::getRayByOffset<16>(const vint16& offset)
1379
{
1380
Ray16 ray;
1381
1382
/* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
1383
const vfloat8 ab0 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 0]))->org);
1384
const vfloat8 ab1 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 1]))->org);
1385
const vfloat8 ab2 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 2]))->org);
1386
const vfloat8 ab3 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 3]))->org);
1387
const vfloat8 ab4 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 4]))->org);
1388
const vfloat8 ab5 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 5]))->org);
1389
const vfloat8 ab6 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 6]))->org);
1390
const vfloat8 ab7 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 7]))->org);
1391
const vfloat8 ab8 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 8]))->org);
1392
const vfloat8 ab9 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[ 9]))->org);
1393
const vfloat8 ab10 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[10]))->org);
1394
const vfloat8 ab11 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[11]))->org);
1395
const vfloat8 ab12 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[12]))->org);
1396
const vfloat8 ab13 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[13]))->org);
1397
const vfloat8 ab14 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[14]))->org);
1398
const vfloat8 ab15 = vfloat8::loadu(&((Ray*)((char*)ptr + offset[15]))->org);
1399
1400
transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7,ab8,ab9,ab10,ab11,ab12,ab13,ab14,ab15,
1401
ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
1402
1403
/* load and transpose: tfar, mask, id, flags */
1404
const vfloat4 c0 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 0]))->tfar);
1405
const vfloat4 c1 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 1]))->tfar);
1406
const vfloat4 c2 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 2]))->tfar);
1407
const vfloat4 c3 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 3]))->tfar);
1408
const vfloat4 c4 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 4]))->tfar);
1409
const vfloat4 c5 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 5]))->tfar);
1410
const vfloat4 c6 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 6]))->tfar);
1411
const vfloat4 c7 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 7]))->tfar);
1412
const vfloat4 c8 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 8]))->tfar);
1413
const vfloat4 c9 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[ 9]))->tfar);
1414
const vfloat4 c10 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[10]))->tfar);
1415
const vfloat4 c11 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[11]))->tfar);
1416
const vfloat4 c12 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[12]))->tfar);
1417
const vfloat4 c13 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[13]))->tfar);
1418
const vfloat4 c14 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[14]))->tfar);
1419
const vfloat4 c15 = vfloat4::loadu(&((Ray*)((char*)ptr + offset[15]))->tfar);
1420
1421
vfloat16 maskf, idf, flagsf;
1422
transpose(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,
1423
ray.tfar, maskf, idf, flagsf);
1424
ray.mask = asInt(maskf);
1425
ray.id = asInt(idf);
1426
ray.flags = asInt(flagsf);
1427
1428
return ray;
1429
}
1430
#endif
1431
1432
1433
struct RayStreamAOP
1434
{
1435
__forceinline RayStreamAOP(void* rays)
1436
: ptr((Ray**)rays) {}
1437
1438
__forceinline Ray& getRayByIndex(size_t index)
1439
{
1440
return *ptr[index];
1441
}
1442
1443
template<int K>
1444
__forceinline RayK<K> getRayByIndex(const vint<K>& index);
1445
1446
template<int K>
1447
__forceinline RayK<K> getRayByIndex(const vbool<K>& valid, const vint<K>& index)
1448
{
1449
const vint<K> valid_index = select(valid, index, vintx(zero));
1450
return getRayByIndex<K>(valid_index);
1451
}
1452
1453
template<int K>
1454
__forceinline void setHitByIndex(const vbool<K>& valid_i, const vint<K>& index, const RayHitK<K>& ray)
1455
{
1456
vbool<K> valid = valid_i;
1457
valid &= (ray.geomID != RTC_INVALID_GEOMETRY_ID);
1458
1459
if (likely(any(valid)))
1460
{
1461
size_t valid_bits = movemask(valid);
1462
while (valid_bits != 0)
1463
{
1464
const size_t k = bscf(valid_bits);
1465
RayHit* __restrict__ ray_k = (RayHit*)ptr[index[k]];
1466
1467
ray_k->tfar = ray.tfar[k];
1468
ray_k->Ng.x = ray.Ng.x[k];
1469
ray_k->Ng.y = ray.Ng.y[k];
1470
ray_k->Ng.z = ray.Ng.z[k];
1471
ray_k->u = ray.u[k];
1472
ray_k->v = ray.v[k];
1473
ray_k->primID = ray.primID[k];
1474
ray_k->geomID = ray.geomID[k];
1475
instance_id_stack::copy_VU<K>(ray.instID, ray_k->instID, k);
1476
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
1477
instance_id_stack::copy_VU<K>(ray.instPrimID, ray_k->instPrimID, k);
1478
#endif
1479
}
1480
}
1481
}
1482
1483
template<int K>
1484
__forceinline void setHitByIndex(const vbool<K>& valid_i, const vint<K>& index, const RayK<K>& ray)
1485
{
1486
vbool<K> valid = valid_i;
1487
valid &= (ray.tfar < 0.0f);
1488
1489
if (likely(any(valid)))
1490
{
1491
size_t valid_bits = movemask(valid);
1492
while (valid_bits != 0)
1493
{
1494
const size_t k = bscf(valid_bits);
1495
Ray* __restrict__ ray_k = ptr[index[k]];
1496
1497
ray_k->tfar = ray.tfar[k];
1498
}
1499
}
1500
}
1501
1502
Ray** __restrict__ ptr;
1503
};
1504
1505
template<>
1506
__forceinline Ray4 RayStreamAOP::getRayByIndex<4>(const vint4& index)
1507
{
1508
Ray4 ray;
1509
1510
/* load and transpose: org.x, org.y, org.z, tnear */
1511
const vfloat4 a0 = vfloat4::loadu(&ptr[index[0]]->org);
1512
const vfloat4 a1 = vfloat4::loadu(&ptr[index[1]]->org);
1513
const vfloat4 a2 = vfloat4::loadu(&ptr[index[2]]->org);
1514
const vfloat4 a3 = vfloat4::loadu(&ptr[index[3]]->org);
1515
1516
transpose(a0,a1,a2,a3, ray.org.x, ray.org.y, ray.org.z, ray.tnear());
1517
1518
/* load and transpose: dir.x, dir.y, dir.z, time */
1519
const vfloat4 b0 = vfloat4::loadu(&ptr[index[0]]->dir);
1520
const vfloat4 b1 = vfloat4::loadu(&ptr[index[1]]->dir);
1521
const vfloat4 b2 = vfloat4::loadu(&ptr[index[2]]->dir);
1522
const vfloat4 b3 = vfloat4::loadu(&ptr[index[3]]->dir);
1523
1524
transpose(b0,b1,b2,b3, ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
1525
1526
/* load and transpose: tfar, mask, id, flags */
1527
const vfloat4 c0 = vfloat4::loadu(&ptr[index[0]]->tfar);
1528
const vfloat4 c1 = vfloat4::loadu(&ptr[index[1]]->tfar);
1529
const vfloat4 c2 = vfloat4::loadu(&ptr[index[2]]->tfar);
1530
const vfloat4 c3 = vfloat4::loadu(&ptr[index[3]]->tfar);
1531
1532
vfloat4 maskf, idf, flagsf;
1533
transpose(c0,c1,c2,c3, ray.tfar, maskf, idf, flagsf);
1534
ray.mask = asInt(maskf);
1535
ray.id = asInt(idf);
1536
ray.flags = asInt(flagsf);
1537
1538
return ray;
1539
}
1540
1541
#if defined(__AVX__)
1542
template<>
1543
__forceinline Ray8 RayStreamAOP::getRayByIndex<8>(const vint8& index)
1544
{
1545
Ray8 ray;
1546
1547
/* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
1548
const vfloat8 ab0 = vfloat8::loadu(&ptr[index[0]]->org);
1549
const vfloat8 ab1 = vfloat8::loadu(&ptr[index[1]]->org);
1550
const vfloat8 ab2 = vfloat8::loadu(&ptr[index[2]]->org);
1551
const vfloat8 ab3 = vfloat8::loadu(&ptr[index[3]]->org);
1552
const vfloat8 ab4 = vfloat8::loadu(&ptr[index[4]]->org);
1553
const vfloat8 ab5 = vfloat8::loadu(&ptr[index[5]]->org);
1554
const vfloat8 ab6 = vfloat8::loadu(&ptr[index[6]]->org);
1555
const vfloat8 ab7 = vfloat8::loadu(&ptr[index[7]]->org);
1556
1557
transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7, ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
1558
1559
/* load and transpose: tfar, mask, id, flags */
1560
const vfloat4 c0 = vfloat4::loadu(&ptr[index[0]]->tfar);
1561
const vfloat4 c1 = vfloat4::loadu(&ptr[index[1]]->tfar);
1562
const vfloat4 c2 = vfloat4::loadu(&ptr[index[2]]->tfar);
1563
const vfloat4 c3 = vfloat4::loadu(&ptr[index[3]]->tfar);
1564
const vfloat4 c4 = vfloat4::loadu(&ptr[index[4]]->tfar);
1565
const vfloat4 c5 = vfloat4::loadu(&ptr[index[5]]->tfar);
1566
const vfloat4 c6 = vfloat4::loadu(&ptr[index[6]]->tfar);
1567
const vfloat4 c7 = vfloat4::loadu(&ptr[index[7]]->tfar);
1568
1569
vfloat8 maskf, idf, flagsf;
1570
transpose(c0,c1,c2,c3,c4,c5,c6,c7, ray.tfar, maskf, idf, flagsf);
1571
ray.mask = asInt(maskf);
1572
ray.id = asInt(idf);
1573
ray.flags = asInt(flagsf);
1574
1575
return ray;
1576
}
1577
#endif
1578
1579
#if defined(__AVX512F__)
1580
template<>
1581
__forceinline Ray16 RayStreamAOP::getRayByIndex<16>(const vint16& index)
1582
{
1583
Ray16 ray;
1584
1585
/* load and transpose: org.x, org.y, org.z, tnear, dir.x, dir.y, dir.z, time */
1586
const vfloat8 ab0 = vfloat8::loadu(&ptr[index[0]]->org);
1587
const vfloat8 ab1 = vfloat8::loadu(&ptr[index[1]]->org);
1588
const vfloat8 ab2 = vfloat8::loadu(&ptr[index[2]]->org);
1589
const vfloat8 ab3 = vfloat8::loadu(&ptr[index[3]]->org);
1590
const vfloat8 ab4 = vfloat8::loadu(&ptr[index[4]]->org);
1591
const vfloat8 ab5 = vfloat8::loadu(&ptr[index[5]]->org);
1592
const vfloat8 ab6 = vfloat8::loadu(&ptr[index[6]]->org);
1593
const vfloat8 ab7 = vfloat8::loadu(&ptr[index[7]]->org);
1594
const vfloat8 ab8 = vfloat8::loadu(&ptr[index[8]]->org);
1595
const vfloat8 ab9 = vfloat8::loadu(&ptr[index[9]]->org);
1596
const vfloat8 ab10 = vfloat8::loadu(&ptr[index[10]]->org);
1597
const vfloat8 ab11 = vfloat8::loadu(&ptr[index[11]]->org);
1598
const vfloat8 ab12 = vfloat8::loadu(&ptr[index[12]]->org);
1599
const vfloat8 ab13 = vfloat8::loadu(&ptr[index[13]]->org);
1600
const vfloat8 ab14 = vfloat8::loadu(&ptr[index[14]]->org);
1601
const vfloat8 ab15 = vfloat8::loadu(&ptr[index[15]]->org);
1602
1603
transpose(ab0,ab1,ab2,ab3,ab4,ab5,ab6,ab7,ab8,ab9,ab10,ab11,ab12,ab13,ab14,ab15,
1604
ray.org.x, ray.org.y, ray.org.z, ray.tnear(), ray.dir.x, ray.dir.y, ray.dir.z, ray.time());
1605
1606
/* load and transpose: tfar, mask, id, flags */
1607
const vfloat4 c0 = vfloat4::loadu(&ptr[index[0]]->tfar);
1608
const vfloat4 c1 = vfloat4::loadu(&ptr[index[1]]->tfar);
1609
const vfloat4 c2 = vfloat4::loadu(&ptr[index[2]]->tfar);
1610
const vfloat4 c3 = vfloat4::loadu(&ptr[index[3]]->tfar);
1611
const vfloat4 c4 = vfloat4::loadu(&ptr[index[4]]->tfar);
1612
const vfloat4 c5 = vfloat4::loadu(&ptr[index[5]]->tfar);
1613
const vfloat4 c6 = vfloat4::loadu(&ptr[index[6]]->tfar);
1614
const vfloat4 c7 = vfloat4::loadu(&ptr[index[7]]->tfar);
1615
const vfloat4 c8 = vfloat4::loadu(&ptr[index[8]]->tfar);
1616
const vfloat4 c9 = vfloat4::loadu(&ptr[index[9]]->tfar);
1617
const vfloat4 c10 = vfloat4::loadu(&ptr[index[10]]->tfar);
1618
const vfloat4 c11 = vfloat4::loadu(&ptr[index[11]]->tfar);
1619
const vfloat4 c12 = vfloat4::loadu(&ptr[index[12]]->tfar);
1620
const vfloat4 c13 = vfloat4::loadu(&ptr[index[13]]->tfar);
1621
const vfloat4 c14 = vfloat4::loadu(&ptr[index[14]]->tfar);
1622
const vfloat4 c15 = vfloat4::loadu(&ptr[index[15]]->tfar);
1623
1624
vfloat16 maskf, idf, flagsf;
1625
transpose(c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15,
1626
ray.tfar, maskf, idf, flagsf);
1627
1628
ray.mask = asInt(maskf);
1629
ray.id = asInt(idf);
1630
ray.flags = asInt(flagsf);
1631
1632
return ray;
1633
}
1634
#endif
1635
}
1636
1637