Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/kernels/bvh/bvh_traverser1.h
9906 views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
#pragma once
5
6
#include "bvh.h"
7
#include "node_intersector1.h"
8
#include "../common/stack_item.h"
9
10
#define NEW_SORTING_CODE 1
11
12
namespace embree
13
{
14
namespace isa
15
{
16
/*! BVH regular node traversal for single rays. */
17
template<int N, int types>
18
class BVHNNodeTraverser1Hit;
19
20
#if defined(__AVX512VL__) // SKX
21
22
template<int N>
23
__forceinline void isort_update(vint<N> &dist, const vint<N> &d)
24
{
25
const vint<N> dist_shift = align_shift_right<N-1>(dist,dist);
26
const vboolf<N> m_geq = d >= dist;
27
const vboolf<N> m_geq_shift = m_geq << 1;
28
dist = select(m_geq,d,dist);
29
dist = select(m_geq_shift,dist_shift,dist);
30
}
31
32
template<int N>
33
__forceinline void isort_quick_update(vint<N> &dist, const vint<N> &d) {
34
dist = align_shift_right<N-1>(dist,permute(d,vint<N>(zero)));
35
}
36
37
__forceinline size_t permuteExtract(const vint8& index, const vllong4& n0, const vllong4& n1) {
38
return toScalar(permutex2var((__m256i)index,n0,n1));
39
}
40
41
__forceinline float permuteExtract(const vint8& index, const vfloat8& n) {
42
return toScalar(permute(n,index));
43
}
44
45
#endif
46
47
/* Specialization for BVH4. */
48
template<int types>
49
class BVHNNodeTraverser1Hit<4, types>
50
{
51
typedef BVH4 BVH;
52
typedef BVH4::NodeRef NodeRef;
53
typedef BVH4::BaseNode BaseNode;
54
55
56
public:
57
/* Traverses a node with at least one hit child. Optimized for finding the closest hit (intersection). */
58
static __forceinline void traverseClosestHit(NodeRef& cur,
59
size_t mask,
60
const vfloat4& tNear,
61
StackItemT<NodeRef>*& stackPtr,
62
StackItemT<NodeRef>* stackEnd)
63
{
64
assert(mask != 0);
65
const BaseNode* node = cur.baseNode();
66
67
/*! one child is hit, continue with that child */
68
size_t r = bscf(mask);
69
cur = node->child(r);
70
BVH::prefetch(cur,types);
71
if (likely(mask == 0)) {
72
assert(cur != BVH::emptyNode);
73
return;
74
}
75
76
/*! two children are hit, push far child, and continue with closer child */
77
NodeRef c0 = cur;
78
const unsigned int d0 = ((unsigned int*)&tNear)[r];
79
r = bscf(mask);
80
NodeRef c1 = node->child(r);
81
BVH::prefetch(c1,types);
82
const unsigned int d1 = ((unsigned int*)&tNear)[r];
83
assert(c0 != BVH::emptyNode);
84
assert(c1 != BVH::emptyNode);
85
if (likely(mask == 0)) {
86
assert(stackPtr < stackEnd);
87
if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; return; }
88
else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; return; }
89
}
90
91
#if NEW_SORTING_CODE == 1
92
vint4 s0((size_t)c0,(size_t)d0);
93
vint4 s1((size_t)c1,(size_t)d1);
94
r = bscf(mask);
95
NodeRef c2 = node->child(r); BVH::prefetch(c2,types); unsigned int d2 = ((unsigned int*)&tNear)[r];
96
vint4 s2((size_t)c2,(size_t)d2);
97
/* 3 hits */
98
if (likely(mask == 0)) {
99
StackItemT<NodeRef>::sort3(s0,s1,s2);
100
*(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1;
101
cur = toSizeT(s2);
102
stackPtr+=2;
103
return;
104
}
105
r = bscf(mask);
106
NodeRef c3 = node->child(r); BVH::prefetch(c3,types); unsigned int d3 = ((unsigned int*)&tNear)[r];
107
vint4 s3((size_t)c3,(size_t)d3);
108
/* 4 hits */
109
StackItemT<NodeRef>::sort4(s0,s1,s2,s3);
110
*(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2;
111
cur = toSizeT(s3);
112
stackPtr+=3;
113
#else
114
/*! Here starts the slow path for 3 or 4 hit children. We push
115
* all nodes onto the stack to sort them there. */
116
assert(stackPtr < stackEnd);
117
stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
118
assert(stackPtr < stackEnd);
119
stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;
120
121
/*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
122
assert(stackPtr < stackEnd);
123
r = bscf(mask);
124
NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
125
assert(c != BVH::emptyNode);
126
if (likely(mask == 0)) {
127
sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
128
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
129
return;
130
}
131
132
/*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
133
assert(stackPtr < stackEnd);
134
r = bscf(mask);
135
c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
136
assert(c != BVH::emptyNode);
137
sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
138
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
139
#endif
140
}
141
142
/* Traverses a node with at least one hit child. Optimized for finding any hit (occlusion). */
143
static __forceinline void traverseAnyHit(NodeRef& cur,
144
size_t mask,
145
const vfloat4& tNear,
146
NodeRef*& stackPtr,
147
NodeRef* stackEnd)
148
{
149
const BaseNode* node = cur.baseNode();
150
151
/*! one child is hit, continue with that child */
152
size_t r = bscf(mask);
153
cur = node->child(r);
154
BVH::prefetch(cur,types);
155
156
/* simpler in sequence traversal order */
157
assert(cur != BVH::emptyNode);
158
if (likely(mask == 0)) return;
159
assert(stackPtr < stackEnd);
160
*stackPtr = cur; stackPtr++;
161
162
for (; ;)
163
{
164
r = bscf(mask);
165
cur = node->child(r); BVH::prefetch(cur,types);
166
assert(cur != BVH::emptyNode);
167
if (likely(mask == 0)) return;
168
assert(stackPtr < stackEnd);
169
*stackPtr = cur; stackPtr++;
170
}
171
}
172
};
173
174
/* Specialization for BVH8. */
175
template<int types>
176
class BVHNNodeTraverser1Hit<8, types>
177
{
178
typedef BVH8 BVH;
179
typedef BVH8::NodeRef NodeRef;
180
typedef BVH8::BaseNode BaseNode;
181
182
#if defined(__AVX512VL__)
183
template<class NodeRef, class BaseNode>
184
static __forceinline void traverseClosestHitAVX512VL8(NodeRef& cur,
185
size_t mask,
186
const vfloat8& tNear,
187
StackItemT<NodeRef>*& stackPtr,
188
StackItemT<NodeRef>* stackEnd)
189
{
190
assert(mask != 0);
191
const BaseNode* node = cur.baseNode();
192
const vllong4 n0 = vllong4::loadu((vllong4*)&node->children[0]);
193
const vllong4 n1 = vllong4::loadu((vllong4*)&node->children[4]);
194
vint8 distance_i = (asInt(tNear) & 0xfffffff8) | vint8(step);
195
distance_i = vint8::compact((int)mask,distance_i,distance_i);
196
cur = permuteExtract(distance_i,n0,n1);
197
BVH::prefetch(cur,types);
198
199
mask &= mask-1;
200
if (likely(mask == 0)) return;
201
202
/* 2 hits: order A0 B0 */
203
const vint8 d0(distance_i);
204
const vint8 d1(shuffle<1>(distance_i));
205
cur = permuteExtract(d1,n0,n1);
206
BVH::prefetch(cur,types);
207
208
const vint8 dist_A0 = min(d0, d1);
209
const vint8 dist_B0 = max(d0, d1);
210
assert(dist_A0[0] < dist_B0[0]);
211
212
mask &= mask-1;
213
if (likely(mask == 0)) {
214
cur = permuteExtract(dist_A0,n0,n1);
215
stackPtr[0].ptr = permuteExtract(dist_B0,n0,n1);
216
*(float*)&stackPtr[0].dist = permuteExtract(dist_B0,tNear);
217
stackPtr++;
218
return;
219
}
220
221
/* 3 hits: order A1 B1 C1 */
222
223
const vint8 d2(shuffle<2>(distance_i));
224
cur = permuteExtract(d2,n0,n1);
225
BVH::prefetch(cur,types);
226
227
const vint8 dist_A1 = min(dist_A0,d2);
228
const vint8 dist_tmp_B1 = max(dist_A0,d2);
229
const vint8 dist_B1 = min(dist_B0,dist_tmp_B1);
230
const vint8 dist_C1 = max(dist_B0,dist_tmp_B1);
231
assert(dist_A1[0] < dist_B1[0]);
232
assert(dist_B1[0] < dist_C1[0]);
233
234
mask &= mask-1;
235
if (likely(mask == 0)) {
236
cur = permuteExtract(dist_A1,n0,n1);
237
stackPtr[0].ptr = permuteExtract(dist_C1,n0,n1);
238
*(float*)&stackPtr[0].dist = permuteExtract(dist_C1,tNear);
239
stackPtr[1].ptr = permuteExtract(dist_B1,n0,n1);
240
*(float*)&stackPtr[1].dist = permuteExtract(dist_B1,tNear);
241
stackPtr+=2;
242
return;
243
}
244
245
/* 4 hits: order A2 B2 C2 D2 */
246
247
const vint8 d3(shuffle<3>(distance_i));
248
cur = permuteExtract(d3,n0,n1);
249
BVH::prefetch(cur,types);
250
251
const vint8 dist_A2 = min(dist_A1,d3);
252
const vint8 dist_tmp_B2 = max(dist_A1,d3);
253
const vint8 dist_B2 = min(dist_B1,dist_tmp_B2);
254
const vint8 dist_tmp_C2 = max(dist_B1,dist_tmp_B2);
255
const vint8 dist_C2 = min(dist_C1,dist_tmp_C2);
256
const vint8 dist_D2 = max(dist_C1,dist_tmp_C2);
257
assert(dist_A2[0] < dist_B2[0]);
258
assert(dist_B2[0] < dist_C2[0]);
259
assert(dist_C2[0] < dist_D2[0]);
260
261
mask &= mask-1;
262
if (likely(mask == 0)) {
263
cur = permuteExtract(dist_A2,n0,n1);
264
stackPtr[0].ptr = permuteExtract(dist_D2,n0,n1);
265
*(float*)&stackPtr[0].dist = permuteExtract(dist_D2,tNear);
266
stackPtr[1].ptr = permuteExtract(dist_C2,n0,n1);
267
*(float*)&stackPtr[1].dist = permuteExtract(dist_C2,tNear);
268
stackPtr[2].ptr = permuteExtract(dist_B2,n0,n1);
269
*(float*)&stackPtr[2].dist = permuteExtract(dist_B2,tNear);
270
stackPtr+=3;
271
return;
272
}
273
274
/* >=5 hits: reverse to descending order for writing to stack */
275
276
distance_i = align_shift_right<3>(distance_i,distance_i);
277
const size_t hits = 4 + popcnt(mask);
278
vint8 dist(INT_MIN); // this will work with -0.0f (0x80000000) as distance, isort_update uses >= to insert
279
280
isort_quick_update<8>(dist,dist_A2);
281
isort_quick_update<8>(dist,dist_B2);
282
isort_quick_update<8>(dist,dist_C2);
283
isort_quick_update<8>(dist,dist_D2);
284
285
do {
286
287
distance_i = align_shift_right<1>(distance_i,distance_i);
288
cur = permuteExtract(distance_i,n0,n1);
289
BVH::prefetch(cur,types);
290
const vint8 new_dist(permute(distance_i,vint8(zero)));
291
mask &= mask-1;
292
isort_update<8>(dist,new_dist);
293
294
} while(mask);
295
296
for (size_t i=0; i<7; i++)
297
assert(dist[i+0]>=dist[i+1]);
298
299
for (size_t i=0;i<hits-1;i++)
300
{
301
stackPtr->ptr = permuteExtract(dist,n0,n1);
302
*(float*)&stackPtr->dist = permuteExtract(dist,tNear);
303
dist = align_shift_right<1>(dist,dist);
304
stackPtr++;
305
}
306
cur = permuteExtract(dist,n0,n1);
307
}
308
#endif
309
310
public:
311
static __forceinline void traverseClosestHit(NodeRef& cur,
312
size_t mask,
313
const vfloat8& tNear,
314
StackItemT<NodeRef>*& stackPtr,
315
StackItemT<NodeRef>* stackEnd)
316
{
317
assert(mask != 0);
318
#if defined(__AVX512VL__)
319
traverseClosestHitAVX512VL8<NodeRef,BaseNode>(cur,mask,tNear,stackPtr,stackEnd);
320
#else
321
322
const BaseNode* node = cur.baseNode();
323
324
/*! one child is hit, continue with that child */
325
size_t r = bscf(mask);
326
cur = node->child(r);
327
BVH::prefetch(cur,types);
328
if (likely(mask == 0)) {
329
assert(cur != BVH::emptyNode);
330
return;
331
}
332
333
/*! two children are hit, push far child, and continue with closer child */
334
NodeRef c0 = cur;
335
const unsigned int d0 = ((unsigned int*)&tNear)[r];
336
r = bscf(mask);
337
NodeRef c1 = node->child(r);
338
BVH::prefetch(c1,types);
339
const unsigned int d1 = ((unsigned int*)&tNear)[r];
340
341
assert(c0 != BVH::emptyNode);
342
assert(c1 != BVH::emptyNode);
343
if (likely(mask == 0)) {
344
assert(stackPtr < stackEnd);
345
if (d0 < d1) { stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++; cur = c0; return; }
346
else { stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++; cur = c1; return; }
347
}
348
#if NEW_SORTING_CODE == 1
349
vint4 s0((size_t)c0,(size_t)d0);
350
vint4 s1((size_t)c1,(size_t)d1);
351
352
r = bscf(mask);
353
NodeRef c2 = node->child(r); BVH::prefetch(c2,types); unsigned int d2 = ((unsigned int*)&tNear)[r];
354
vint4 s2((size_t)c2,(size_t)d2);
355
/* 3 hits */
356
if (likely(mask == 0)) {
357
StackItemT<NodeRef>::sort3(s0,s1,s2);
358
*(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1;
359
cur = toSizeT(s2);
360
stackPtr+=2;
361
return;
362
}
363
r = bscf(mask);
364
NodeRef c3 = node->child(r); BVH::prefetch(c3,types); unsigned int d3 = ((unsigned int*)&tNear)[r];
365
vint4 s3((size_t)c3,(size_t)d3);
366
/* 4 hits */
367
if (likely(mask == 0)) {
368
StackItemT<NodeRef>::sort4(s0,s1,s2,s3);
369
*(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2;
370
cur = toSizeT(s3);
371
stackPtr+=3;
372
return;
373
}
374
*(vint4*)&stackPtr[0] = s0; *(vint4*)&stackPtr[1] = s1; *(vint4*)&stackPtr[2] = s2; *(vint4*)&stackPtr[3] = s3;
375
/*! fallback case if more than 4 children are hit */
376
StackItemT<NodeRef>* stackFirst = stackPtr;
377
stackPtr+=4;
378
while (1)
379
{
380
assert(stackPtr < stackEnd);
381
r = bscf(mask);
382
NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = *(unsigned int*)&tNear[r];
383
const vint4 s((size_t)c,(size_t)d);
384
*(vint4*)stackPtr++ = s;
385
assert(c != BVH::emptyNode);
386
if (unlikely(mask == 0)) break;
387
}
388
sort(stackFirst,stackPtr);
389
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
390
#else
391
/*! Here starts the slow path for 3 or 4 hit children. We push
392
* all nodes onto the stack to sort them there. */
393
assert(stackPtr < stackEnd);
394
stackPtr->ptr = c0; stackPtr->dist = d0; stackPtr++;
395
assert(stackPtr < stackEnd);
396
stackPtr->ptr = c1; stackPtr->dist = d1; stackPtr++;
397
398
/*! three children are hit, push all onto stack and sort 3 stack items, continue with closest child */
399
assert(stackPtr < stackEnd);
400
r = bscf(mask);
401
NodeRef c = node->child(r); BVH::prefetch(c,types); unsigned int d = ((unsigned int*)&tNear)[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
402
assert(c != BVH::emptyNode);
403
if (likely(mask == 0)) {
404
sort(stackPtr[-1],stackPtr[-2],stackPtr[-3]);
405
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
406
return;
407
}
408
409
/*! four children are hit, push all onto stack and sort 4 stack items, continue with closest child */
410
assert(stackPtr < stackEnd);
411
r = bscf(mask);
412
c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
413
assert(c != BVH::emptyNode);
414
if (likely(mask == 0)) {
415
sort(stackPtr[-1],stackPtr[-2],stackPtr[-3],stackPtr[-4]);
416
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
417
return;
418
}
419
/*! fallback case if more than 4 children are hit */
420
StackItemT<NodeRef>* stackFirst = stackPtr-4;
421
while (1)
422
{
423
assert(stackPtr < stackEnd);
424
r = bscf(mask);
425
c = node->child(r); BVH::prefetch(c,types); d = *(unsigned int*)&tNear[r]; stackPtr->ptr = c; stackPtr->dist = d; stackPtr++;
426
assert(c != BVH::emptyNode);
427
if (unlikely(mask == 0)) break;
428
}
429
sort(stackFirst,stackPtr);
430
cur = (NodeRef) stackPtr[-1].ptr; stackPtr--;
431
#endif
432
#endif
433
}
434
435
static __forceinline void traverseAnyHit(NodeRef& cur,
436
size_t mask,
437
const vfloat8& tNear,
438
NodeRef*& stackPtr,
439
NodeRef* stackEnd)
440
{
441
const BaseNode* node = cur.baseNode();
442
443
/*! one child is hit, continue with that child */
444
size_t r = bscf(mask);
445
cur = node->child(r);
446
BVH::prefetch(cur,types);
447
448
/* simpler in sequence traversal order */
449
assert(cur != BVH::emptyNode);
450
if (likely(mask == 0)) return;
451
assert(stackPtr < stackEnd);
452
*stackPtr = cur; stackPtr++;
453
454
for (; ;)
455
{
456
r = bscf(mask);
457
cur = node->child(r); BVH::prefetch(cur,types);
458
assert(cur != BVH::emptyNode);
459
if (likely(mask == 0)) return;
460
assert(stackPtr < stackEnd);
461
*stackPtr = cur; stackPtr++;
462
}
463
}
464
};
465
}
466
}
467
468