Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp
9912 views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
#include "bvh_intersector_hybrid.h"
5
#include "bvh_traverser1.h"
6
#include "node_intersector1.h"
7
#include "node_intersector_packet.h"
8
9
#include "../geometry/intersector_iterators.h"
10
#include "../geometry/triangle_intersector.h"
11
#include "../geometry/trianglev_intersector.h"
12
#include "../geometry/trianglev_mb_intersector.h"
13
#include "../geometry/trianglei_intersector.h"
14
#include "../geometry/quadv_intersector.h"
15
#include "../geometry/quadi_intersector.h"
16
#include "../geometry/curveNv_intersector.h"
17
#include "../geometry/curveNi_intersector.h"
18
#include "../geometry/curveNi_mb_intersector.h"
19
#include "../geometry/linei_intersector.h"
20
#include "../geometry/subdivpatch1_intersector.h"
21
#include "../geometry/object_intersector.h"
22
#include "../geometry/instance_intersector.h"
23
#include "../geometry/instance_array_intersector.h"
24
#include "../geometry/subgrid_intersector.h"
25
#include "../geometry/subgrid_mb_intersector.h"
26
#include "../geometry/curve_intersector_virtual.h"
27
28
#define SWITCH_DURING_DOWN_TRAVERSAL 1
29
#define FORCE_SINGLE_MODE 0
30
31
#define ENABLE_FAST_COHERENT_CODEPATHS 1
32
33
namespace embree
34
{
35
namespace isa
36
{
37
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
38
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect1(Accel::Intersectors* This,
39
const BVH* bvh,
40
NodeRef root,
41
size_t k,
42
Precalculations& pre,
43
RayHitK<K>& ray,
44
const TravRayK<K, robust>& tray,
45
RayQueryContext* context)
46
{
47
/* stack state */
48
StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
49
StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
50
StackItemT<NodeRef>* stackEnd = stack + stackSizeSingle;
51
stack[0].ptr = root;
52
stack[0].dist = neg_inf;
53
54
/* load the ray into SIMD registers */
55
TravRay<N,robust> tray1;
56
tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
57
58
/* pop loop */
59
while (true) pop:
60
{
61
/* pop next node */
62
if (unlikely(stackPtr == stack)) break;
63
stackPtr--;
64
NodeRef cur = NodeRef(stackPtr->ptr);
65
66
/* if popped node is too far, pop next one */
67
if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k]))
68
continue;
69
70
/* downtraversal loop */
71
while (true)
72
{
73
/* intersect node */
74
size_t mask; vfloat<N> tNear;
75
STAT3(normal.trav_nodes, 1, 1, 1);
76
bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
77
if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }
78
79
/* if no child is hit, pop next node */
80
if (unlikely(mask == 0))
81
goto pop;
82
83
/* select next child and push other children */
84
BVHNNodeTraverser1Hit<N, types>::traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);
85
}
86
87
/* this is a leaf node */
88
assert(cur != BVH::emptyNode);
89
STAT3(normal.trav_leaves, 1, 1, 1);
90
size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
91
92
size_t lazy_node = 0;
93
PrimitiveIntersectorK::intersect(This, pre, ray, k, context, prim, num, tray1, lazy_node);
94
95
tray1.tfar = ray.tfar[k];
96
97
if (unlikely(lazy_node)) {
98
stackPtr->ptr = lazy_node;
99
stackPtr->dist = neg_inf;
100
stackPtr++;
101
}
102
}
103
}
104
105
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
106
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect(vint<K>* __restrict__ valid_i,
107
Accel::Intersectors* __restrict__ This,
108
RayHitK<K>& __restrict__ ray,
109
RayQueryContext* __restrict__ context)
110
{
111
BVH* __restrict__ bvh = (BVH*)This->ptr;
112
113
/* we may traverse an empty BVH in case all geometry was invalid */
114
if (bvh->root == BVH::emptyNode)
115
return;
116
117
#if ENABLE_FAST_COHERENT_CODEPATHS == 1
118
assert(context);
119
if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
120
{
121
intersectCoherent(valid_i, This, ray, context);
122
return;
123
}
124
#endif
125
126
/* filter out invalid rays */
127
vbool<K> valid = *valid_i == -1;
128
#if defined(EMBREE_IGNORE_INVALID_RAYS)
129
valid &= ray.valid();
130
#endif
131
132
/* return if there are no valid rays */
133
size_t valid_bits = movemask(valid);
134
135
#if defined(__AVX__)
136
STAT3(normal.trav_hit_boxes[popcnt(movemask(valid))], 1, 1, 1);
137
#endif
138
139
if (unlikely(valid_bits == 0)) return;
140
141
/* verify correct input */
142
assert(all(valid, ray.valid()));
143
assert(all(valid, ray.tnear() >= 0.0f));
144
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
145
Precalculations pre(valid, ray);
146
147
/* load ray */
148
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
149
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
150
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
151
152
if (single)
153
{
154
tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
155
tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
156
157
for (; valid_bits!=0; ) {
158
const size_t i = bscf(valid_bits);
159
intersect1(This, bvh, bvh->root, i, pre, ray, tray, context);
160
}
161
return;
162
}
163
164
/* determine switch threshold based on flags */
165
const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
166
167
vint<K> octant = ray.octant();
168
octant = select(valid, octant, vint<K>(0xffffffff));
169
170
/* test whether we have ray with opposing direction signs in the packet */
171
bool split = false;
172
{
173
size_t bits = valid_bits;
174
vbool<K> vsplit( false );
175
do
176
{
177
const size_t valid_index = bsf(bits);
178
vbool<K> octant_valid = octant[valid_index] == octant;
179
bits &= ~(size_t)movemask(octant_valid);
180
vsplit |= vint<K>(octant[valid_index]) == (octant^vint<K>(0x7));
181
} while (bits);
182
if (any(vsplit)) split = true;
183
}
184
185
do
186
{
187
const size_t valid_index = bsf(valid_bits);
188
const vint<K> diff_octant = vint<K>(octant[valid_index])^octant;
189
const vint<K> count_diff_octant = \
190
((diff_octant >> 2) & 1) +
191
((diff_octant >> 1) & 1) +
192
((diff_octant >> 0) & 1);
193
194
vbool<K> octant_valid = (count_diff_octant <= 1) & (octant != vint<K>(0xffffffff));
195
if (!single || !split) octant_valid = valid; // deactivate octant sorting in pure chunk mode, otherwise instance traversal performance goes down
196
197
198
octant = select(octant_valid,vint<K>(0xffffffff),octant);
199
valid_bits &= ~(size_t)movemask(octant_valid);
200
201
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
202
tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
203
204
/* allocate stack and push root node */
205
vfloat<K> stack_near[stackSizeChunk];
206
NodeRef stack_node[stackSizeChunk];
207
stack_node[0] = BVH::invalidNode;
208
stack_near[0] = inf;
209
stack_node[1] = bvh->root;
210
stack_near[1] = tray.tnear;
211
NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
212
NodeRef* __restrict__ sptr_node = stack_node + 2;
213
vfloat<K>* __restrict__ sptr_near = stack_near + 2;
214
215
while (1) pop:
216
{
217
/* pop next node from stack */
218
assert(sptr_node > stack_node);
219
sptr_node--;
220
sptr_near--;
221
NodeRef cur = *sptr_node;
222
if (unlikely(cur == BVH::invalidNode)) {
223
assert(sptr_node == stack_node);
224
break;
225
}
226
227
/* cull node if behind closest hit point */
228
vfloat<K> curDist = *sptr_near;
229
const vbool<K> active = curDist < tray.tfar;
230
if (unlikely(none(active)))
231
continue;
232
233
/* switch to single ray traversal */
234
#if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))
235
#if FORCE_SINGLE_MODE == 0
236
if (single)
237
#endif
238
{
239
size_t bits = movemask(active);
240
#if FORCE_SINGLE_MODE == 0
241
if (unlikely(popcnt(bits) <= switchThreshold))
242
#endif
243
{
244
for (; bits!=0; ) {
245
const size_t i = bscf(bits);
246
intersect1(This, bvh, cur, i, pre, ray, tray, context);
247
}
248
tray.tfar = min(tray.tfar, ray.tfar);
249
continue;
250
}
251
}
252
#endif
253
while (likely(!cur.isLeaf()))
254
{
255
/* process nodes */
256
const vbool<K> valid_node = tray.tfar > curDist;
257
STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
258
const NodeRef nodeRef = cur;
259
const BaseNode* __restrict__ const node = nodeRef.baseNode();
260
261
/* set cur to invalid */
262
cur = BVH::emptyNode;
263
curDist = pos_inf;
264
265
size_t num_child_hits = 0;
266
267
for (unsigned i = 0; i < N; i++)
268
{
269
const NodeRef child = node->children[i];
270
if (unlikely(child == BVH::emptyNode)) break;
271
vfloat<K> lnearP;
272
vbool<K> lhit = valid_node;
273
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
274
275
/* if we hit the child we choose to continue with that child if it
276
is closer than the current next child, or we push it onto the stack */
277
if (likely(any(lhit)))
278
{
279
assert(sptr_node < stackEnd);
280
assert(child != BVH::emptyNode);
281
const vfloat<K> childDist = select(lhit, lnearP, inf);
282
/* push cur node onto stack and continue with hit child */
283
if (any(childDist < curDist))
284
{
285
if (likely(cur != BVH::emptyNode)) {
286
num_child_hits++;
287
*sptr_node = cur; sptr_node++;
288
*sptr_near = curDist; sptr_near++;
289
}
290
curDist = childDist;
291
cur = child;
292
}
293
294
/* push hit child onto stack */
295
else {
296
num_child_hits++;
297
*sptr_node = child; sptr_node++;
298
*sptr_near = childDist; sptr_near++;
299
}
300
}
301
}
302
303
#if defined(__AVX__)
304
//STAT3(normal.trav_hit_boxes[num_child_hits], 1, 1, 1);
305
#endif
306
307
if (unlikely(cur == BVH::emptyNode))
308
goto pop;
309
310
/* improved distance sorting for 3 or more hits */
311
if (unlikely(num_child_hits >= 2))
312
{
313
if (any(sptr_near[-2] < sptr_near[-1]))
314
{
315
std::swap(sptr_near[-2],sptr_near[-1]);
316
std::swap(sptr_node[-2],sptr_node[-1]);
317
}
318
if (unlikely(num_child_hits >= 3))
319
{
320
if (any(sptr_near[-3] < sptr_near[-1]))
321
{
322
std::swap(sptr_near[-3],sptr_near[-1]);
323
std::swap(sptr_node[-3],sptr_node[-1]);
324
}
325
if (any(sptr_near[-3] < sptr_near[-2]))
326
{
327
std::swap(sptr_near[-3],sptr_near[-2]);
328
std::swap(sptr_node[-3],sptr_node[-2]);
329
}
330
}
331
}
332
333
#if SWITCH_DURING_DOWN_TRAVERSAL == 1
334
if (single)
335
{
336
// seems to be the best place for testing utilization
337
if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
338
{
339
*sptr_node++ = cur;
340
*sptr_near++ = curDist;
341
goto pop;
342
}
343
}
344
#endif
345
}
346
347
/* return if stack is empty */
348
if (unlikely(cur == BVH::invalidNode)) {
349
assert(sptr_node == stack_node);
350
break;
351
}
352
353
/* intersect leaf */
354
assert(cur != BVH::emptyNode);
355
const vbool<K> valid_leaf = tray.tfar > curDist;
356
STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
357
if (unlikely(none(valid_leaf))) continue;
358
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
359
360
size_t lazy_node = 0;
361
PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
362
tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
363
364
if (unlikely(lazy_node)) {
365
*sptr_node = lazy_node; sptr_node++;
366
*sptr_near = neg_inf; sptr_near++;
367
}
368
}
369
} while(valid_bits);
370
}
371
372
373
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
374
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersectCoherent(vint<K>* __restrict__ valid_i,
375
Accel::Intersectors* __restrict__ This,
376
RayHitK<K>& __restrict__ ray,
377
RayQueryContext* context)
378
{
379
BVH* __restrict__ bvh = (BVH*)This->ptr;
380
381
/* filter out invalid rays */
382
vbool<K> valid = *valid_i == -1;
383
#if defined(EMBREE_IGNORE_INVALID_RAYS)
384
valid &= ray.valid();
385
#endif
386
387
/* return if there are no valid rays */
388
size_t valid_bits = movemask(valid);
389
if (unlikely(valid_bits == 0)) return;
390
391
/* verify correct input */
392
assert(all(valid, ray.valid()));
393
assert(all(valid, ray.tnear() >= 0.0f));
394
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
395
Precalculations pre(valid, ray);
396
397
/* load ray */
398
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
399
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
400
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
401
402
vint<K> octant = ray.octant();
403
octant = select(valid, octant, vint<K>(0xffffffff));
404
405
do
406
{
407
const size_t valid_index = bsf(valid_bits);
408
const vbool<K> octant_valid = octant[valid_index] == octant;
409
valid_bits &= ~(size_t)movemask(octant_valid);
410
411
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
412
tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));
413
414
Frustum<robust> frustum;
415
frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
416
417
StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes
418
StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer
419
stack[0].ptr = bvh->root;
420
stack[0].dist = neg_inf;
421
422
while (1) pop:
423
{
424
/* pop next node from stack */
425
if (unlikely(stackPtr == stack)) break;
426
427
stackPtr--;
428
NodeRef cur = NodeRef(stackPtr->ptr);
429
430
/* cull node if behind closest hit point */
431
vfloat<K> curDist = *(float*)&stackPtr->dist;
432
const vbool<K> active = curDist < tray.tfar;
433
if (unlikely(none(active))) continue;
434
435
while (likely(!cur.isLeaf()))
436
{
437
/* process nodes */
438
//STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
439
const NodeRef nodeRef = cur;
440
const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
441
442
vfloat<N> fmin;
443
size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
444
445
if (unlikely(!m_frustum_node)) goto pop;
446
cur = BVH::emptyNode;
447
curDist = pos_inf;
448
449
#if defined(__AVX__)
450
//STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
451
#endif
452
size_t num_child_hits = 0;
453
do {
454
const size_t i = bscf(m_frustum_node);
455
vfloat<K> lnearP;
456
vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
457
STAT3(normal.trav_nodes, 1, 1, 1);
458
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
459
460
if (likely(any(lhit)))
461
{
462
const vfloat<K> childDist = fmin[i];
463
const NodeRef child = node->child(i);
464
BVHN<N>::prefetch(child);
465
if (any(childDist < curDist))
466
{
467
if (likely(cur != BVH::emptyNode)) {
468
num_child_hits++;
469
stackPtr->ptr = cur;
470
*(float*)&stackPtr->dist = toScalar(curDist);
471
stackPtr++;
472
}
473
curDist = childDist;
474
cur = child;
475
}
476
/* push hit child onto stack */
477
else {
478
num_child_hits++;
479
stackPtr->ptr = child;
480
*(float*)&stackPtr->dist = toScalar(childDist);
481
stackPtr++;
482
}
483
}
484
} while(m_frustum_node);
485
486
if (unlikely(cur == BVH::emptyNode)) goto pop;
487
488
/* improved distance sorting for 3 or more hits */
489
if (unlikely(num_child_hits >= 2))
490
{
491
if (stackPtr[-2].dist < stackPtr[-1].dist)
492
std::swap(stackPtr[-2],stackPtr[-1]);
493
if (unlikely(num_child_hits >= 3))
494
{
495
if (stackPtr[-3].dist < stackPtr[-1].dist)
496
std::swap(stackPtr[-3],stackPtr[-1]);
497
if (stackPtr[-3].dist < stackPtr[-2].dist)
498
std::swap(stackPtr[-3],stackPtr[-2]);
499
}
500
}
501
}
502
503
/* intersect leaf */
504
assert(cur != BVH::invalidNode);
505
assert(cur != BVH::emptyNode);
506
const vbool<K> valid_leaf = tray.tfar > curDist;
507
STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);
508
if (unlikely(none(valid_leaf))) continue;
509
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
510
511
size_t lazy_node = 0;
512
PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);
513
514
/* reduce max distance interval on successful intersection */
515
if (likely(any((ray.tfar < tray.tfar) & valid_leaf)))
516
{
517
tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);
518
frustum.template updateMaxDist<K>(tray.tfar);
519
}
520
521
if (unlikely(lazy_node)) {
522
stackPtr->ptr = lazy_node;
523
stackPtr->dist = neg_inf;
524
stackPtr++;
525
}
526
}
527
528
} while(valid_bits);
529
}
530
531
// ===================================================================================================================================================================
532
// ===================================================================================================================================================================
533
// ===================================================================================================================================================================
534
535
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
536
bool BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded1(Accel::Intersectors* This,
537
const BVH* bvh,
538
NodeRef root,
539
size_t k,
540
Precalculations& pre,
541
RayK<K>& ray,
542
const TravRayK<K, robust>& tray,
543
RayQueryContext* context)
544
{
545
/* stack state */
546
NodeRef stack[stackSizeSingle]; // stack of nodes that still need to get traversed
547
NodeRef* stackPtr = stack+1; // current stack pointer
548
NodeRef* stackEnd = stack+stackSizeSingle;
549
stack[0] = root;
550
551
/* load the ray into SIMD registers */
552
TravRay<N,robust> tray1;
553
tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);
554
555
/* pop loop */
556
while (true) pop:
557
{
558
/* pop next node */
559
if (unlikely(stackPtr == stack)) break;
560
stackPtr--;
561
NodeRef cur = (NodeRef)*stackPtr;
562
563
/* downtraversal loop */
564
while (true)
565
{
566
/* intersect node */
567
size_t mask; vfloat<N> tNear;
568
STAT3(shadow.trav_nodes, 1, 1, 1);
569
bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);
570
if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; }
571
572
/* if no child is hit, pop next node */
573
if (unlikely(mask == 0))
574
goto pop;
575
576
/* select next child and push other children */
577
BVHNNodeTraverser1Hit<N, types>::traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);
578
}
579
580
/* this is a leaf node */
581
assert(cur != BVH::emptyNode);
582
STAT3(shadow.trav_leaves, 1, 1, 1);
583
size_t num; Primitive* prim = (Primitive*)cur.leaf(num);
584
585
size_t lazy_node = 0;
586
if (PrimitiveIntersectorK::occluded(This, pre, ray, k, context, prim, num, tray1, lazy_node)) {
587
ray.tfar[k] = neg_inf;
588
return true;
589
}
590
591
if (unlikely(lazy_node)) {
592
*stackPtr = lazy_node;
593
stackPtr++;
594
}
595
}
596
return false;
597
}
598
599
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
600
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded(vint<K>* __restrict__ valid_i,
601
Accel::Intersectors* __restrict__ This,
602
RayK<K>& __restrict__ ray,
603
RayQueryContext* context)
604
{
605
BVH* __restrict__ bvh = (BVH*)This->ptr;
606
607
/* we may traverse an empty BVH in case all geometry was invalid */
608
if (bvh->root == BVH::emptyNode)
609
return;
610
611
#if ENABLE_FAST_COHERENT_CODEPATHS == 1
612
assert(context);
613
if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))
614
{
615
occludedCoherent(valid_i, This, ray, context);
616
return;
617
}
618
#endif
619
620
/* filter out already occluded and invalid rays */
621
vbool<K> valid = (*valid_i == -1) & (ray.tfar >= 0.0f);
622
#if defined(EMBREE_IGNORE_INVALID_RAYS)
623
valid &= ray.valid();
624
#endif
625
626
/* return if there are no valid rays */
627
const size_t valid_bits = movemask(valid);
628
if (unlikely(valid_bits == 0)) return;
629
630
/* verify correct input */
631
assert(all(valid, ray.valid()));
632
assert(all(valid, ray.tnear() >= 0.0f));
633
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
634
Precalculations pre(valid, ray);
635
636
/* load ray */
637
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
638
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
639
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
640
641
tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));
642
tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));
643
644
vbool<K> terminated = !valid;
645
const vfloat<K> inf = vfloat<K>(pos_inf);
646
647
/* determine switch threshold based on flags */
648
const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;
649
650
/* allocate stack and push root node */
651
vfloat<K> stack_near[stackSizeChunk];
652
NodeRef stack_node[stackSizeChunk];
653
stack_node[0] = BVH::invalidNode;
654
stack_near[0] = inf;
655
stack_node[1] = bvh->root;
656
stack_near[1] = tray.tnear;
657
NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;
658
NodeRef* __restrict__ sptr_node = stack_node + 2;
659
vfloat<K>* __restrict__ sptr_near = stack_near + 2;
660
661
while (1) pop:
662
{
663
/* pop next node from stack */
664
assert(sptr_node > stack_node);
665
sptr_node--;
666
sptr_near--;
667
NodeRef cur = *sptr_node;
668
if (unlikely(cur == BVH::invalidNode)) {
669
assert(sptr_node == stack_node);
670
break;
671
}
672
673
/* cull node if behind closest hit point */
674
vfloat<K> curDist = *sptr_near;
675
const vbool<K> active = curDist < tray.tfar;
676
if (unlikely(none(active)))
677
continue;
678
679
/* switch to single ray traversal */
680
#if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))
681
#if FORCE_SINGLE_MODE == 0
682
if (single)
683
#endif
684
{
685
size_t bits = movemask(active);
686
#if FORCE_SINGLE_MODE == 0
687
if (unlikely(popcnt(bits) <= switchThreshold))
688
#endif
689
{
690
for (; bits!=0; ) {
691
const size_t i = bscf(bits);
692
if (occluded1(This, bvh, cur, i, pre, ray, tray, context))
693
set(terminated, i);
694
}
695
if (all(terminated)) break;
696
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar);
697
continue;
698
}
699
}
700
#endif
701
702
while (likely(!cur.isLeaf()))
703
{
704
/* process nodes */
705
const vbool<K> valid_node = tray.tfar > curDist;
706
STAT3(shadow.trav_nodes, 1, popcnt(valid_node), K);
707
const NodeRef nodeRef = cur;
708
const BaseNode* __restrict__ const node = nodeRef.baseNode();
709
710
/* set cur to invalid */
711
cur = BVH::emptyNode;
712
curDist = pos_inf;
713
714
for (unsigned i = 0; i < N; i++)
715
{
716
const NodeRef child = node->children[i];
717
if (unlikely(child == BVH::emptyNode)) break;
718
vfloat<K> lnearP;
719
vbool<K> lhit = valid_node;
720
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
721
722
/* if we hit the child we push the previously hit node onto the stack, and continue with the currently hit child */
723
if (likely(any(lhit)))
724
{
725
assert(sptr_node < stackEnd);
726
assert(child != BVH::emptyNode);
727
const vfloat<K> childDist = select(lhit, lnearP, inf);
728
729
/* push 'cur' node onto stack and continue with hit child */
730
if (likely(cur != BVH::emptyNode)) {
731
*sptr_node = cur; sptr_node++;
732
*sptr_near = curDist; sptr_near++;
733
}
734
curDist = childDist;
735
cur = child;
736
}
737
}
738
if (unlikely(cur == BVH::emptyNode))
739
goto pop;
740
741
#if SWITCH_DURING_DOWN_TRAVERSAL == 1
742
if (single)
743
{
744
// seems to be the best place for testing utilization
745
if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))
746
{
747
*sptr_node++ = cur;
748
*sptr_near++ = curDist;
749
goto pop;
750
}
751
}
752
#endif
753
}
754
755
/* return if stack is empty */
756
if (unlikely(cur == BVH::invalidNode)) {
757
assert(sptr_node == stack_node);
758
break;
759
}
760
761
762
/* intersect leaf */
763
assert(cur != BVH::emptyNode);
764
const vbool<K> valid_leaf = tray.tfar > curDist;
765
STAT3(shadow.trav_leaves, 1, popcnt(valid_leaf), K);
766
if (unlikely(none(valid_leaf))) continue;
767
size_t items; const Primitive* prim = (Primitive*) cur.leaf(items);
768
769
size_t lazy_node = 0;
770
terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
771
if (all(terminated)) break;
772
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
773
774
if (unlikely(lazy_node)) {
775
*sptr_node = lazy_node; sptr_node++;
776
*sptr_near = neg_inf; sptr_near++;
777
}
778
}
779
780
vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
781
}
782
783
784
template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>
785
void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occludedCoherent(vint<K>* __restrict__ valid_i,
786
Accel::Intersectors* __restrict__ This,
787
RayK<K>& __restrict__ ray,
788
RayQueryContext* context)
789
{
790
BVH* __restrict__ bvh = (BVH*)This->ptr;
791
792
/* filter out invalid rays */
793
vbool<K> valid = *valid_i == -1;
794
#if defined(EMBREE_IGNORE_INVALID_RAYS)
795
valid &= ray.valid();
796
#endif
797
798
/* return if there are no valid rays */
799
size_t valid_bits = movemask(valid);
800
if (unlikely(valid_bits == 0)) return;
801
802
/* verify correct input */
803
assert(all(valid, ray.valid()));
804
assert(all(valid, ray.tnear() >= 0.0f));
805
assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));
806
Precalculations pre(valid,ray);
807
808
/* load ray */
809
TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);
810
const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);
811
const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);
812
813
vbool<K> terminated = !valid;
814
815
vint<K> octant = ray.octant();
816
octant = select(valid, octant, vint<K>(0xffffffff));
817
818
do
819
{
820
const size_t valid_index = bsf(valid_bits);
821
vbool<K> octant_valid = octant[valid_index] == octant;
822
valid_bits &= ~(size_t)movemask(octant_valid);
823
824
tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));
825
tray.tfar = select(octant_valid, org_ray_tfar, vfloat<K>(neg_inf));
826
827
Frustum<robust> frustum;
828
frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);
829
830
StackItemMaskT<NodeRef> stack[stackSizeSingle]; // stack of nodes
831
StackItemMaskT<NodeRef>* stackPtr = stack + 1; // current stack pointer
832
stack[0].ptr = bvh->root;
833
stack[0].mask = movemask(octant_valid);
834
835
while (1) pop:
836
{
837
/* pop next node from stack */
838
if (unlikely(stackPtr == stack)) break;
839
840
stackPtr--;
841
NodeRef cur = NodeRef(stackPtr->ptr);
842
843
/* cull node of active rays have already been terminated */
844
size_t m_active = (size_t)stackPtr->mask & (~(size_t)movemask(terminated));
845
846
if (unlikely(m_active == 0)) continue;
847
848
while (likely(!cur.isLeaf()))
849
{
850
/* process nodes */
851
//STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);
852
const NodeRef nodeRef = cur;
853
const AABBNode* __restrict__ const node = nodeRef.getAABBNode();
854
855
vfloat<N> fmin;
856
size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);
857
858
if (unlikely(!m_frustum_node)) goto pop;
859
cur = BVH::emptyNode;
860
m_active = 0;
861
862
#if defined(__AVX__)
863
//STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);
864
#endif
865
//size_t num_child_hits = 0;
866
do {
867
const size_t i = bscf(m_frustum_node);
868
vfloat<K> lnearP;
869
vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored
870
STAT3(normal.trav_nodes, 1, 1, 1);
871
BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);
872
873
if (likely(any(lhit)))
874
{
875
const NodeRef child = node->child(i);
876
assert(child != BVH::emptyNode);
877
BVHN<N>::prefetch(child);
878
if (likely(cur != BVH::emptyNode)) {
879
//num_child_hits++;
880
stackPtr->ptr = cur;
881
stackPtr->mask = m_active;
882
stackPtr++;
883
}
884
cur = child;
885
m_active = movemask(lhit);
886
}
887
} while(m_frustum_node);
888
889
if (unlikely(cur == BVH::emptyNode)) goto pop;
890
}
891
892
/* intersect leaf */
893
assert(cur != BVH::invalidNode);
894
assert(cur != BVH::emptyNode);
895
#if defined(__AVX__)
896
STAT3(normal.trav_leaves, 1, popcnt(m_active), K);
897
#endif
898
if (unlikely(!m_active)) continue;
899
size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);
900
901
size_t lazy_node = 0;
902
terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);
903
octant_valid &= !terminated;
904
if (unlikely(none(octant_valid))) break;
905
tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays
906
907
if (unlikely(lazy_node)) {
908
stackPtr->ptr = lazy_node;
909
stackPtr->mask = movemask(octant_valid);
910
stackPtr++;
911
}
912
}
913
} while(valid_bits);
914
915
vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);
916
}
917
}
918
}
919
920