Path: blob/master/thirdparty/embree/kernels/bvh/bvh_intersector_hybrid.cpp
9912 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#include "bvh_intersector_hybrid.h"4#include "bvh_traverser1.h"5#include "node_intersector1.h"6#include "node_intersector_packet.h"78#include "../geometry/intersector_iterators.h"9#include "../geometry/triangle_intersector.h"10#include "../geometry/trianglev_intersector.h"11#include "../geometry/trianglev_mb_intersector.h"12#include "../geometry/trianglei_intersector.h"13#include "../geometry/quadv_intersector.h"14#include "../geometry/quadi_intersector.h"15#include "../geometry/curveNv_intersector.h"16#include "../geometry/curveNi_intersector.h"17#include "../geometry/curveNi_mb_intersector.h"18#include "../geometry/linei_intersector.h"19#include "../geometry/subdivpatch1_intersector.h"20#include "../geometry/object_intersector.h"21#include "../geometry/instance_intersector.h"22#include "../geometry/instance_array_intersector.h"23#include "../geometry/subgrid_intersector.h"24#include "../geometry/subgrid_mb_intersector.h"25#include "../geometry/curve_intersector_virtual.h"2627#define SWITCH_DURING_DOWN_TRAVERSAL 128#define FORCE_SINGLE_MODE 02930#define ENABLE_FAST_COHERENT_CODEPATHS 13132namespace embree33{34namespace isa35{36template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>37void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect1(Accel::Intersectors* This,38const BVH* bvh,39NodeRef root,40size_t k,41Precalculations& pre,42RayHitK<K>& ray,43const TravRayK<K, robust>& tray,44RayQueryContext* context)45{46/* stack state */47StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes48StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer49StackItemT<NodeRef>* stackEnd = stack + stackSizeSingle;50stack[0].ptr = root;51stack[0].dist = neg_inf;5253/* load the ray into SIMD registers */54TravRay<N,robust> tray1;55tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);5657/* pop loop */58while (true) pop:59{60/* pop next node */61if (unlikely(stackPtr == stack)) break;62stackPtr--;63NodeRef cur = NodeRef(stackPtr->ptr);6465/* if popped node is too far, pop next one */66if (unlikely(*(float*)&stackPtr->dist > ray.tfar[k]))67continue;6869/* downtraversal loop */70while (true)71{72/* intersect node */73size_t mask; vfloat<N> tNear;74STAT3(normal.trav_nodes, 1, 1, 1);75bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);76if (unlikely(!nodeIntersected)) { STAT3(normal.trav_nodes,-1,-1,-1); break; }7778/* if no child is hit, pop next node */79if (unlikely(mask == 0))80goto pop;8182/* select next child and push other children */83BVHNNodeTraverser1Hit<N, types>::traverseClosestHit(cur, mask, tNear, stackPtr, stackEnd);84}8586/* this is a leaf node */87assert(cur != BVH::emptyNode);88STAT3(normal.trav_leaves, 1, 1, 1);89size_t num; Primitive* prim = (Primitive*)cur.leaf(num);9091size_t lazy_node = 0;92PrimitiveIntersectorK::intersect(This, pre, ray, k, context, prim, num, tray1, lazy_node);9394tray1.tfar = ray.tfar[k];9596if (unlikely(lazy_node)) {97stackPtr->ptr = lazy_node;98stackPtr->dist = neg_inf;99stackPtr++;100}101}102}103104template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>105void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersect(vint<K>* __restrict__ valid_i,106Accel::Intersectors* __restrict__ This,107RayHitK<K>& __restrict__ ray,108RayQueryContext* __restrict__ context)109{110BVH* __restrict__ bvh = (BVH*)This->ptr;111112/* we may traverse an empty BVH in case all geometry was invalid */113if (bvh->root == BVH::emptyNode)114return;115116#if ENABLE_FAST_COHERENT_CODEPATHS == 1117assert(context);118if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))119{120intersectCoherent(valid_i, This, ray, context);121return;122}123#endif124125/* filter out invalid rays */126vbool<K> valid = *valid_i == -1;127#if defined(EMBREE_IGNORE_INVALID_RAYS)128valid &= ray.valid();129#endif130131/* return if there are no valid rays */132size_t valid_bits = movemask(valid);133134#if defined(__AVX__)135STAT3(normal.trav_hit_boxes[popcnt(movemask(valid))], 1, 1, 1);136#endif137138if (unlikely(valid_bits == 0)) return;139140/* verify correct input */141assert(all(valid, ray.valid()));142assert(all(valid, ray.tnear() >= 0.0f));143assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));144Precalculations pre(valid, ray);145146/* load ray */147TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);148const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);149const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);150151if (single)152{153tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));154tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));155156for (; valid_bits!=0; ) {157const size_t i = bscf(valid_bits);158intersect1(This, bvh, bvh->root, i, pre, ray, tray, context);159}160return;161}162163/* determine switch threshold based on flags */164const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;165166vint<K> octant = ray.octant();167octant = select(valid, octant, vint<K>(0xffffffff));168169/* test whether we have ray with opposing direction signs in the packet */170bool split = false;171{172size_t bits = valid_bits;173vbool<K> vsplit( false );174do175{176const size_t valid_index = bsf(bits);177vbool<K> octant_valid = octant[valid_index] == octant;178bits &= ~(size_t)movemask(octant_valid);179vsplit |= vint<K>(octant[valid_index]) == (octant^vint<K>(0x7));180} while (bits);181if (any(vsplit)) split = true;182}183184do185{186const size_t valid_index = bsf(valid_bits);187const vint<K> diff_octant = vint<K>(octant[valid_index])^octant;188const vint<K> count_diff_octant = \189((diff_octant >> 2) & 1) +190((diff_octant >> 1) & 1) +191((diff_octant >> 0) & 1);192193vbool<K> octant_valid = (count_diff_octant <= 1) & (octant != vint<K>(0xffffffff));194if (!single || !split) octant_valid = valid; // deactivate octant sorting in pure chunk mode, otherwise instance traversal performance goes down195196197octant = select(octant_valid,vint<K>(0xffffffff),octant);198valid_bits &= ~(size_t)movemask(octant_valid);199200tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));201tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));202203/* allocate stack and push root node */204vfloat<K> stack_near[stackSizeChunk];205NodeRef stack_node[stackSizeChunk];206stack_node[0] = BVH::invalidNode;207stack_near[0] = inf;208stack_node[1] = bvh->root;209stack_near[1] = tray.tnear;210NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;211NodeRef* __restrict__ sptr_node = stack_node + 2;212vfloat<K>* __restrict__ sptr_near = stack_near + 2;213214while (1) pop:215{216/* pop next node from stack */217assert(sptr_node > stack_node);218sptr_node--;219sptr_near--;220NodeRef cur = *sptr_node;221if (unlikely(cur == BVH::invalidNode)) {222assert(sptr_node == stack_node);223break;224}225226/* cull node if behind closest hit point */227vfloat<K> curDist = *sptr_near;228const vbool<K> active = curDist < tray.tfar;229if (unlikely(none(active)))230continue;231232/* switch to single ray traversal */233#if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))234#if FORCE_SINGLE_MODE == 0235if (single)236#endif237{238size_t bits = movemask(active);239#if FORCE_SINGLE_MODE == 0240if (unlikely(popcnt(bits) <= switchThreshold))241#endif242{243for (; bits!=0; ) {244const size_t i = bscf(bits);245intersect1(This, bvh, cur, i, pre, ray, tray, context);246}247tray.tfar = min(tray.tfar, ray.tfar);248continue;249}250}251#endif252while (likely(!cur.isLeaf()))253{254/* process nodes */255const vbool<K> valid_node = tray.tfar > curDist;256STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);257const NodeRef nodeRef = cur;258const BaseNode* __restrict__ const node = nodeRef.baseNode();259260/* set cur to invalid */261cur = BVH::emptyNode;262curDist = pos_inf;263264size_t num_child_hits = 0;265266for (unsigned i = 0; i < N; i++)267{268const NodeRef child = node->children[i];269if (unlikely(child == BVH::emptyNode)) break;270vfloat<K> lnearP;271vbool<K> lhit = valid_node;272BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);273274/* if we hit the child we choose to continue with that child if it275is closer than the current next child, or we push it onto the stack */276if (likely(any(lhit)))277{278assert(sptr_node < stackEnd);279assert(child != BVH::emptyNode);280const vfloat<K> childDist = select(lhit, lnearP, inf);281/* push cur node onto stack and continue with hit child */282if (any(childDist < curDist))283{284if (likely(cur != BVH::emptyNode)) {285num_child_hits++;286*sptr_node = cur; sptr_node++;287*sptr_near = curDist; sptr_near++;288}289curDist = childDist;290cur = child;291}292293/* push hit child onto stack */294else {295num_child_hits++;296*sptr_node = child; sptr_node++;297*sptr_near = childDist; sptr_near++;298}299}300}301302#if defined(__AVX__)303//STAT3(normal.trav_hit_boxes[num_child_hits], 1, 1, 1);304#endif305306if (unlikely(cur == BVH::emptyNode))307goto pop;308309/* improved distance sorting for 3 or more hits */310if (unlikely(num_child_hits >= 2))311{312if (any(sptr_near[-2] < sptr_near[-1]))313{314std::swap(sptr_near[-2],sptr_near[-1]);315std::swap(sptr_node[-2],sptr_node[-1]);316}317if (unlikely(num_child_hits >= 3))318{319if (any(sptr_near[-3] < sptr_near[-1]))320{321std::swap(sptr_near[-3],sptr_near[-1]);322std::swap(sptr_node[-3],sptr_node[-1]);323}324if (any(sptr_near[-3] < sptr_near[-2]))325{326std::swap(sptr_near[-3],sptr_near[-2]);327std::swap(sptr_node[-3],sptr_node[-2]);328}329}330}331332#if SWITCH_DURING_DOWN_TRAVERSAL == 1333if (single)334{335// seems to be the best place for testing utilization336if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))337{338*sptr_node++ = cur;339*sptr_near++ = curDist;340goto pop;341}342}343#endif344}345346/* return if stack is empty */347if (unlikely(cur == BVH::invalidNode)) {348assert(sptr_node == stack_node);349break;350}351352/* intersect leaf */353assert(cur != BVH::emptyNode);354const vbool<K> valid_leaf = tray.tfar > curDist;355STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);356if (unlikely(none(valid_leaf))) continue;357size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);358359size_t lazy_node = 0;360PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);361tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);362363if (unlikely(lazy_node)) {364*sptr_node = lazy_node; sptr_node++;365*sptr_near = neg_inf; sptr_near++;366}367}368} while(valid_bits);369}370371372template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>373void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::intersectCoherent(vint<K>* __restrict__ valid_i,374Accel::Intersectors* __restrict__ This,375RayHitK<K>& __restrict__ ray,376RayQueryContext* context)377{378BVH* __restrict__ bvh = (BVH*)This->ptr;379380/* filter out invalid rays */381vbool<K> valid = *valid_i == -1;382#if defined(EMBREE_IGNORE_INVALID_RAYS)383valid &= ray.valid();384#endif385386/* return if there are no valid rays */387size_t valid_bits = movemask(valid);388if (unlikely(valid_bits == 0)) return;389390/* verify correct input */391assert(all(valid, ray.valid()));392assert(all(valid, ray.tnear() >= 0.0f));393assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));394Precalculations pre(valid, ray);395396/* load ray */397TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);398const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);399const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);400401vint<K> octant = ray.octant();402octant = select(valid, octant, vint<K>(0xffffffff));403404do405{406const size_t valid_index = bsf(valid_bits);407const vbool<K> octant_valid = octant[valid_index] == octant;408valid_bits &= ~(size_t)movemask(octant_valid);409410tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));411tray.tfar = select(octant_valid, org_ray_tfar , vfloat<K>(neg_inf));412413Frustum<robust> frustum;414frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);415416StackItemT<NodeRef> stack[stackSizeSingle]; // stack of nodes417StackItemT<NodeRef>* stackPtr = stack + 1; // current stack pointer418stack[0].ptr = bvh->root;419stack[0].dist = neg_inf;420421while (1) pop:422{423/* pop next node from stack */424if (unlikely(stackPtr == stack)) break;425426stackPtr--;427NodeRef cur = NodeRef(stackPtr->ptr);428429/* cull node if behind closest hit point */430vfloat<K> curDist = *(float*)&stackPtr->dist;431const vbool<K> active = curDist < tray.tfar;432if (unlikely(none(active))) continue;433434while (likely(!cur.isLeaf()))435{436/* process nodes */437//STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);438const NodeRef nodeRef = cur;439const AABBNode* __restrict__ const node = nodeRef.getAABBNode();440441vfloat<N> fmin;442size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);443444if (unlikely(!m_frustum_node)) goto pop;445cur = BVH::emptyNode;446curDist = pos_inf;447448#if defined(__AVX__)449//STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);450#endif451size_t num_child_hits = 0;452do {453const size_t i = bscf(m_frustum_node);454vfloat<K> lnearP;455vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored456STAT3(normal.trav_nodes, 1, 1, 1);457BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);458459if (likely(any(lhit)))460{461const vfloat<K> childDist = fmin[i];462const NodeRef child = node->child(i);463BVHN<N>::prefetch(child);464if (any(childDist < curDist))465{466if (likely(cur != BVH::emptyNode)) {467num_child_hits++;468stackPtr->ptr = cur;469*(float*)&stackPtr->dist = toScalar(curDist);470stackPtr++;471}472curDist = childDist;473cur = child;474}475/* push hit child onto stack */476else {477num_child_hits++;478stackPtr->ptr = child;479*(float*)&stackPtr->dist = toScalar(childDist);480stackPtr++;481}482}483} while(m_frustum_node);484485if (unlikely(cur == BVH::emptyNode)) goto pop;486487/* improved distance sorting for 3 or more hits */488if (unlikely(num_child_hits >= 2))489{490if (stackPtr[-2].dist < stackPtr[-1].dist)491std::swap(stackPtr[-2],stackPtr[-1]);492if (unlikely(num_child_hits >= 3))493{494if (stackPtr[-3].dist < stackPtr[-1].dist)495std::swap(stackPtr[-3],stackPtr[-1]);496if (stackPtr[-3].dist < stackPtr[-2].dist)497std::swap(stackPtr[-3],stackPtr[-2]);498}499}500}501502/* intersect leaf */503assert(cur != BVH::invalidNode);504assert(cur != BVH::emptyNode);505const vbool<K> valid_leaf = tray.tfar > curDist;506STAT3(normal.trav_leaves, 1, popcnt(valid_leaf), K);507if (unlikely(none(valid_leaf))) continue;508size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);509510size_t lazy_node = 0;511PrimitiveIntersectorK::intersect(valid_leaf, This, pre, ray, context, prim, items, tray, lazy_node);512513/* reduce max distance interval on successful intersection */514if (likely(any((ray.tfar < tray.tfar) & valid_leaf)))515{516tray.tfar = select(valid_leaf, ray.tfar, tray.tfar);517frustum.template updateMaxDist<K>(tray.tfar);518}519520if (unlikely(lazy_node)) {521stackPtr->ptr = lazy_node;522stackPtr->dist = neg_inf;523stackPtr++;524}525}526527} while(valid_bits);528}529530// ===================================================================================================================================================================531// ===================================================================================================================================================================532// ===================================================================================================================================================================533534template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>535bool BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded1(Accel::Intersectors* This,536const BVH* bvh,537NodeRef root,538size_t k,539Precalculations& pre,540RayK<K>& ray,541const TravRayK<K, robust>& tray,542RayQueryContext* context)543{544/* stack state */545NodeRef stack[stackSizeSingle]; // stack of nodes that still need to get traversed546NodeRef* stackPtr = stack+1; // current stack pointer547NodeRef* stackEnd = stack+stackSizeSingle;548stack[0] = root;549550/* load the ray into SIMD registers */551TravRay<N,robust> tray1;552tray1.template init<K>(k, tray.org, tray.dir, tray.rdir, tray.nearXYZ, tray.tnear[k], tray.tfar[k]);553554/* pop loop */555while (true) pop:556{557/* pop next node */558if (unlikely(stackPtr == stack)) break;559stackPtr--;560NodeRef cur = (NodeRef)*stackPtr;561562/* downtraversal loop */563while (true)564{565/* intersect node */566size_t mask; vfloat<N> tNear;567STAT3(shadow.trav_nodes, 1, 1, 1);568bool nodeIntersected = BVHNNodeIntersector1<N, types, robust>::intersect(cur, tray1, ray.time()[k], tNear, mask);569if (unlikely(!nodeIntersected)) { STAT3(shadow.trav_nodes,-1,-1,-1); break; }570571/* if no child is hit, pop next node */572if (unlikely(mask == 0))573goto pop;574575/* select next child and push other children */576BVHNNodeTraverser1Hit<N, types>::traverseAnyHit(cur, mask, tNear, stackPtr, stackEnd);577}578579/* this is a leaf node */580assert(cur != BVH::emptyNode);581STAT3(shadow.trav_leaves, 1, 1, 1);582size_t num; Primitive* prim = (Primitive*)cur.leaf(num);583584size_t lazy_node = 0;585if (PrimitiveIntersectorK::occluded(This, pre, ray, k, context, prim, num, tray1, lazy_node)) {586ray.tfar[k] = neg_inf;587return true;588}589590if (unlikely(lazy_node)) {591*stackPtr = lazy_node;592stackPtr++;593}594}595return false;596}597598template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>599void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occluded(vint<K>* __restrict__ valid_i,600Accel::Intersectors* __restrict__ This,601RayK<K>& __restrict__ ray,602RayQueryContext* context)603{604BVH* __restrict__ bvh = (BVH*)This->ptr;605606/* we may traverse an empty BVH in case all geometry was invalid */607if (bvh->root == BVH::emptyNode)608return;609610#if ENABLE_FAST_COHERENT_CODEPATHS == 1611assert(context);612if (unlikely(types == BVH_AN1 && context->user && context->isCoherent()))613{614occludedCoherent(valid_i, This, ray, context);615return;616}617#endif618619/* filter out already occluded and invalid rays */620vbool<K> valid = (*valid_i == -1) & (ray.tfar >= 0.0f);621#if defined(EMBREE_IGNORE_INVALID_RAYS)622valid &= ray.valid();623#endif624625/* return if there are no valid rays */626const size_t valid_bits = movemask(valid);627if (unlikely(valid_bits == 0)) return;628629/* verify correct input */630assert(all(valid, ray.valid()));631assert(all(valid, ray.tnear() >= 0.0f));632assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));633Precalculations pre(valid, ray);634635/* load ray */636TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);637const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);638const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);639640tray.tnear = select(valid, org_ray_tnear, vfloat<K>(pos_inf));641tray.tfar = select(valid, org_ray_tfar , vfloat<K>(neg_inf));642643vbool<K> terminated = !valid;644const vfloat<K> inf = vfloat<K>(pos_inf);645646/* determine switch threshold based on flags */647const size_t switchThreshold = (context->user && context->isCoherent()) ? 2 : switchThresholdIncoherent;648649/* allocate stack and push root node */650vfloat<K> stack_near[stackSizeChunk];651NodeRef stack_node[stackSizeChunk];652stack_node[0] = BVH::invalidNode;653stack_near[0] = inf;654stack_node[1] = bvh->root;655stack_near[1] = tray.tnear;656NodeRef* stackEnd MAYBE_UNUSED = stack_node+stackSizeChunk;657NodeRef* __restrict__ sptr_node = stack_node + 2;658vfloat<K>* __restrict__ sptr_near = stack_near + 2;659660while (1) pop:661{662/* pop next node from stack */663assert(sptr_node > stack_node);664sptr_node--;665sptr_near--;666NodeRef cur = *sptr_node;667if (unlikely(cur == BVH::invalidNode)) {668assert(sptr_node == stack_node);669break;670}671672/* cull node if behind closest hit point */673vfloat<K> curDist = *sptr_near;674const vbool<K> active = curDist < tray.tfar;675if (unlikely(none(active)))676continue;677678/* switch to single ray traversal */679#if (!defined(__WIN32__) || defined(__X86_64__)) && ((defined(__aarch64__)) || defined(__SSE4_2__))680#if FORCE_SINGLE_MODE == 0681if (single)682#endif683{684size_t bits = movemask(active);685#if FORCE_SINGLE_MODE == 0686if (unlikely(popcnt(bits) <= switchThreshold))687#endif688{689for (; bits!=0; ) {690const size_t i = bscf(bits);691if (occluded1(This, bvh, cur, i, pre, ray, tray, context))692set(terminated, i);693}694if (all(terminated)) break;695tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar);696continue;697}698}699#endif700701while (likely(!cur.isLeaf()))702{703/* process nodes */704const vbool<K> valid_node = tray.tfar > curDist;705STAT3(shadow.trav_nodes, 1, popcnt(valid_node), K);706const NodeRef nodeRef = cur;707const BaseNode* __restrict__ const node = nodeRef.baseNode();708709/* set cur to invalid */710cur = BVH::emptyNode;711curDist = pos_inf;712713for (unsigned i = 0; i < N; i++)714{715const NodeRef child = node->children[i];716if (unlikely(child == BVH::emptyNode)) break;717vfloat<K> lnearP;718vbool<K> lhit = valid_node;719BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);720721/* if we hit the child we push the previously hit node onto the stack, and continue with the currently hit child */722if (likely(any(lhit)))723{724assert(sptr_node < stackEnd);725assert(child != BVH::emptyNode);726const vfloat<K> childDist = select(lhit, lnearP, inf);727728/* push 'cur' node onto stack and continue with hit child */729if (likely(cur != BVH::emptyNode)) {730*sptr_node = cur; sptr_node++;731*sptr_near = curDist; sptr_near++;732}733curDist = childDist;734cur = child;735}736}737if (unlikely(cur == BVH::emptyNode))738goto pop;739740#if SWITCH_DURING_DOWN_TRAVERSAL == 1741if (single)742{743// seems to be the best place for testing utilization744if (unlikely(popcnt(tray.tfar > curDist) <= switchThreshold))745{746*sptr_node++ = cur;747*sptr_near++ = curDist;748goto pop;749}750}751#endif752}753754/* return if stack is empty */755if (unlikely(cur == BVH::invalidNode)) {756assert(sptr_node == stack_node);757break;758}759760761/* intersect leaf */762assert(cur != BVH::emptyNode);763const vbool<K> valid_leaf = tray.tfar > curDist;764STAT3(shadow.trav_leaves, 1, popcnt(valid_leaf), K);765if (unlikely(none(valid_leaf))) continue;766size_t items; const Primitive* prim = (Primitive*) cur.leaf(items);767768size_t lazy_node = 0;769terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);770if (all(terminated)) break;771tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays772773if (unlikely(lazy_node)) {774*sptr_node = lazy_node; sptr_node++;775*sptr_near = neg_inf; sptr_near++;776}777}778779vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);780}781782783template<int N, int K, int types, bool robust, typename PrimitiveIntersectorK, bool single>784void BVHNIntersectorKHybrid<N, K, types, robust, PrimitiveIntersectorK, single>::occludedCoherent(vint<K>* __restrict__ valid_i,785Accel::Intersectors* __restrict__ This,786RayK<K>& __restrict__ ray,787RayQueryContext* context)788{789BVH* __restrict__ bvh = (BVH*)This->ptr;790791/* filter out invalid rays */792vbool<K> valid = *valid_i == -1;793#if defined(EMBREE_IGNORE_INVALID_RAYS)794valid &= ray.valid();795#endif796797/* return if there are no valid rays */798size_t valid_bits = movemask(valid);799if (unlikely(valid_bits == 0)) return;800801/* verify correct input */802assert(all(valid, ray.valid()));803assert(all(valid, ray.tnear() >= 0.0f));804assert(!(types & BVH_MB) || all(valid, (ray.time() >= 0.0f) & (ray.time() <= 1.0f)));805Precalculations pre(valid,ray);806807/* load ray */808TravRayK<K, robust> tray(ray.org, ray.dir, single ? N : 0);809const vfloat<K> org_ray_tnear = max(ray.tnear(), 0.0f);810const vfloat<K> org_ray_tfar = max(ray.tfar , 0.0f);811812vbool<K> terminated = !valid;813814vint<K> octant = ray.octant();815octant = select(valid, octant, vint<K>(0xffffffff));816817do818{819const size_t valid_index = bsf(valid_bits);820vbool<K> octant_valid = octant[valid_index] == octant;821valid_bits &= ~(size_t)movemask(octant_valid);822823tray.tnear = select(octant_valid, org_ray_tnear, vfloat<K>(pos_inf));824tray.tfar = select(octant_valid, org_ray_tfar, vfloat<K>(neg_inf));825826Frustum<robust> frustum;827frustum.template init<K>(octant_valid, tray.org, tray.rdir, tray.tnear, tray.tfar, N);828829StackItemMaskT<NodeRef> stack[stackSizeSingle]; // stack of nodes830StackItemMaskT<NodeRef>* stackPtr = stack + 1; // current stack pointer831stack[0].ptr = bvh->root;832stack[0].mask = movemask(octant_valid);833834while (1) pop:835{836/* pop next node from stack */837if (unlikely(stackPtr == stack)) break;838839stackPtr--;840NodeRef cur = NodeRef(stackPtr->ptr);841842/* cull node of active rays have already been terminated */843size_t m_active = (size_t)stackPtr->mask & (~(size_t)movemask(terminated));844845if (unlikely(m_active == 0)) continue;846847while (likely(!cur.isLeaf()))848{849/* process nodes */850//STAT3(normal.trav_nodes, 1, popcnt(valid_node), K);851const NodeRef nodeRef = cur;852const AABBNode* __restrict__ const node = nodeRef.getAABBNode();853854vfloat<N> fmin;855size_t m_frustum_node = intersectNodeFrustum<N>(node, frustum, fmin);856857if (unlikely(!m_frustum_node)) goto pop;858cur = BVH::emptyNode;859m_active = 0;860861#if defined(__AVX__)862//STAT3(normal.trav_hit_boxes[popcnt(m_frustum_node)], 1, 1, 1);863#endif864//size_t num_child_hits = 0;865do {866const size_t i = bscf(m_frustum_node);867vfloat<K> lnearP;868vbool<K> lhit = false; // motion blur is not supported, so the initial value will be ignored869STAT3(normal.trav_nodes, 1, 1, 1);870BVHNNodeIntersectorK<N, K, types, robust>::intersect(nodeRef, i, tray, ray.time(), lnearP, lhit);871872if (likely(any(lhit)))873{874const NodeRef child = node->child(i);875assert(child != BVH::emptyNode);876BVHN<N>::prefetch(child);877if (likely(cur != BVH::emptyNode)) {878//num_child_hits++;879stackPtr->ptr = cur;880stackPtr->mask = m_active;881stackPtr++;882}883cur = child;884m_active = movemask(lhit);885}886} while(m_frustum_node);887888if (unlikely(cur == BVH::emptyNode)) goto pop;889}890891/* intersect leaf */892assert(cur != BVH::invalidNode);893assert(cur != BVH::emptyNode);894#if defined(__AVX__)895STAT3(normal.trav_leaves, 1, popcnt(m_active), K);896#endif897if (unlikely(!m_active)) continue;898size_t items; const Primitive* prim = (Primitive*)cur.leaf(items);899900size_t lazy_node = 0;901terminated |= PrimitiveIntersectorK::occluded(!terminated, This, pre, ray, context, prim, items, tray, lazy_node);902octant_valid &= !terminated;903if (unlikely(none(octant_valid))) break;904tray.tfar = select(terminated, vfloat<K>(neg_inf), tray.tfar); // ignore node intersections for terminated rays905906if (unlikely(lazy_node)) {907stackPtr->ptr = lazy_node;908stackPtr->mask = movemask(octant_valid);909stackPtr++;910}911}912} while(valid_bits);913914vfloat<K>::store(valid & terminated, &ray.tfar, neg_inf);915}916}917}918919920