Path: blob/master/thirdparty/embree/kernels/builders/bvh_builder_sah.h
9906 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "heuristic_binning_array_aligned.h"6#include "heuristic_spatial_array.h"7#include "heuristic_openmerge_array.h"89#define NUM_OBJECT_BINS 3210#define NUM_SPATIAL_BINS 161112namespace embree13{14namespace isa15{16MAYBE_UNUSED static const float travCost = 1.0f;17MAYBE_UNUSED static const size_t DEFAULT_SINGLE_THREAD_THRESHOLD = 1024;1819struct GeneralBVHBuilder20{21static const size_t MAX_BRANCHING_FACTOR = 16; //!< maximum supported BVH branching factor22static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree of we are that many levels before the maximum tree depth232425/*! settings for SAH builder */26struct Settings27{28/*! default settings */29Settings ()30: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7),31travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf) {}3233/*! initialize settings from API settings */34Settings (const RTCBuildArguments& settings)35: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7),36travCost(1.0f), intCost(1.0f), singleThreadThreshold(1024), primrefarrayalloc(inf)37{38if (RTC_BUILD_ARGUMENTS_HAS(settings,maxBranchingFactor)) branchingFactor = settings.maxBranchingFactor;39if (RTC_BUILD_ARGUMENTS_HAS(settings,maxDepth )) maxDepth = settings.maxDepth;40if (RTC_BUILD_ARGUMENTS_HAS(settings,sahBlockSize )) logBlockSize = bsr(settings.sahBlockSize);41if (RTC_BUILD_ARGUMENTS_HAS(settings,minLeafSize )) minLeafSize = settings.minLeafSize;42if (RTC_BUILD_ARGUMENTS_HAS(settings,maxLeafSize )) maxLeafSize = settings.maxLeafSize;43if (RTC_BUILD_ARGUMENTS_HAS(settings,traversalCost )) travCost = settings.traversalCost;44if (RTC_BUILD_ARGUMENTS_HAS(settings,intersectionCost )) intCost = settings.intersectionCost;4546minLeafSize = min(minLeafSize,maxLeafSize);47}4849Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold, size_t primrefarrayalloc = inf)50: branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(min(minLeafSize,maxLeafSize)), maxLeafSize(maxLeafSize),51travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold), primrefarrayalloc(primrefarrayalloc)52{53}5455public:56size_t branchingFactor; //!< branching factor of BVH to build57size_t maxDepth; //!< maximum depth of BVH to build58size_t logBlockSize; //!< log2 of blocksize for SAH heuristic59size_t minLeafSize; //!< minimum size of a leaf60size_t maxLeafSize; //!< maximum size of a leaf61float travCost; //!< estimated cost of one traversal step62float intCost; //!< estimated cost of one primitive intersection63size_t singleThreadThreshold; //!< threshold when we switch to single threaded build64size_t primrefarrayalloc; //!< builder uses prim ref array to allocate nodes and leaves when a subtree of that size is finished65};6667/*! recursive state of builder */68template<typename Set, typename Split>69struct BuildRecordT70{71public:72__forceinline BuildRecordT () {}7374__forceinline BuildRecordT (size_t depth)75: depth(depth), alloc_barrier(false), prims(empty) {}7677__forceinline BuildRecordT (size_t depth, const Set& prims)78: depth(depth), alloc_barrier(false), prims(prims) {}7980__forceinline BBox3fa bounds() const { return prims.geomBounds; }8182__forceinline friend bool operator< (const BuildRecordT& a, const BuildRecordT& b) { return a.prims.size() < b.prims.size(); }83__forceinline friend bool operator> (const BuildRecordT& a, const BuildRecordT& b) { return a.prims.size() > b.prims.size(); }8485__forceinline size_t size() const { return prims.size(); }8687public:88size_t depth; //!< Depth of the root of this subtree.89bool alloc_barrier; //!< barrier used to reuse primref-array blocks to allocate nodes90Set prims; //!< The list of primitives.91};9293template<typename PrimRef, typename Set>94struct DefaultCanCreateLeafFunc95{96__forceinline bool operator()(const PrimRef*, const Set&) const { return true; }97};9899template<typename PrimRef, typename Set>100struct DefaultCanCreateLeafSplitFunc101{102__forceinline void operator()(PrimRef*, const Set&, Set&, Set&) const { }103};104105template<typename BuildRecord,106typename Heuristic,107typename Set,108typename PrimRef,109typename ReductionTy,110typename Allocator,111typename CreateAllocFunc,112typename CreateNodeFunc,113typename UpdateNodeFunc,114typename CreateLeafFunc,115typename CanCreateLeafFunc,116typename CanCreateLeafSplitFunc,117typename ProgressMonitor>118119class BuilderT120{121friend struct GeneralBVHBuilder;122123BuilderT (PrimRef* prims,124Heuristic& heuristic,125const CreateAllocFunc& createAlloc,126const CreateNodeFunc& createNode,127const UpdateNodeFunc& updateNode,128const CreateLeafFunc& createLeaf,129const CanCreateLeafFunc& canCreateLeaf,130const CanCreateLeafSplitFunc& canCreateLeafSplit,131const ProgressMonitor& progressMonitor,132const Settings& settings) :133cfg(settings),134prims(prims),135heuristic(heuristic),136createAlloc(createAlloc),137createNode(createNode),138updateNode(updateNode),139createLeaf(createLeaf),140canCreateLeaf(canCreateLeaf),141canCreateLeafSplit(canCreateLeafSplit),142progressMonitor(progressMonitor)143{144if (cfg.branchingFactor > MAX_BRANCHING_FACTOR)145throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");146}147148const ReductionTy createLargeLeaf(const BuildRecord& current, Allocator alloc)149{150/* this should never occur but is a fatal error */151if (current.depth > cfg.maxDepth)152throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");153154/* create leaf for few primitives */155if (current.prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,current.prims))156return createLeaf(prims,current.prims,alloc);157158/* fill all children by always splitting the largest one */159ReductionTy values[MAX_BRANCHING_FACTOR];160BuildRecord children[MAX_BRANCHING_FACTOR];161size_t numChildren = 1;162children[0] = current;163do {164165/* find best child with largest bounding box area */166size_t bestChild = -1;167size_t bestSize = 0;168for (size_t i=0; i<numChildren; i++)169{170/* ignore leaves as they cannot get split */171if (children[i].prims.size() <= cfg.maxLeafSize && canCreateLeaf(prims,children[i].prims))172continue;173174/* remember child with largest size */175if (children[i].prims.size() > bestSize) {176bestSize = children[i].prims.size();177bestChild = i;178}179}180if (bestChild == (size_t)-1) break;181182/*! split best child into left and right child */183BuildRecord left(current.depth+1);184BuildRecord right(current.depth+1);185if (!canCreateLeaf(prims,children[bestChild].prims)) {186canCreateLeafSplit(prims,children[bestChild].prims,left.prims,right.prims);187} else {188heuristic.splitFallback(children[bestChild].prims,left.prims,right.prims);189}190191/* add new children left and right */192children[bestChild] = children[numChildren-1];193children[numChildren-1] = left;194children[numChildren+0] = right;195numChildren++;196197} while (numChildren < cfg.branchingFactor);198199/* set barrier for primrefarrayalloc */200if (unlikely(current.size() > cfg.primrefarrayalloc))201for (size_t i=0; i<numChildren; i++)202children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc;203204/* create node */205auto node = createNode(children,numChildren,alloc);206207/* recurse into each child and perform reduction */208for (size_t i=0; i<numChildren; i++)209values[i] = createLargeLeaf(children[i],alloc);210211/* perform reduction */212return updateNode(current,children,node,values,numChildren);213}214215const ReductionTy recurse(BuildRecord& current, Allocator alloc, bool toplevel)216{217/* get thread local allocator */218if (!alloc)219alloc = createAlloc();220221/* call memory monitor function to signal progress */222if (toplevel && current.size() <= cfg.singleThreadThreshold)223progressMonitor(current.size());224225/*! find best split */226auto split = heuristic.find(current.prims,cfg.logBlockSize);227228/*! compute leaf and split cost */229const float leafSAH = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize);230const float splitSAH = cfg.travCost*halfArea(current.prims.geomBounds)+cfg.intCost*split.splitSAH();231assert((current.prims.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0)));232233/*! create a leaf node when threshold reached or SAH tells us to stop */234if (current.prims.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.prims.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) {235heuristic.deterministic_order(current.prims);236return createLargeLeaf(current,alloc);237}238239/*! perform initial split */240Set lprims,rprims;241heuristic.split(split,current.prims,lprims,rprims);242243/*! initialize child list with initial split */244ReductionTy values[MAX_BRANCHING_FACTOR];245BuildRecord children[MAX_BRANCHING_FACTOR];246children[0] = BuildRecord(current.depth+1,lprims);247children[1] = BuildRecord(current.depth+1,rprims);248size_t numChildren = 2;249250/*! split until node is full or SAH tells us to stop */251while (numChildren < cfg.branchingFactor)252{253/*! find best child to split */254float bestArea = neg_inf;255ssize_t bestChild = -1;256for (size_t i=0; i<numChildren; i++)257{258/* ignore leaves as they cannot get split */259if (children[i].prims.size() <= cfg.minLeafSize) continue;260261/* find child with largest surface area */262if (halfArea(children[i].prims.geomBounds) > bestArea) {263bestChild = i;264bestArea = halfArea(children[i].prims.geomBounds);265}266}267if (bestChild == -1) break;268269/* perform best found split */270BuildRecord& brecord = children[bestChild];271BuildRecord lrecord(current.depth+1);272BuildRecord rrecord(current.depth+1);273auto split = heuristic.find(brecord.prims,cfg.logBlockSize);274heuristic.split(split,brecord.prims,lrecord.prims,rrecord.prims);275children[bestChild ] = lrecord;276children[numChildren] = rrecord;277numChildren++;278}279280/* set barrier for primrefarrayalloc */281if (unlikely(current.size() > cfg.primrefarrayalloc))282for (size_t i=0; i<numChildren; i++)283children[i].alloc_barrier = children[i].size() <= cfg.primrefarrayalloc;284285/* sort buildrecords for faster shadow ray traversal */286std::sort(&children[0],&children[numChildren],std::greater<BuildRecord>());287288/*! create an inner node */289auto node = createNode(children,numChildren,alloc);290291/* spawn tasks */292if (current.size() > cfg.singleThreadThreshold)293{294/*! parallel_for is faster than spawning sub-tasks */295parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) { // FIXME: no range here296for (size_t i=r.begin(); i<r.end(); i++) {297values[i] = recurse(children[i],nullptr,true);298_mm_mfence(); // to allow non-temporal stores during build299}300});301302return updateNode(current,children,node,values,numChildren);303}304/* recurse into each child */305else306{307for (size_t i=0; i<numChildren; i++)308values[i] = recurse(children[i],alloc,false);309310return updateNode(current,children,node,values,numChildren);311}312}313314private:315Settings cfg;316PrimRef* prims;317Heuristic& heuristic;318const CreateAllocFunc& createAlloc;319const CreateNodeFunc& createNode;320const UpdateNodeFunc& updateNode;321const CreateLeafFunc& createLeaf;322const CanCreateLeafFunc& canCreateLeaf;323const CanCreateLeafSplitFunc& canCreateLeafSplit;324const ProgressMonitor& progressMonitor;325};326327template<328typename ReductionTy,329typename Heuristic,330typename Set,331typename PrimRef,332typename CreateAllocFunc,333typename CreateNodeFunc,334typename UpdateNodeFunc,335typename CreateLeafFunc,336typename ProgressMonitor>337338__noinline static ReductionTy build(Heuristic& heuristic,339PrimRef* prims,340const Set& set,341CreateAllocFunc createAlloc,342CreateNodeFunc createNode, UpdateNodeFunc updateNode,343const CreateLeafFunc& createLeaf,344const ProgressMonitor& progressMonitor,345const Settings& settings)346{347typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord;348349typedef BuilderT<350BuildRecord,351Heuristic,352Set,353PrimRef,354ReductionTy,355decltype(createAlloc()),356CreateAllocFunc,357CreateNodeFunc,358UpdateNodeFunc,359CreateLeafFunc,360DefaultCanCreateLeafFunc<PrimRef, Set>,361DefaultCanCreateLeafSplitFunc<PrimRef, Set>,362ProgressMonitor> Builder;363364/* instantiate builder */365Builder builder(prims,366heuristic,367createAlloc,368createNode,369updateNode,370createLeaf,371DefaultCanCreateLeafFunc<PrimRef, Set>(),372DefaultCanCreateLeafSplitFunc<PrimRef, Set>(),373progressMonitor,374settings);375376/* build hierarchy */377BuildRecord record(1,set);378const ReductionTy root = builder.recurse(record,nullptr,true);379_mm_mfence(); // to allow non-temporal stores during build380return root;381}382383template<384typename ReductionTy,385typename Heuristic,386typename Set,387typename PrimRef,388typename CreateAllocFunc,389typename CreateNodeFunc,390typename UpdateNodeFunc,391typename CreateLeafFunc,392typename CanCreateLeafFunc,393typename CanCreateLeafSplitFunc,394typename ProgressMonitor>395396__noinline static ReductionTy build(Heuristic& heuristic,397PrimRef* prims,398const Set& set,399CreateAllocFunc createAlloc,400CreateNodeFunc createNode, UpdateNodeFunc updateNode,401const CreateLeafFunc& createLeaf,402const CanCreateLeafFunc& canCreateLeaf,403const CanCreateLeafSplitFunc& canCreateLeafSplit,404const ProgressMonitor& progressMonitor,405const Settings& settings)406{407typedef BuildRecordT<Set,typename Heuristic::Split> BuildRecord;408409typedef BuilderT<410BuildRecord,411Heuristic,412Set,413PrimRef,414ReductionTy,415decltype(createAlloc()),416CreateAllocFunc,417CreateNodeFunc,418UpdateNodeFunc,419CreateLeafFunc,420CanCreateLeafFunc,421CanCreateLeafSplitFunc,422ProgressMonitor> Builder;423424/* instantiate builder */425Builder builder(prims,426heuristic,427createAlloc,428createNode,429updateNode,430createLeaf,431canCreateLeaf,432canCreateLeafSplit,433progressMonitor,434settings);435436/* build hierarchy */437BuildRecord record(1,set);438const ReductionTy root = builder.recurse(record,nullptr,true);439_mm_mfence(); // to allow non-temporal stores during build440return root;441}442};443444/* SAH builder that operates on an array of BuildRecords */445struct BVHBuilderBinnedSAH446{447typedef PrimInfoRange Set;448typedef HeuristicArrayBinningSAH<PrimRef,NUM_OBJECT_BINS> Heuristic;449typedef GeneralBVHBuilder::BuildRecordT<Set,typename Heuristic::Split> BuildRecord;450typedef GeneralBVHBuilder::Settings Settings;451452/*! special builder that propagates reduction over the tree */453template<454typename ReductionTy,455typename CreateAllocFunc,456typename CreateNodeFunc,457typename UpdateNodeFunc,458typename CreateLeafFunc,459typename ProgressMonitor>460461static ReductionTy build(CreateAllocFunc createAlloc,462CreateNodeFunc createNode, UpdateNodeFunc updateNode,463const CreateLeafFunc& createLeaf,464const ProgressMonitor& progressMonitor,465PrimRef* prims, const PrimInfo& pinfo,466const Settings& settings)467{468Heuristic heuristic(prims);469return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(470heuristic,471prims,472PrimInfoRange(0,pinfo.size(),pinfo),473createAlloc,474createNode,475updateNode,476createLeaf,477progressMonitor,478settings);479}480481/*! special builder that propagates reduction over the tree */482template<483typename ReductionTy,484typename CreateAllocFunc,485typename CreateNodeFunc,486typename UpdateNodeFunc,487typename CreateLeafFunc,488typename CanCreateLeafFunc,489typename CanCreateLeafSplitFunc,490typename ProgressMonitor>491492static ReductionTy build(CreateAllocFunc createAlloc,493CreateNodeFunc createNode, UpdateNodeFunc updateNode,494const CreateLeafFunc& createLeaf,495const CanCreateLeafFunc& canCreateLeaf,496const CanCreateLeafSplitFunc& canCreateLeafSplit,497const ProgressMonitor& progressMonitor,498PrimRef* prims, const PrimInfo& pinfo,499const Settings& settings)500{501Heuristic heuristic(prims);502return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(503heuristic,504prims,505PrimInfoRange(0,pinfo.size(),pinfo),506createAlloc,507createNode,508updateNode,509createLeaf,510canCreateLeaf,511canCreateLeafSplit,512progressMonitor,513settings);514}515};516517/* Spatial SAH builder that operates on an double-buffered array of BuildRecords */518struct BVHBuilderBinnedFastSpatialSAH519{520typedef PrimInfoExtRange Set;521typedef Split2<BinSplit<NUM_OBJECT_BINS>,SpatialBinSplit<NUM_SPATIAL_BINS> > Split;522typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord;523typedef GeneralBVHBuilder::Settings Settings;524525static const unsigned int GEOMID_MASK = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;526static const unsigned int SPLITS_MASK = 0xFFFFFFFF << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);527528template<typename ReductionTy, typename UserCreateLeaf>529struct CreateLeafExt530{531__forceinline CreateLeafExt (const UserCreateLeaf userCreateLeaf)532: userCreateLeaf(userCreateLeaf) {}533534// __noinline is workaround for ICC2016 compiler bug535template<typename Allocator>536__noinline ReductionTy operator() (PrimRef* prims, const range<size_t>& range, Allocator alloc) const537{538for (size_t i=range.begin(); i<range.end(); i++)539prims[i].lower.u &= GEOMID_MASK;540541return userCreateLeaf(prims,range,alloc);542}543544const UserCreateLeaf userCreateLeaf;545};546547/*! special builder that propagates reduction over the tree */548template<549typename ReductionTy,550typename CreateAllocFunc,551typename CreateNodeFunc,552typename UpdateNodeFunc,553typename CreateLeafFunc,554typename SplitPrimitiveFunc,555typename ProgressMonitor>556557static ReductionTy build(CreateAllocFunc createAlloc,558CreateNodeFunc createNode,559UpdateNodeFunc updateNode,560const CreateLeafFunc& createLeaf,561SplitPrimitiveFunc splitPrimitive,562ProgressMonitor progressMonitor,563PrimRef* prims,564const size_t extSize,565const PrimInfo& pinfo,566const Settings& settings)567{568typedef HeuristicArraySpatialSAH<SplitPrimitiveFunc,PrimRef,NUM_OBJECT_BINS,NUM_SPATIAL_BINS> Heuristic;569Heuristic heuristic(splitPrimitive,prims,pinfo);570571/* calculate total surface area */ // FIXME: this sum is not deterministic572const float A = (float) parallel_reduce(size_t(0),pinfo.size(),0.0, [&] (const range<size_t>& r) -> double {573574double A = 0.0f;575for (size_t i=r.begin(); i<r.end(); i++)576{577PrimRef& prim = prims[i];578A += area(prim.bounds());579}580return A;581},std::plus<double>());582583584/* calculate maximum number of spatial splits per primitive */585const unsigned int maxSplits = ((size_t)1 << RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)-1;586const float f = 10.0f;587588const float invA = 1.0f / A;589parallel_for( size_t(0), pinfo.size(), [&](const range<size_t>& r) {590591for (size_t i=r.begin(); i<r.end(); i++)592{593PrimRef& prim = prims[i];594assert((prim.geomID() & SPLITS_MASK) == 0);595// FIXME: is there a better general heuristic ?596const float nf = ceilf(f*pinfo.size()*area(prim.bounds()) * invA);597unsigned int n = 4+min((int)maxSplits-4, max(1, (int)(nf)));598prim.lower.u |= n << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);599}600});601602return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,PrimRef>(603heuristic,604prims,605PrimInfoExtRange(0,pinfo.size(),extSize,pinfo),606createAlloc,607createNode,608updateNode,609CreateLeafExt<ReductionTy,CreateLeafFunc>(createLeaf),610progressMonitor,611settings);612}613};614615/* Open/Merge SAH builder that operates on an array of BuildRecords */616struct BVHBuilderBinnedOpenMergeSAH617{618static const size_t NUM_OBJECT_BINS_HQ = 32;619typedef PrimInfoExtRange Set;620typedef BinSplit<NUM_OBJECT_BINS_HQ> Split;621typedef GeneralBVHBuilder::BuildRecordT<Set,Split> BuildRecord;622typedef GeneralBVHBuilder::Settings Settings;623624/*! special builder that propagates reduction over the tree */625template<626typename ReductionTy,627typename BuildRef,628typename CreateAllocFunc,629typename CreateNodeFunc,630typename UpdateNodeFunc,631typename CreateLeafFunc,632typename NodeOpenerFunc,633typename ProgressMonitor>634635static ReductionTy build(CreateAllocFunc createAlloc,636CreateNodeFunc createNode,637UpdateNodeFunc updateNode,638const CreateLeafFunc& createLeaf,639NodeOpenerFunc nodeOpenerFunc,640ProgressMonitor progressMonitor,641BuildRef* prims,642const size_t extSize,643const PrimInfo& pinfo,644const Settings& settings)645{646typedef HeuristicArrayOpenMergeSAH<NodeOpenerFunc,BuildRef,NUM_OBJECT_BINS_HQ> Heuristic;647Heuristic heuristic(nodeOpenerFunc,prims,settings.branchingFactor);648649return GeneralBVHBuilder::build<ReductionTy,Heuristic,Set,BuildRef>(650heuristic,651prims,652PrimInfoExtRange(0,pinfo.size(),extSize,pinfo),653createAlloc,654createNode,655updateNode,656createLeaf,657progressMonitor,658settings);659}660};661}662}663664665