Path: blob/master/thirdparty/embree/kernels/builders/bvh_builder_hair.h
9906 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "../bvh/bvh.h"6#include "../geometry/primitive.h"7#include "../builders/bvh_builder_sah.h"8#include "../builders/heuristic_binning_array_aligned.h"9#include "../builders/heuristic_binning_array_unaligned.h"10#include "../builders/heuristic_strand_array.h"1112#define NUM_HAIR_OBJECT_BINS 321314namespace embree15{16namespace isa17{18struct BVHBuilderHair19{20/*! settings for builder */21struct Settings22{23/*! default settings */24Settings ()25: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7), finished_range_threshold(inf) {}2627public:28size_t branchingFactor; //!< branching factor of BVH to build29size_t maxDepth; //!< maximum depth of BVH to build30size_t logBlockSize; //!< log2 of blocksize for SAH heuristic31size_t minLeafSize; //!< minimum size of a leaf32size_t maxLeafSize; //!< maximum size of a leaf33size_t finished_range_threshold; //!< finished range threshold34};3536template<typename NodeRef,37typename CreateAllocFunc,38typename CreateAABBNodeFunc,39typename SetAABBNodeFunc,40typename CreateOBBNodeFunc,41typename SetOBBNodeFunc,42typename CreateLeafFunc,43typename ProgressMonitor,44typename ReportFinishedRangeFunc>4546class BuilderT47{48ALIGNED_CLASS_(16);49friend struct BVHBuilderHair;5051typedef FastAllocator::CachedAllocator Allocator;52typedef HeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> HeuristicBinningSAH;53typedef UnalignedHeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> UnalignedHeuristicBinningSAH;54typedef HeuristicStrandSplit HeuristicStrandSplitSAH;5556static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor57static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth58static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build5960static const size_t travCostAligned = 1;61static const size_t travCostUnaligned = 5;62static const size_t intCost = 6;6364BuilderT (Scene* scene,65PrimRef* prims,66const CreateAllocFunc& createAlloc,67const CreateAABBNodeFunc& createAABBNode,68const SetAABBNodeFunc& setAABBNode,69const CreateOBBNodeFunc& createOBBNode,70const SetOBBNodeFunc& setOBBNode,71const CreateLeafFunc& createLeaf,72const ProgressMonitor& progressMonitor,73const ReportFinishedRangeFunc& reportFinishedRange,74const Settings settings)7576: cfg(settings),77prims(prims),78createAlloc(createAlloc),79createAABBNode(createAABBNode),80setAABBNode(setAABBNode),81createOBBNode(createOBBNode),82setOBBNode(setOBBNode),83createLeaf(createLeaf),84progressMonitor(progressMonitor),85reportFinishedRange(reportFinishedRange),86alignedHeuristic(prims), unalignedHeuristic(scene,prims), strandHeuristic(scene,prims) {}8788/*! checks if all primitives are from the same geometry */89__forceinline bool sameGeometry(const PrimInfoRange& range)90{91if (range.size() == 0) return true;92unsigned int firstGeomID = prims[range.begin()].geomID();93for (size_t i=range.begin()+1; i<range.end(); i++) {94if (prims[i].geomID() != firstGeomID){95return false;96}97}98return true;99}100101/*! creates a large leaf that could be larger than supported by the BVH */102NodeRef createLargeLeaf(size_t depth, const PrimInfoRange& pinfo, Allocator alloc)103{104/* this should never occur but is a fatal error */105if (depth > cfg.maxDepth)106throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");107108/* create leaf for few primitives */109if (pinfo.size() <= cfg.maxLeafSize && sameGeometry(pinfo))110return createLeaf(prims,pinfo,alloc);111112/* fill all children by always splitting the largest one */113PrimInfoRange children[MAX_BRANCHING_FACTOR];114unsigned numChildren = 1;115children[0] = pinfo;116117do {118119/* find best child with largest bounding box area */120int bestChild = -1;121size_t bestSize = 0;122for (unsigned i=0; i<numChildren; i++)123{124/* ignore leaves as they cannot get split */125if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i]))126continue;127128/* remember child with largest size */129if (children[i].size() > bestSize) {130bestSize = children[i].size();131bestChild = i;132}133}134if (bestChild == -1) break;135136/*! split best child into left and right child */137__aligned(64) PrimInfoRange left, right;138if (!sameGeometry(children[bestChild])) {139alignedHeuristic.splitByGeometry(children[bestChild],left,right);140} else {141alignedHeuristic.splitFallback(children[bestChild],left,right);142}143144/* add new children left and right */145children[bestChild] = children[numChildren-1];146children[numChildren-1] = left;147children[numChildren+0] = right;148numChildren++;149150} while (numChildren < cfg.branchingFactor);151152/* create node */153auto node = createAABBNode(alloc);154155for (size_t i=0; i<numChildren; i++) {156const NodeRef child = createLargeLeaf(depth+1,children[i],alloc);157setAABBNode(node,i,child,children[i].geomBounds);158}159160return node;161}162163/*! performs split */164__noinline void split(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo, bool& aligned) // FIXME: not inlined as ICC otherwise uses much stack165{166/* variable to track the SAH of the best splitting approach */167float bestSAH = inf;168const size_t blocks = (pinfo.size()+(1ull<<cfg.logBlockSize)-1ull) >> cfg.logBlockSize;169const float leafSAH = intCost*float(blocks)*halfArea(pinfo.geomBounds);170171/* try standard binning in aligned space */172float alignedObjectSAH = inf;173HeuristicBinningSAH::Split alignedObjectSplit;174if (aligned) {175alignedObjectSplit = alignedHeuristic.find(pinfo,cfg.logBlockSize);176alignedObjectSAH = travCostAligned*halfArea(pinfo.geomBounds) + intCost*alignedObjectSplit.splitSAH();177bestSAH = min(alignedObjectSAH,bestSAH);178}179180/* try standard binning in unaligned space */181UnalignedHeuristicBinningSAH::Split unalignedObjectSplit;182LinearSpace3fa uspace;183float unalignedObjectSAH = inf;184if (bestSAH > 0.7f*leafSAH) {185uspace = unalignedHeuristic.computeAlignedSpace(pinfo);186const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(pinfo,uspace);187unalignedObjectSplit = unalignedHeuristic.find(sinfo,cfg.logBlockSize,uspace);188unalignedObjectSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*unalignedObjectSplit.splitSAH();189bestSAH = min(unalignedObjectSAH,bestSAH);190}191192/* try splitting into two strands */193HeuristicStrandSplitSAH::Split strandSplit;194float strandSAH = inf;195if (bestSAH > 0.7f*leafSAH && pinfo.size() <= 256) {196strandSplit = strandHeuristic.find(pinfo,cfg.logBlockSize);197strandSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*strandSplit.splitSAH();198bestSAH = min(strandSAH,bestSAH);199}200201/* fallback if SAH heuristics failed */202if (unlikely(!std::isfinite(bestSAH)))203{204alignedHeuristic.deterministic_order(pinfo);205alignedHeuristic.splitFallback(pinfo,linfo,rinfo);206}207208/* perform aligned split if this is best */209else if (bestSAH == alignedObjectSAH) {210alignedHeuristic.split(alignedObjectSplit,pinfo,linfo,rinfo);211}212213/* perform unaligned split if this is best */214else if (bestSAH == unalignedObjectSAH) {215unalignedHeuristic.split(unalignedObjectSplit,uspace,pinfo,linfo,rinfo);216aligned = false;217}218219/* perform strand split if this is best */220else if (bestSAH == strandSAH) {221strandHeuristic.split(strandSplit,pinfo,linfo,rinfo);222aligned = false;223}224225/* can never happen */226else227assert(false);228}229230/*! recursive build */231NodeRef recurse(size_t depth, const PrimInfoRange& pinfo, Allocator alloc, bool toplevel, bool alloc_barrier)232{233/* get thread local allocator */234if (!alloc)235alloc = createAlloc();236237/* call memory monitor function to signal progress */238if (toplevel && pinfo.size() <= SINGLE_THREADED_THRESHOLD)239progressMonitor(pinfo.size());240241PrimInfoRange children[MAX_BRANCHING_FACTOR];242243/* create leaf node */244if (depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || pinfo.size() <= cfg.minLeafSize) {245alignedHeuristic.deterministic_order(pinfo);246return createLargeLeaf(depth,pinfo,alloc);247}248249/* fill all children by always splitting the one with the largest surface area */250size_t numChildren = 1;251children[0] = pinfo;252bool aligned = true;253254do {255256/* find best child with largest bounding box area */257ssize_t bestChild = -1;258float bestArea = neg_inf;259for (size_t i=0; i<numChildren; i++)260{261/* ignore leaves as they cannot get split */262if (children[i].size() <= cfg.minLeafSize)263continue;264265/* remember child with largest area */266if (area(children[i].geomBounds) > bestArea) {267bestArea = area(children[i].geomBounds);268bestChild = i;269}270}271if (bestChild == -1) break;272273/*! split best child into left and right child */274PrimInfoRange left, right;275split(children[bestChild],left,right,aligned);276277/* add new children left and right */278children[bestChild] = children[numChildren-1];279children[numChildren-1] = left;280children[numChildren+0] = right;281numChildren++;282283} while (numChildren < cfg.branchingFactor);284285NodeRef node;286287/* create aligned node */288if (aligned)289{290node = createAABBNode(alloc);291292/* spawn tasks or ... */293if (pinfo.size() > SINGLE_THREADED_THRESHOLD)294{295parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {296for (size_t i=r.begin(); i<r.end(); i++) {297const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;298setAABBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),children[i].geomBounds);299_mm_mfence(); // to allow non-temporal stores during build300}301});302}303/* ... continue sequentially */304else {305for (size_t i=0; i<numChildren; i++) {306const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;307setAABBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),children[i].geomBounds);308}309}310}311312/* create unaligned node */313else314{315node = createOBBNode(alloc);316317/* spawn tasks or ... */318if (pinfo.size() > SINGLE_THREADED_THRESHOLD)319{320parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {321for (size_t i=r.begin(); i<r.end(); i++) {322const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]);323const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space);324const OBBox3fa obounds(space,sinfo.geomBounds);325const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;326setOBBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),obounds);327_mm_mfence(); // to allow non-temporal stores during build328}329});330}331/* ... continue sequentially */332else333{334for (size_t i=0; i<numChildren; i++) {335const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]);336const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space);337const OBBox3fa obounds(space,sinfo.geomBounds);338const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;339setOBBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),obounds);340}341}342}343344/* reports a finished range of primrefs */345if (unlikely(alloc_barrier))346reportFinishedRange(pinfo);347348return node;349}350351private:352Settings cfg;353PrimRef* prims;354const CreateAllocFunc& createAlloc;355const CreateAABBNodeFunc& createAABBNode;356const SetAABBNodeFunc& setAABBNode;357const CreateOBBNodeFunc& createOBBNode;358const SetOBBNodeFunc& setOBBNode;359const CreateLeafFunc& createLeaf;360const ProgressMonitor& progressMonitor;361const ReportFinishedRangeFunc& reportFinishedRange;362363private:364HeuristicBinningSAH alignedHeuristic;365UnalignedHeuristicBinningSAH unalignedHeuristic;366HeuristicStrandSplitSAH strandHeuristic;367};368369template<typename NodeRef,370typename CreateAllocFunc,371typename CreateAABBNodeFunc,372typename SetAABBNodeFunc,373typename CreateOBBNodeFunc,374typename SetOBBNodeFunc,375typename CreateLeafFunc,376typename ProgressMonitor,377typename ReportFinishedRangeFunc>378379static NodeRef build (const CreateAllocFunc& createAlloc,380const CreateAABBNodeFunc& createAABBNode,381const SetAABBNodeFunc& setAABBNode,382const CreateOBBNodeFunc& createOBBNode,383const SetOBBNodeFunc& setOBBNode,384const CreateLeafFunc& createLeaf,385const ProgressMonitor& progressMonitor,386const ReportFinishedRangeFunc& reportFinishedRange,387Scene* scene,388PrimRef* prims,389const PrimInfo& pinfo,390const Settings settings)391{392typedef BuilderT<NodeRef,393CreateAllocFunc,394CreateAABBNodeFunc,SetAABBNodeFunc,395CreateOBBNodeFunc,SetOBBNodeFunc,396CreateLeafFunc,ProgressMonitor,397ReportFinishedRangeFunc> Builder;398399Builder builder(scene,prims,createAlloc,400createAABBNode,setAABBNode,401createOBBNode,setOBBNode,402createLeaf,progressMonitor,reportFinishedRange,settings);403404NodeRef root = builder.recurse(1,pinfo,nullptr,true,false);405_mm_mfence(); // to allow non-temporal stores during build406return root;407}408};409}410}411412413