Path: blob/master/thirdparty/embree/kernels/builders/bvh_builder_msmblur_hair.h
9906 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "../bvh/bvh.h"6#include "../geometry/primitive.h"7#include "../builders/bvh_builder_msmblur.h"8#include "../builders/heuristic_binning_array_aligned.h"9#include "../builders/heuristic_binning_array_unaligned.h"10#include "../builders/heuristic_timesplit_array.h"1112namespace embree13{14namespace isa15{16struct BVHBuilderHairMSMBlur17{18/*! settings for msmblur builder */19struct Settings20{21/*! default settings */22Settings ()23: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8) {}2425public:26size_t branchingFactor; //!< branching factor of BVH to build27size_t maxDepth; //!< maximum depth of BVH to build28size_t logBlockSize; //!< log2 of blocksize for SAH heuristic29size_t minLeafSize; //!< minimum size of a leaf30size_t maxLeafSize; //!< maximum size of a leaf31};3233struct BuildRecord34{35public:36__forceinline BuildRecord () {}3738__forceinline BuildRecord (size_t depth)39: depth(depth) {}4041__forceinline BuildRecord (const SetMB& prims, size_t depth)42: depth(depth), prims(prims) {}4344__forceinline size_t size() const {45return prims.size();46}4748public:49size_t depth; //!< depth of the root of this subtree50SetMB prims; //!< the list of primitives51};5253template<typename NodeRef,54typename RecalculatePrimRef,55typename CreateAllocFunc,56typename CreateAABBNodeMBFunc,57typename SetAABBNodeMBFunc,58typename CreateOBBNodeMBFunc,59typename SetOBBNodeMBFunc,60typename CreateLeafFunc,61typename ProgressMonitor>6263class BuilderT64{65ALIGNED_CLASS_(16);6667static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor68static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth69static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build7071typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;72typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;7374typedef FastAllocator::CachedAllocator Allocator;75typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;7677typedef HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> HeuristicTemporal;78typedef HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> HeuristicBinning;79typedef UnalignedHeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> UnalignedHeuristicBinning;8081public:8283BuilderT (Scene* scene,84const RecalculatePrimRef& recalculatePrimRef,85const CreateAllocFunc& createAlloc,86const CreateAABBNodeMBFunc& createAABBNodeMB,87const SetAABBNodeMBFunc& setAABBNodeMB,88const CreateOBBNodeMBFunc& createOBBNodeMB,89const SetOBBNodeMBFunc& setOBBNodeMB,90const CreateLeafFunc& createLeaf,91const ProgressMonitor& progressMonitor,92const Settings settings)9394: cfg(settings),95scene(scene),96recalculatePrimRef(recalculatePrimRef),97createAlloc(createAlloc),98createAABBNodeMB(createAABBNodeMB), setAABBNodeMB(setAABBNodeMB),99createOBBNodeMB(createOBBNodeMB), setOBBNodeMB(setOBBNodeMB),100createLeaf(createLeaf),101progressMonitor(progressMonitor),102unalignedHeuristic(scene),103temporalSplitHeuristic(scene->device,recalculatePrimRef) {}104105private:106107/*! checks if all primitives are from the same geometry */108__forceinline bool sameGeometry(const SetMB& set)109{110mvector<PrimRefMB>& prims = *set.prims;111unsigned int firstGeomID = prims[set.begin()].geomID();112for (size_t i=set.begin()+1; i<set.end(); i++) {113if (prims[i].geomID() != firstGeomID){114return false;115}116}117return true;118}119120/*! performs some split if SAH approaches fail */121void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)122{123mvector<PrimRefMB>& prims = *set.prims;124125const size_t begin = set.begin();126const size_t end = set.end();127const size_t center = (begin + end)/2;128129PrimInfoMB linfo = empty;130for (size_t i=begin; i<center; i++)131linfo.add_primref(prims[i]);132133PrimInfoMB rinfo = empty;134for (size_t i=center; i<end; i++)135rinfo.add_primref(prims[i]);136137new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);138new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);139}140141void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)142{143assert(set.size() > 1);144const size_t begin = set.begin();145const size_t end = set.end();146PrimInfoMB linfo(empty);147PrimInfoMB rinfo(empty);148unsigned int geomID = (*set.prims)[begin].geomID();149size_t center = serial_partitioning(set.prims->data(),begin,end,linfo,rinfo,150[&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },151[ ] ( PrimInfoMB& a, const PrimRefMB& ref ) { a.add_primref(ref); });152153new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);154new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);155}156157/*! creates a large leaf that could be larger than supported by the BVH */158NodeRecordMB4D createLargeLeaf(BuildRecord& current, Allocator alloc)159{160/* this should never occur but is a fatal error */161if (current.depth > cfg.maxDepth)162throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");163164/* special case when directly creating leaf without any splits that could shrink time_range */165bool force_split = false;166if (current.depth == 1 && current.size() > 0)167{168BBox1f c = empty;169BBox1f p = current.prims.time_range;170for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {171mvector<PrimRefMB>& prims = *current.prims.prims;172c.extend(prims[i].time_range);173}174175force_split = c.lower > p.lower || c.upper < p.upper;176}177178/* create leaf for few primitives */179if (current.size() <= cfg.maxLeafSize && sameGeometry(current.prims) && !force_split)180return createLeaf(current.prims,alloc);181182/* fill all children by always splitting the largest one */183LocalChildList children(current);184NodeRecordMB4D values[MAX_BRANCHING_FACTOR];185186do {187188/* find best child with largest bounding box area */189int bestChild = -1;190size_t bestSize = 0;191for (unsigned i=0; i<children.size(); i++)192{193/* ignore leaves as they cannot get split */194if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i].prims) && !force_split)195continue;196197force_split = false;198199/* remember child with largest size */200if (children[i].size() > bestSize) {201bestSize = children[i].size();202bestChild = i;203}204}205if (bestChild == -1) break;206207/*! split best child into left and right child */208BuildRecord left(current.depth+1);209BuildRecord right(current.depth+1);210if (!sameGeometry(children[bestChild].prims)) {211splitByGeometry(children[bestChild].prims,left.prims,right.prims);212} else {213splitFallback(children[bestChild].prims,left.prims,right.prims);214}215children.split(bestChild,left,right,std::unique_ptr<mvector<PrimRefMB>>());216217} while (children.size() < cfg.branchingFactor);218219220/* detect time_ranges that have shrunken */221bool timesplit = false;222for (size_t i=0; i<children.size(); i++) {223const BBox1f c = children[i].prims.time_range;224const BBox1f p = current.prims.time_range;225timesplit |= c.lower > p.lower || c.upper < p.upper;226}227228/* create node */229NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,timesplit);230231LBBox3fa bounds = empty;232for (size_t i=0; i<children.size(); i++) {233values[i] = createLargeLeaf(children[i],alloc);234bounds.extend(values[i].lbounds);235}236237setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);238239if (timesplit)240bounds = current.prims.linearBounds(recalculatePrimRef);241242return NodeRecordMB4D(node,bounds,current.prims.time_range);243}244245/*! performs split */246std::unique_ptr<mvector<PrimRefMB>> split(const BuildRecord& current, BuildRecord& lrecord, BuildRecord& rrecord, bool& aligned, bool& timesplit)247{248/* variable to track the SAH of the best splitting approach */249float bestSAH = inf;250const float leafSAH = current.prims.leafSAH(cfg.logBlockSize);251252/* perform standard binning in aligned space */253HeuristicBinning::Split alignedObjectSplit = alignedHeuristic.find(current.prims,cfg.logBlockSize);254float alignedObjectSAH = alignedObjectSplit.splitSAH();255bestSAH = min(alignedObjectSAH,bestSAH);256257/* perform standard binning in unaligned space */258UnalignedHeuristicBinning::Split unalignedObjectSplit;259LinearSpace3fa uspace;260float unalignedObjectSAH = inf;261if (alignedObjectSAH > 0.7f*leafSAH) {262uspace = unalignedHeuristic.computeAlignedSpaceMB(scene,current.prims);263const SetMB sset = current.prims.primInfo(recalculatePrimRef,uspace);264unalignedObjectSplit = unalignedHeuristic.find(sset,cfg.logBlockSize,uspace);265unalignedObjectSAH = 1.3f*unalignedObjectSplit.splitSAH(); // makes unaligned splits more expensive266bestSAH = min(unalignedObjectSAH,bestSAH);267}268269/* do temporal splits only if previous approaches failed to produce good SAH and the the time range is large enough */270float temporal_split_sah = inf;271typename HeuristicTemporal::Split temporal_split;272if (bestSAH > 0.5f*leafSAH) {273if (current.prims.time_range.size() > 1.01f/float(current.prims.max_num_time_segments)) {274temporal_split = temporalSplitHeuristic.find(current.prims,cfg.logBlockSize);275temporal_split_sah = temporal_split.splitSAH();276bestSAH = min(temporal_split_sah,bestSAH);277}278}279280/* perform fallback split if SAH heuristics failed */281if (unlikely(!std::isfinite(bestSAH))) {282current.prims.deterministic_order();283splitFallback(current.prims,lrecord.prims,rrecord.prims);284}285/* perform aligned split if this is best */286else if (likely(bestSAH == alignedObjectSAH)) {287alignedHeuristic.split(alignedObjectSplit,current.prims,lrecord.prims,rrecord.prims);288}289/* perform unaligned split if this is best */290else if (likely(bestSAH == unalignedObjectSAH)) {291unalignedHeuristic.split(unalignedObjectSplit,uspace,current.prims,lrecord.prims,rrecord.prims);292aligned = false;293}294/* perform temporal split if this is best */295else if (likely(bestSAH == temporal_split_sah)) {296timesplit = true;297return temporalSplitHeuristic.split(temporal_split,current.prims,lrecord.prims,rrecord.prims);298}299else300assert(false);301302return std::unique_ptr<mvector<PrimRefMB>>();303}304305/*! recursive build */306NodeRecordMB4D recurse(BuildRecord& current, Allocator alloc, bool toplevel)307{308/* get thread local allocator */309if (!alloc)310alloc = createAlloc();311312/* call memory monitor function to signal progress */313if (toplevel && current.size() <= SINGLE_THREADED_THRESHOLD)314progressMonitor(current.size());315316/* create leaf node */317if (current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || current.size() <= cfg.minLeafSize) {318current.prims.deterministic_order();319return createLargeLeaf(current,alloc);320}321322/* fill all children by always splitting the one with the largest surface area */323NodeRecordMB4D values[MAX_BRANCHING_FACTOR];324LocalChildList children(current);325bool aligned = true;326bool timesplit = false;327328do {329330/* find best child with largest bounding box area */331ssize_t bestChild = -1;332float bestArea = neg_inf;333for (size_t i=0; i<children.size(); i++)334{335/* ignore leaves as they cannot get split */336if (children[i].size() <= cfg.minLeafSize)337continue;338339/* remember child with largest area */340const float A = children[i].prims.halfArea();341if (A > bestArea) {342bestArea = children[i].prims.halfArea();343bestChild = i;344}345}346if (bestChild == -1) break;347348/*! split best child into left and right child */349BuildRecord left(current.depth+1);350BuildRecord right(current.depth+1);351std::unique_ptr<mvector<PrimRefMB>> new_vector = split(children[bestChild],left,right,aligned,timesplit);352children.split(bestChild,left,right,std::move(new_vector));353354} while (children.size() < cfg.branchingFactor);355356/* detect time_ranges that have shrunken */357for (size_t i=0; i<children.size(); i++) {358const BBox1f c = children[i].prims.time_range;359const BBox1f p = current.prims.time_range;360timesplit |= c.lower > p.lower || c.upper < p.upper;361}362363/* create time split node */364if (timesplit)365{366const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true);367368/* spawn tasks or ... */369if (current.size() > SINGLE_THREADED_THRESHOLD)370{371parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {372for (size_t i=r.begin(); i<r.end(); i++) {373values[i] = recurse(children[i],nullptr,true);374_mm_mfence(); // to allow non-temporal stores during build375}376});377}378/* ... continue sequential */379else {380for (size_t i=0; i<children.size(); i++) {381values[i] = recurse(children[i],alloc,false);382}383}384385setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);386387const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef);388return NodeRecordMB4D(node,bounds,current.prims.time_range);389}390391/* create aligned node */392else if (aligned)393{394const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true);395396/* spawn tasks or ... */397if (current.size() > SINGLE_THREADED_THRESHOLD)398{399LBBox3fa cbounds[MAX_BRANCHING_FACTOR];400parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {401for (size_t i=r.begin(); i<r.end(); i++) {402values[i] = recurse(children[i],nullptr,true);403cbounds[i] = values[i].lbounds;404_mm_mfence(); // to allow non-temporal stores during build405}406});407408LBBox3fa bounds = empty;409for (size_t i=0; i<children.size(); i++)410bounds.extend(cbounds[i]);411setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);412return NodeRecordMB4D(node,bounds,current.prims.time_range);413}414/* ... continue sequentially */415else416{417LBBox3fa bounds = empty;418for (size_t i=0; i<children.size(); i++) {419values[i] = recurse(children[i],alloc,false);420bounds.extend(values[i].lbounds);421}422setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);423return NodeRecordMB4D(node,bounds,current.prims.time_range);424}425}426427/* create unaligned node */428else429{430const NodeRef node = createOBBNodeMB(alloc);431432/* spawn tasks or ... */433if (current.size() > SINGLE_THREADED_THRESHOLD)434{435parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {436for (size_t i=r.begin(); i<r.end(); i++) {437const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims);438const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space);439const auto child = recurse(children[i],nullptr,true);440setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range);441_mm_mfence(); // to allow non-temporal stores during build442}443});444}445/* ... continue sequentially */446else447{448for (size_t i=0; i<children.size(); i++) {449const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims);450const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space);451const auto child = recurse(children[i],alloc,false);452setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range);453}454}455456const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef);457return NodeRecordMB4D(node,bounds,current.prims.time_range);458}459}460461public:462463/*! entry point into builder */464NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)465{466BuildRecord record(SetMB(pinfo,&prims),1);467auto root = recurse(record,nullptr,true);468_mm_mfence(); // to allow non-temporal stores during build469return root;470}471472private:473Settings cfg;474Scene* scene;475const RecalculatePrimRef& recalculatePrimRef;476const CreateAllocFunc& createAlloc;477const CreateAABBNodeMBFunc& createAABBNodeMB;478const SetAABBNodeMBFunc& setAABBNodeMB;479const CreateOBBNodeMBFunc& createOBBNodeMB;480const SetOBBNodeMBFunc& setOBBNodeMB;481const CreateLeafFunc& createLeaf;482const ProgressMonitor& progressMonitor;483484private:485HeuristicBinning alignedHeuristic;486UnalignedHeuristicBinning unalignedHeuristic;487HeuristicTemporal temporalSplitHeuristic;488};489490template<typename NodeRef,491typename RecalculatePrimRef,492typename CreateAllocFunc,493typename CreateAABBNodeMBFunc,494typename SetAABBNodeMBFunc,495typename CreateOBBNodeMBFunc,496typename SetOBBNodeMBFunc,497typename CreateLeafFunc,498typename ProgressMonitor>499500static BVHNodeRecordMB4D<NodeRef> build (Scene* scene, mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo,501const RecalculatePrimRef& recalculatePrimRef,502const CreateAllocFunc& createAlloc,503const CreateAABBNodeMBFunc& createAABBNodeMB,504const SetAABBNodeMBFunc& setAABBNodeMB,505const CreateOBBNodeMBFunc& createOBBNodeMB,506const SetOBBNodeMBFunc& setOBBNodeMB,507const CreateLeafFunc& createLeaf,508const ProgressMonitor& progressMonitor,509const Settings settings)510{511typedef BuilderT<NodeRef,RecalculatePrimRef,CreateAllocFunc,512CreateAABBNodeMBFunc,SetAABBNodeMBFunc,513CreateOBBNodeMBFunc,SetOBBNodeMBFunc,514CreateLeafFunc,ProgressMonitor> Builder;515516Builder builder(scene,recalculatePrimRef,createAlloc,517createAABBNodeMB,setAABBNodeMB,518createOBBNodeMB,setOBBNodeMB,519createLeaf,progressMonitor,settings);520521return builder(prims,pinfo);522}523};524}525}526527528