Path: blob/master/thirdparty/embree/kernels/builders/heuristic_openmerge_array.h
9906 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023// TODO:4// - adjust parallel build thresholds5// - openNodesBasedOnExtend should consider max extended size67#pragma once89#include "heuristic_binning.h"10#include "heuristic_spatial.h"1112/* stop opening of all bref.geomIDs are the same */13#define EQUAL_GEOMID_STOP_CRITERIA 11415/* 10% spatial extend threshold */16#define MAX_EXTEND_THRESHOLD 0.1f1718/* maximum is 8 children */19#define MAX_OPENED_CHILD_NODES 82021/* open until all build refs are below threshold size in one step */22#define USE_LOOP_OPENING 02324namespace embree25{26namespace isa27{28/*! Performs standard object binning */29template<typename NodeOpenerFunc, typename PrimRef, size_t OBJECT_BINS>30struct HeuristicArrayOpenMergeSAH31{32typedef BinSplit<OBJECT_BINS> Split;33typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> Binner;3435static const size_t PARALLEL_THRESHOLD = 1024;36static const size_t PARALLEL_FIND_BLOCK_SIZE = 512;37static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;3839static const size_t MOVE_STEP_SIZE = 64;40static const size_t CREATE_SPLITS_STEP_SIZE = 128;4142__forceinline HeuristicArrayOpenMergeSAH ()43: prims0(nullptr) {}4445/*! remember prim array */46__forceinline HeuristicArrayOpenMergeSAH (const NodeOpenerFunc& nodeOpenerFunc, PrimRef* prims0, size_t max_open_size)47: prims0(prims0), nodeOpenerFunc(nodeOpenerFunc), max_open_size(max_open_size)48{49assert(max_open_size <= MAX_OPENED_CHILD_NODES);50}5152struct OpenHeuristic53{54__forceinline OpenHeuristic( const PrimInfoExtRange& pinfo )55{56const Vec3fa diag = pinfo.geomBounds.size();57dim = maxDim(diag);58assert(diag[dim] > 0.0f);59inv_max_extend = 1.0f / diag[dim];60}6162__forceinline bool operator () ( PrimRef& prim ) const {63return !prim.node.isLeaf() && prim.bounds().size()[dim] * inv_max_extend > MAX_EXTEND_THRESHOLD;64}6566private:67size_t dim;68float inv_max_extend;69};7071/*! compute extended ranges */72__forceinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight)73{74assert(set.ext_range_size() > 0);75const float left_factor = (float)lweight / (lweight + rweight);76const size_t ext_range_size = set.ext_range_size();77const size_t left_ext_range_size = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size);78const size_t right_ext_range_size = ext_range_size - left_ext_range_size;79lset.set_ext_range(lset.end() + left_ext_range_size);80rset.set_ext_range(rset.end() + right_ext_range_size);81}8283/*! move ranges */84__forceinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset)85{86const size_t left_ext_range_size = lset.ext_range_size();87const size_t right_size = rset.size();8889/* has the left child an extended range? */90if (left_ext_range_size > 0)91{92/* left extended range smaller than right range ? */93if (left_ext_range_size < right_size)94{95/* only move a small part of the beginning of the right range to the end */96parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) {97for (size_t i=r.begin(); i<r.end(); i++)98prims0[i+right_size] = prims0[i];99});100}101else102{103/* no overlap, move entire right range to new location, can be made fully parallel */104parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE, [&](const range<size_t>& r) {105for (size_t i=r.begin(); i<r.end(); i++)106prims0[i+left_ext_range_size] = prims0[i];107});108}109/* update right range */110assert(rset.ext_end() + left_ext_range_size == set.ext_end());111rset.move_right(left_ext_range_size);112}113}114115/* estimates the extra space required when opening, and checks if all primitives are from same geometry */116__noinline std::pair<size_t,bool> getProperties(const PrimInfoExtRange& set)117{118const OpenHeuristic heuristic(set);119const unsigned int geomID = prims0[set.begin()].geomID();120121auto body = [&] (const range<size_t>& r) -> std::pair<size_t,bool> {122bool commonGeomID = true;123size_t opens = 0;124for (size_t i=r.begin(); i<r.end(); i++) {125commonGeomID &= prims0[i].geomID() == geomID;126if (heuristic(prims0[i]))127opens += prims0[i].node.getN()-1; // coarse approximation128}129return std::pair<size_t,bool>(opens,commonGeomID);130};131auto reduction = [&] (const std::pair<size_t,bool>& b0, const std::pair<size_t,bool>& b1) -> std::pair<size_t,bool> {132return std::pair<size_t,bool>(b0.first+b1.first,b0.second && b1.second);133};134return parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,std::pair<size_t,bool>(0,true),body,reduction);135}136137// FIXME: should consider maximum available extended size138__noinline void openNodesBasedOnExtend(PrimInfoExtRange& set)139{140const OpenHeuristic heuristic(set);141const size_t ext_range_start = set.end();142143if (false && set.size() < PARALLEL_THRESHOLD)144{145size_t extra_elements = 0;146for (size_t i=set.begin(); i<set.end(); i++)147{148if (heuristic(prims0[i]))149{150PrimRef tmp[MAX_OPENED_CHILD_NODES];151const size_t n = nodeOpenerFunc(prims0[i],tmp);152assert(extra_elements + n-1 <= set.ext_range_size());153for (size_t j=0; j<n; j++)154set.extend_center2(tmp[j]);155156prims0[i] = tmp[0];157for (size_t j=1; j<n; j++)158prims0[ext_range_start+extra_elements+j-1] = tmp[j];159extra_elements += n-1;160}161}162set._end += extra_elements;163}164else165{166std::atomic<size_t> ext_elements;167ext_elements.store(0);168PrimInfo info = parallel_reduce( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, PrimInfo(empty), [&](const range<size_t>& r) -> PrimInfo {169PrimInfo info(empty);170for (size_t i=r.begin(); i<r.end(); i++)171if (heuristic(prims0[i]))172{173PrimRef tmp[MAX_OPENED_CHILD_NODES];174const size_t n = nodeOpenerFunc(prims0[i],tmp);175const size_t ID = ext_elements.fetch_add(n-1);176assert(ID + n-1 <= set.ext_range_size());177178for (size_t j=0; j<n; j++)179info.extend_center2(tmp[j]);180181prims0[i] = tmp[0];182for (size_t j=1; j<n; j++)183prims0[ext_range_start+ID+j-1] = tmp[j];184}185return info;186}, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });187set.centBounds.extend(info.centBounds);188assert(ext_elements.load() <= set.ext_range_size());189set._end += ext_elements.load();190}191}192193__noinline void openNodesBasedOnExtendLoop(PrimInfoExtRange& set, const size_t est_new_elements)194{195const OpenHeuristic heuristic(set);196size_t next_iteration_extra_elements = est_new_elements;197198while (next_iteration_extra_elements <= set.ext_range_size())199{200next_iteration_extra_elements = 0;201size_t extra_elements = 0;202const size_t ext_range_start = set.end();203204for (size_t i=set.begin(); i<set.end(); i++)205{206if (heuristic(prims0[i]))207{208PrimRef tmp[MAX_OPENED_CHILD_NODES];209const size_t n = nodeOpenerFunc(prims0[i],tmp);210assert(extra_elements + n-1 <= set.ext_range_size());211for (size_t j=0;j<n;j++)212set.extend_center2(tmp[j]);213214prims0[i] = tmp[0];215for (size_t j=1;j<n;j++)216prims0[ext_range_start+extra_elements+j-1] = tmp[j];217extra_elements += n-1;218219for (size_t j=0; j<n; j++)220if (heuristic(tmp[j]))221next_iteration_extra_elements += tmp[j].node.getN()-1; // coarse approximation222223}224}225assert( extra_elements <= set.ext_range_size());226set._end += extra_elements;227228for (size_t i=set.begin();i<set.end();i++)229assert(prims0[i].numPrimitives() > 0);230231if (unlikely(next_iteration_extra_elements == 0)) break;232}233}234235__noinline const Split find(PrimInfoExtRange& set, const size_t logBlockSize)236{237/* single element */238if (set.size() <= 1)239return Split();240241/* disable opening if there is no overlap */242const size_t D = 4;243if (unlikely(set.has_ext_range() && set.size() <= D))244{245bool disjoint = true;246for (size_t j=set.begin(); j<set.end()-1; j++) {247for (size_t i=set.begin()+1; i<set.end(); i++) {248if (conjoint(prims0[j].bounds(),prims0[i].bounds())) {249disjoint = false; break;250}251}252}253if (disjoint) set.set_ext_range(set.end()); /* disables opening */254}255256std::pair<size_t,bool> p(0,false);257258/* disable opening when all primitives are from same geometry */259if (unlikely(set.has_ext_range()))260{261p = getProperties(set);262#if EQUAL_GEOMID_STOP_CRITERIA == 1263if (p.second) set.set_ext_range(set.end()); /* disable opening */264#endif265}266267/* open nodes when we have sufficient space available */268if (unlikely(set.has_ext_range()))269{270#if USE_LOOP_OPENING == 1271openNodesBasedOnExtendLoop(set,p.first);272#else273if (p.first <= set.ext_range_size())274openNodesBasedOnExtend(set);275#endif276277/* disable opening when insufficient space for opening a node available */278if (set.ext_range_size() < max_open_size-1)279set.set_ext_range(set.end()); /* disable opening */280}281282/* find best split */283return object_find(set,logBlockSize);284}285286287/*! finds the best object split */288__forceinline const Split object_find(const PrimInfoExtRange& set,const size_t logBlockSize)289{290if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize);291else return parallel_object_find (set,logBlockSize);292}293294/*! finds the best object split */295__noinline const Split sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)296{297Binner binner(empty);298const BinMapping<OBJECT_BINS> mapping(set.centBounds);299binner.bin(prims0,set.begin(),set.end(),mapping);300return binner.best(mapping,logBlockSize);301}302303/*! finds the best split */304__noinline const Split parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)305{306Binner binner(empty);307const BinMapping<OBJECT_BINS> mapping(set.centBounds);308const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround309auto body = [&] (const range<size_t>& r) -> Binner {310Binner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner;311};312auto reduction = [&] (const Binner& b0, const Binner& b1) -> Binner {313Binner r = b0; r.merge(b1,_mapping.size()); return r;314};315binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,body,reduction);316return binner.best(mapping,logBlockSize);317}318319/*! array partitioning */320__noinline void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset)321{322PrimInfoExtRange set = set_i;323324/* valid split */325if (unlikely(!split.valid())) {326deterministic_order(set);327splitFallback(set,lset,rset);328return;329}330331std::pair<size_t,size_t> ext_weights(0,0);332333/* object split */334if (likely(set.size() < PARALLEL_THRESHOLD))335ext_weights = sequential_object_split(split,set,lset,rset);336else337ext_weights = parallel_object_split(split,set,lset,rset);338339/* if we have an extended range, set extended child ranges and move right split range */340if (unlikely(set.has_ext_range()))341{342setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second);343moveExtentedRange(set,lset,rset);344}345}346347/*! array partitioning */348std::pair<size_t,size_t> sequential_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)349{350const size_t begin = set.begin();351const size_t end = set.end();352PrimInfo local_left(empty);353PrimInfo local_right(empty);354const unsigned int splitPos = split.pos;355const unsigned int splitDim = split.dim;356const unsigned int splitDimMask = (unsigned int)1 << splitDim;357358const vint4 vSplitPos(splitPos);359const vbool4 vSplitMask( (int)splitDimMask );360361size_t center = serial_partitioning(prims0,362begin,end,local_left,local_right,363[&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); },364[] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); });365366new (&lset) PrimInfoExtRange(begin,center,center,local_left);367new (&rset) PrimInfoExtRange(center,end,end,local_right);368assert(area(lset.geomBounds) >= 0.0f);369assert(area(rset.geomBounds) >= 0.0f);370return std::pair<size_t,size_t>(local_left.size(),local_right.size());371}372373/*! array partitioning */374__noinline std::pair<size_t,size_t> parallel_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)375{376const size_t begin = set.begin();377const size_t end = set.end();378PrimInfo left(empty);379PrimInfo right(empty);380const unsigned int splitPos = split.pos;381const unsigned int splitDim = split.dim;382const unsigned int splitDimMask = (unsigned int)1 << splitDim;383384const vint4 vSplitPos(splitPos);385const vbool4 vSplitMask( (int)splitDimMask );386auto isLeft = [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };387388const size_t center = parallel_partitioning(389prims0,begin,end,EmptyTy(),left,right,isLeft,390[] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); },391[] (PrimInfo& pinfo0,const PrimInfo& pinfo1) { pinfo0.merge(pinfo1); },392PARALLEL_PARTITION_BLOCK_SIZE);393394new (&lset) PrimInfoExtRange(begin,center,center,left);395new (&rset) PrimInfoExtRange(center,end,end,right);396assert(area(lset.geomBounds) >= 0.0f);397assert(area(rset.geomBounds) >= 0.0f);398399return std::pair<size_t,size_t>(left.size(),right.size());400}401402void deterministic_order(const extended_range<size_t>& set)403{404/* required as parallel partition destroys original primitive order */405std::sort(&prims0[set.begin()],&prims0[set.end()]);406}407408__forceinline void splitFallback(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)409{410const size_t begin = set.begin();411const size_t end = set.end();412const size_t center = (begin + end)/2;413414PrimInfo left(empty);415for (size_t i=begin; i<center; i++)416left.add_center2(prims0[i]);417418const size_t lweight = left.end;419420PrimInfo right(empty);421for (size_t i=center; i<end; i++)422right.add_center2(prims0[i]);423424const size_t rweight = right.end;425new (&lset) PrimInfoExtRange(begin,center,center,left);426new (&rset) PrimInfoExtRange(center,end,end,right);427428/* if we have an extended range */429if (set.has_ext_range())430{431setExtentedRanges(set,lset,rset,lweight,rweight);432moveExtentedRange(set,lset,rset);433}434}435436private:437PrimRef* const prims0;438const NodeOpenerFunc& nodeOpenerFunc;439size_t max_open_size;440};441}442}443444445