Path: blob/master/thirdparty/embree/kernels/builders/heuristic_binning_array_aligned.h
9906 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "heuristic_binning.h"67namespace embree8{9namespace isa10{11struct PrimInfoRange : public CentGeomBBox3fa, public range<size_t>12{13__forceinline PrimInfoRange () {14}1516__forceinline PrimInfoRange(const PrimInfo& pinfo)17: CentGeomBBox3fa(pinfo), range<size_t>(pinfo.begin,pinfo.end) {}1819__forceinline PrimInfoRange(EmptyTy)20: CentGeomBBox3fa(EmptyTy()), range<size_t>(0,0) {}2122__forceinline PrimInfoRange (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds)23: CentGeomBBox3fa(centGeomBounds), range<size_t>(begin,end) {}2425__forceinline PrimInfoRange (range<size_t> r, const CentGeomBBox3fa& centGeomBounds)26: CentGeomBBox3fa(centGeomBounds), range<size_t>(r) {}2728__forceinline float leafSAH() const {29return expectedApproxHalfArea(geomBounds)*float(size());30}3132__forceinline float leafSAH(size_t block_shift) const {33return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);34}3536__forceinline range<size_t> get_range() const {37return range<size_t>(begin(),end());38}3940template<typename PrimRef>41__forceinline void add_primref(const PrimRef& prim)42{43CentGeomBBox3fa::extend_primref(prim);44_end++;45}46};4748inline void performFallbackSplit(PrimRef* const prims, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)49{50const size_t begin = pinfo.begin();51const size_t end = pinfo.end();52const size_t center = (begin + end)/2;5354CentGeomBBox3fa left(empty);55for (size_t i=begin; i<center; i++)56left.extend_center2(prims[i]);57new (&linfo) PrimInfoRange(begin,center,left);5859CentGeomBBox3fa right(empty);60for (size_t i=center; i<end; i++)61right.extend_center2(prims[i]);62new (&rinfo) PrimInfoRange(center,end,right);63}6465template<typename Type, typename getTypeFunc>66inline void performTypeSplit(const getTypeFunc& getType, Type type, PrimRef* const prims, range<size_t> range, PrimInfoRange& linfo, PrimInfoRange& rinfo)67{68CentGeomBBox3fa local_left(empty), local_right(empty);69auto isLeft = [&] (const PrimRef& ref) { return type == getType(ref.geomID()); };70const size_t center = serial_partitioning(prims,range.begin(),range.end(),local_left,local_right,isLeft,CentGeomBBox3fa::extend_ref);71linfo = PrimInfoRange(make_range(range.begin(),center ),local_left);72rinfo = PrimInfoRange(make_range(center ,range.end()),local_right);73}7475/*! Performs standard object binning */76template<typename PrimRef, size_t BINS>77struct HeuristicArrayBinningSAH78{79typedef BinSplit<BINS> Split;80typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner;81typedef range<size_t> Set;8283static const size_t PARALLEL_THRESHOLD = 3 * 1024;84static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;85static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;8687__forceinline HeuristicArrayBinningSAH ()88: prims(nullptr) {}8990/*! remember prim array */91__forceinline HeuristicArrayBinningSAH (PrimRef* prims)92: prims(prims) {}9394/*! finds the best split */95__noinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize)96{97if (likely(pinfo.size() < PARALLEL_THRESHOLD))98return find_template<false>(pinfo,logBlockSize);99else100return find_template<true>(pinfo,logBlockSize);101}102103template<bool parallel>104__forceinline const Split find_template(const PrimInfoRange& pinfo, const size_t logBlockSize)105{106Binner binner(empty);107const BinMapping<BINS> mapping(pinfo);108bin_serial_or_parallel<parallel>(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping);109return binner.best(mapping,logBlockSize);110}111112/*! finds the best split */113__noinline const Split find_block_size(const PrimInfoRange& pinfo, const size_t blockSize)114{115if (likely(pinfo.size() < PARALLEL_THRESHOLD))116return find_block_size_template<false>(pinfo,blockSize);117else118return find_block_size_template<true>(pinfo,blockSize);119}120121template<bool parallel>122__forceinline const Split find_block_size_template(const PrimInfoRange& pinfo, const size_t blockSize)123{124Binner binner(empty);125const BinMapping<BINS> mapping(pinfo);126bin_serial_or_parallel<parallel>(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping);127return binner.best_block_size(mapping,blockSize);128}129130/*! array partitioning */131__forceinline void split(const Split& split, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)132{133if (likely(pinfo.size() < PARALLEL_THRESHOLD))134split_template<false>(split,pinfo,linfo,rinfo);135else136split_template<true>(split,pinfo,linfo,rinfo);137}138139template<bool parallel>140__forceinline void split_template(const Split& split, const PrimInfoRange& set, PrimInfoRange& lset, PrimInfoRange& rset)141{142if (!split.valid()) {143deterministic_order(set);144return splitFallback(set,lset,rset);145}146147const size_t begin = set.begin();148const size_t end = set.end();149CentGeomBBox3fa local_left(empty);150CentGeomBBox3fa local_right(empty);151const unsigned int splitPos = split.pos;152const unsigned int splitDim = split.dim;153const unsigned int splitDimMask = (unsigned int)1 << splitDim;154155const typename Binner::vint vSplitPos(splitPos);156const typename Binner::vbool vSplitMask(splitDimMask);157auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };158159size_t center = 0;160if (!parallel)161center = serial_partitioning(prims,begin,end,local_left,local_right,isLeft,162[] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); });163else164center = parallel_partitioning(165prims,begin,end,EmptyTy(),local_left,local_right,isLeft,166[] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); },167[] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) { pinfo0.merge(pinfo1); },168PARALLEL_PARTITION_BLOCK_SIZE);169170new (&lset) PrimInfoRange(begin,center,local_left);171new (&rset) PrimInfoRange(center,end,local_right);172assert(area(lset.geomBounds) >= 0.0f);173assert(area(rset.geomBounds) >= 0.0f);174}175176void deterministic_order(const PrimInfoRange& pinfo)177{178/* required as parallel partition destroys original primitive order */179std::sort(&prims[pinfo.begin()],&prims[pinfo.end()]);180}181182void splitFallback(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo) {183performFallbackSplit(prims,pinfo,linfo,rinfo);184}185186void splitByGeometry(const range<size_t>& range, PrimInfoRange& linfo, PrimInfoRange& rinfo)187{188assert(range.size() > 1);189CentGeomBBox3fa left(empty);190CentGeomBBox3fa right(empty);191unsigned int geomID = prims[range.begin()].geomID();192size_t center = serial_partitioning(prims,range.begin(),range.end(),left,right,193[&] ( const PrimRef& prim ) { return prim.geomID() == geomID; },194[ ] ( CentGeomBBox3fa& a, const PrimRef& ref ) { a.extend_center2(ref); });195196new (&linfo) PrimInfoRange(range.begin(),center,left);197new (&rinfo) PrimInfoRange(center,range.end(),right);198}199200private:201PrimRef* const prims;202};203204#if !defined(RTHWIF_STANDALONE)205206/*! Performs standard object binning */207template<typename PrimRefMB, size_t BINS>208struct HeuristicArrayBinningMB209{210typedef BinSplit<BINS> Split;211typedef typename PrimRefMB::BBox BBox;212typedef BinInfoT<BINS,PrimRefMB,BBox> ObjectBinner;213static const size_t PARALLEL_THRESHOLD = 3 * 1024;214static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;215static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;216217/*! finds the best split */218const Split find(const SetMB& set, const size_t logBlockSize)219{220ObjectBinner binner(empty);221const BinMapping<BINS> mapping(set.size(),set.centBounds);222bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping);223Split osplit = binner.best(mapping,logBlockSize);224osplit.sah *= set.time_range.size();225if (!osplit.valid()) osplit.data = Split::SPLIT_FALLBACK; // use fallback split226return osplit;227}228229/*! array partitioning */230__forceinline void split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)231{232const size_t begin = set.begin();233const size_t end = set.end();234PrimInfoMB left = empty;235PrimInfoMB right = empty;236const vint4 vSplitPos(split.pos);237const vbool4 vSplitMask(1 << split.dim);238auto isLeft = [&] (const PrimRefMB &ref) { return any(((vint4)split.mapping.bin_unsafe(ref) < vSplitPos) & vSplitMask); };239auto reduction = [] (PrimInfoMB& pinfo, const PrimRefMB& ref) { pinfo.add_primref(ref); };240auto reduction2 = [] (PrimInfoMB& pinfo0,const PrimInfoMB& pinfo1) { pinfo0.merge(pinfo1); };241size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),left,right,isLeft,reduction,reduction2,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD);242new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range);243new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);244}245};246#endif247}248}249250251