Path: blob/master/thirdparty/embree/kernels/builders/heuristic_binning.h
9913 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "priminfo.h"6#include "priminfo_mb.h"7#include "../../common/algorithms/parallel_reduce.h"8#include "../../common/algorithms/parallel_partition.h"910namespace embree11{12namespace isa13{14/*! mapping into bins */15template<size_t BINS>16struct BinMapping17{18public:19__forceinline BinMapping() {}2021/*! calculates the mapping */22__forceinline BinMapping(size_t N, const BBox3fa& centBounds)23{24num = min(BINS,size_t(4.0f + 0.05f*N));25assert(num >= 1);26const vfloat4 eps = 1E-34f;27const vfloat4 diag = max(eps, (vfloat4) centBounds.size());28scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));29ofs = (vfloat4) centBounds.lower;30}3132/*! calculates the mapping */33__forceinline BinMapping(const BBox3fa& centBounds)34{35num = BINS;36const vfloat4 eps = 1E-34f;37const vfloat4 diag = max(eps, (vfloat4) centBounds.size());38scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));39ofs = (vfloat4) centBounds.lower;40}4142/*! calculates the mapping */43template<typename PrimInfo>44__forceinline BinMapping(const PrimInfo& pinfo)45{46const vfloat4 eps = 1E-34f;47num = min(BINS,size_t(4.0f + 0.05f*pinfo.size()));48const vfloat4 diag = max(eps,(vfloat4) pinfo.centBounds.size());49scale = select(diag > eps,vfloat4(0.99f*num)/diag,vfloat4(0.0f));50ofs = (vfloat4) pinfo.centBounds.lower;51}5253/*! returns number of bins */54__forceinline size_t size() const { return num; }5556/*! slower but safe binning */57__forceinline Vec3ia bin(const Vec3fa& p) const58{59const vint4 i = floori((vfloat4(p)-ofs)*scale);60assert(i[0] >= 0 && (size_t)i[0] < num);61assert(i[1] >= 0 && (size_t)i[1] < num);62assert(i[2] >= 0 && (size_t)i[2] < num);6364// we clamp to handle corner cases that could calculate out of bounds bin65return Vec3ia(clamp(i,vint4(0),vint4(num-1)));66}6768/*! faster but unsafe binning */69__forceinline Vec3ia bin_unsafe(const Vec3fa& p) const {70return Vec3ia(floori((vfloat4(p)-ofs)*scale));71}7273/*! faster but unsafe binning */74template<typename PrimRef>75__forceinline Vec3ia bin_unsafe(const PrimRef& p) const {76return bin_unsafe(p.binCenter());77}7879/*! faster but unsafe binning */80template<typename PrimRef, typename BinBoundsAndCenter>81__forceinline Vec3ia bin_unsafe(const PrimRef& p, const BinBoundsAndCenter& binBoundsAndCenter) const {82return bin_unsafe(binBoundsAndCenter.binCenter(p));83}8485template<typename PrimRef>86__forceinline bool bin_unsafe(const PrimRef& ref,87const vint4& vSplitPos,88const vbool4& splitDimMask) const // FIXME: rename to isLeft89{90return any(((vint4)bin_unsafe(center2(ref.bounds())) < vSplitPos) & splitDimMask);91}92/*! calculates left spatial position of bin */93__forceinline float pos(const size_t bin, const size_t dim) const {94return madd(float(bin),1.0f / scale[dim],ofs[dim]);95}9697/*! returns true if the mapping is invalid in some dimension */98__forceinline bool invalid(const size_t dim) const {99return scale[dim] == 0.0f;100}101102/*! stream output */103friend embree_ostream operator<<(embree_ostream cout, const BinMapping& mapping) {104return cout << "BinMapping { num = " << mapping.num << ", ofs = " << mapping.ofs << ", scale = " << mapping.scale << "}";105}106107public:108size_t num;109vfloat4 ofs,scale; //!< linear function that maps to bin ID110};111112/*! stores all information to perform some split */113template<size_t BINS>114struct BinSplit115{116enum117{118SPLIT_OBJECT = 0,119SPLIT_FALLBACK = 1,120SPLIT_ENFORCE = 2, // splits with larger ID are enforced in createLargeLeaf even if we could create a leaf already121SPLIT_TEMPORAL = 2,122SPLIT_GEOMID = 3,123};124125/*! construct an invalid split by default */126__forceinline BinSplit()127: sah(inf), dim(-1), pos(0), data(0) {}128129__forceinline BinSplit(float sah, unsigned data, int dim = 0, float fpos = 0)130: sah(sah), dim(dim), fpos(fpos), data(data) {}131132/*! constructs specified split */133__forceinline BinSplit(float sah, int dim, int pos, const BinMapping<BINS>& mapping)134: sah(sah), dim(dim), pos(pos), data(0), mapping(mapping) {}135136/*! tests if this split is valid */137__forceinline bool valid() const { return dim != -1; }138139/*! calculates surface area heuristic for performing the split */140__forceinline float splitSAH() const { return sah; }141142/*! stream output */143friend embree_ostream operator<<(embree_ostream cout, const BinSplit& split) {144return cout << "BinSplit { sah = " << split.sah << ", dim = " << split.dim << ", pos = " << split.pos << "}";145}146147public:148float sah; //!< SAH cost of the split149int dim; //!< split dimension150union { int pos; float fpos; }; //!< bin index for splitting151unsigned int data; //!< extra optional split data152BinMapping<BINS> mapping; //!< mapping into bins153};154155/*! stores extended information about the split */156template<typename BBox>157struct SplitInfoT158{159160__forceinline SplitInfoT () {}161162__forceinline SplitInfoT (size_t leftCount, const BBox& leftBounds, size_t rightCount, const BBox& rightBounds)163: leftCount(leftCount), rightCount(rightCount), leftBounds(leftBounds), rightBounds(rightBounds) {}164165public:166size_t leftCount,rightCount;167BBox leftBounds,rightBounds;168};169170typedef SplitInfoT<BBox3fa> SplitInfo;171typedef SplitInfoT<LBBox3fa> SplitInfo2;172173/*! stores all binning information */174template<size_t BINS, typename PrimRef, typename BBox>175struct __aligned(64) BinInfoT176{177typedef BinSplit<BINS> Split;178typedef vbool4 vbool;179typedef vint4 vint;180typedef vfloat4 vfloat;181182__forceinline BinInfoT() {183}184185__forceinline BinInfoT(EmptyTy) {186clear();187}188189/*! bin access function */190__forceinline BBox &bounds(const size_t binID, const size_t dimID) { return _bounds[binID][dimID]; }191__forceinline const BBox &bounds(const size_t binID, const size_t dimID) const { return _bounds[binID][dimID]; }192193__forceinline unsigned int &counts(const size_t binID, const size_t dimID) { return _counts[binID][dimID]; }194__forceinline const unsigned int &counts(const size_t binID, const size_t dimID) const { return _counts[binID][dimID]; }195196__forceinline vuint4 &counts(const size_t binID) { return _counts[binID]; }197__forceinline const vuint4 &counts(const size_t binID) const { return _counts[binID]; }198199/*! clears the bin info */200__forceinline void clear()201{202for (size_t i=0; i<BINS; i++) {203bounds(i,0) = bounds(i,1) = bounds(i,2) = empty;204counts(i) = vuint4(zero);205}206}207208/*! bins an array of primitives */209__forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping)210{211if (unlikely(N == 0)) return;212size_t i;213for (i=0; i<N-1; i+=2)214{215/*! map even and odd primitive to bin */216BBox prim0; Vec3fa center0;217prims[i+0].binBoundsAndCenter(prim0,center0);218const vint4 bin0 = (vint4)mapping.bin(center0);219220BBox prim1; Vec3fa center1;221prims[i+1].binBoundsAndCenter(prim1,center1);222const vint4 bin1 = (vint4)mapping.bin(center1);223224/*! increase bounds for bins for even primitive */225const unsigned int b00 = extract<0>(bin0); bounds(b00,0).extend(prim0);226const unsigned int b01 = extract<1>(bin0); bounds(b01,1).extend(prim0);227const unsigned int b02 = extract<2>(bin0); bounds(b02,2).extend(prim0);228const unsigned int s0 = (unsigned int)prims[i+0].size();229counts(b00,0)+=s0;230counts(b01,1)+=s0;231counts(b02,2)+=s0;232233/*! increase bounds of bins for odd primitive */234const unsigned int b10 = extract<0>(bin1); bounds(b10,0).extend(prim1);235const unsigned int b11 = extract<1>(bin1); bounds(b11,1).extend(prim1);236const unsigned int b12 = extract<2>(bin1); bounds(b12,2).extend(prim1);237const unsigned int s1 = (unsigned int)prims[i+1].size();238counts(b10,0)+=s1;239counts(b11,1)+=s1;240counts(b12,2)+=s1;241}242/*! for uneven number of primitives */243if (i < N)244{245/*! map primitive to bin */246BBox prim0; Vec3fa center0;247prims[i].binBoundsAndCenter(prim0,center0);248const vint4 bin0 = (vint4)mapping.bin(center0);249250/*! increase bounds of bins */251const unsigned int s0 = (unsigned int)prims[i].size();252const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);253const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);254const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);255}256}257258/*! bins an array of primitives */259template<typename BinBoundsAndCenter>260__forceinline void bin (const PrimRef* prims, size_t N, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter)261{262if (N == 0) return;263264size_t i;265for (i=0; i<N-1; i+=2)266{267/*! map even and odd primitive to bin */268BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0);269const vint4 bin0 = (vint4)mapping.bin(center0);270BBox prim1; Vec3fa center1; binBoundsAndCenter.binBoundsAndCenter(prims[i+1],prim1,center1);271const vint4 bin1 = (vint4)mapping.bin(center1);272273/*! increase bounds for bins for even primitive */274const unsigned int s0 = prims[i+0].size();275const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);276const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);277const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);278279/*! increase bounds of bins for odd primitive */280const unsigned int s1 = prims[i+1].size();281const int b10 = extract<0>(bin1); counts(b10,0)+=s1; bounds(b10,0).extend(prim1);282const int b11 = extract<1>(bin1); counts(b11,1)+=s1; bounds(b11,1).extend(prim1);283const int b12 = extract<2>(bin1); counts(b12,2)+=s1; bounds(b12,2).extend(prim1);284}285286/*! for uneven number of primitives */287if (i < N)288{289/*! map primitive to bin */290BBox prim0; Vec3fa center0; binBoundsAndCenter.binBoundsAndCenter(prims[i+0],prim0,center0);291const vint4 bin0 = (vint4)mapping.bin(center0);292293/*! increase bounds of bins */294const unsigned int s0 = prims[i+0].size();295const int b00 = extract<0>(bin0); counts(b00,0)+=s0; bounds(b00,0).extend(prim0);296const int b01 = extract<1>(bin0); counts(b01,1)+=s0; bounds(b01,1).extend(prim0);297const int b02 = extract<2>(bin0); counts(b02,2)+=s0; bounds(b02,2).extend(prim0);298}299}300301__forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping) {302bin(prims+begin,end-begin,mapping);303}304305template<typename BinBoundsAndCenter>306__forceinline void bin(const PrimRef* prims, size_t begin, size_t end, const BinMapping<BINS>& mapping, const BinBoundsAndCenter& binBoundsAndCenter) {307bin<BinBoundsAndCenter>(prims+begin,end-begin,mapping,binBoundsAndCenter);308}309310/*! merges in other binning information */311__forceinline void merge (const BinInfoT& other, size_t numBins)312{313314for (size_t i=0; i<numBins; i++)315{316counts(i) += other.counts(i);317bounds(i,0).extend(other.bounds(i,0));318bounds(i,1).extend(other.bounds(i,1));319bounds(i,2).extend(other.bounds(i,2));320}321}322323/*! reduces binning information */324static __forceinline const BinInfoT reduce (const BinInfoT& a, const BinInfoT& b, const size_t numBins = BINS)325{326BinInfoT c;327for (size_t i=0; i<numBins; i++)328{329c.counts(i) = a.counts(i)+b.counts(i);330c.bounds(i,0) = embree::merge(a.bounds(i,0),b.bounds(i,0));331c.bounds(i,1) = embree::merge(a.bounds(i,1),b.bounds(i,1));332c.bounds(i,2) = embree::merge(a.bounds(i,2),b.bounds(i,2));333}334return c;335}336337/*! finds the best split by scanning binning information */338__forceinline Split best(const BinMapping<BINS>& mapping, const size_t blocks_shift) const339{340/* sweep from right to left and compute parallel prefix of merged bounds */341vfloat4 rAreas[BINS];342vuint4 rCounts[BINS];343vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty;344for (size_t i=mapping.size()-1; i>0; i--)345{346count += counts(i);347rCounts[i] = count;348bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx);349by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by);350bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz);351rAreas[i][3] = 0.0f;352}353/* sweep from left to right and compute SAH */354vuint4 blocks_add = (1 << blocks_shift)-1;355vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0;356count = 0; bx = empty; by = empty; bz = empty;357for (size_t i=1; i<mapping.size(); i++, ii+=1)358{359count += counts(i-1);360bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx);361by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by);362bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz);363const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);364const vfloat4 rArea = rAreas[i];365const vuint4 lCount = (count +blocks_add) >> (unsigned int)(blocks_shift); // if blocks_shift >=1 then lCount < 4B and could be represented with an vint4, which would allow for faster vfloat4 conversions.366const vuint4 rCount = (rCounts[i]+blocks_add) >> (unsigned int)(blocks_shift);367const vfloat4 sah = madd(lArea,vfloat4(lCount),rArea*vfloat4(rCount));368//const vfloat4 sah = madd(lArea,vfloat4(vint4(lCount)),rArea*vfloat4(vint4(rCount)));369370vbestPos = select(sah < vbestSAH,ii ,vbestPos);371vbestSAH = select(sah < vbestSAH,sah,vbestSAH);372}373374/* find best dimension */375float bestSAH = inf;376int bestDim = -1;377int bestPos = 0;378for (int dim=0; dim<3; dim++)379{380/* ignore zero sized dimensions */381if (unlikely(mapping.invalid(dim)))382continue;383384/* test if this is a better dimension */385if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {386bestDim = dim;387bestPos = vbestPos[dim];388bestSAH = vbestSAH[dim];389}390}391return Split(bestSAH,bestDim,bestPos,mapping);392}393394/*! finds the best split by scanning binning information */395__forceinline Split best_block_size(const BinMapping<BINS>& mapping, const size_t blockSize) const396{397/* sweep from right to left and compute parallel prefix of merged bounds */398vfloat4 rAreas[BINS];399vuint4 rCounts[BINS];400vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty;401for (size_t i=mapping.size()-1; i>0; i--)402{403count += counts(i);404rCounts[i] = count;405bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx);406by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by);407bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz);408rAreas[i][3] = 0.0f;409}410/* sweep from left to right and compute SAH */411vuint4 blocks_add = blockSize-1;412vfloat4 blocks_factor = 1.0f/float(blockSize);413vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0;414count = 0; bx = empty; by = empty; bz = empty;415for (size_t i=1; i<mapping.size(); i++, ii+=1)416{417count += counts(i-1);418bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx);419by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by);420bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz);421const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);422const vfloat4 rArea = rAreas[i];423const vfloat4 lCount = floor(vfloat4(count +blocks_add)*blocks_factor);424const vfloat4 rCount = floor(vfloat4(rCounts[i]+blocks_add)*blocks_factor);425const vfloat4 sah = madd(lArea,lCount,rArea*rCount);426427vbestPos = select(sah < vbestSAH,ii ,vbestPos);428vbestSAH = select(sah < vbestSAH,sah,vbestSAH);429}430431/* find best dimension */432float bestSAH = inf;433int bestDim = -1;434int bestPos = 0;435for (int dim=0; dim<3; dim++)436{437/* ignore zero sized dimensions */438if (unlikely(mapping.invalid(dim)))439continue;440441/* test if this is a better dimension */442if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {443bestDim = dim;444bestPos = vbestPos[dim];445bestSAH = vbestSAH[dim];446}447}448return Split(bestSAH,bestDim,bestPos,mapping);449}450451/*! calculates extended split information */452__forceinline void getSplitInfo(const BinMapping<BINS>& mapping, const Split& split, SplitInfoT<BBox>& info) const453{454if (split.dim == -1) {455new (&info) SplitInfoT<BBox>(0,empty,0,empty);456return;457}458459size_t leftCount = 0;460BBox leftBounds = empty;461for (size_t i=0; i<(size_t)split.pos; i++) {462leftCount += counts(i,split.dim);463leftBounds.extend(bounds(i,split.dim));464}465size_t rightCount = 0;466BBox rightBounds = empty;467for (size_t i=split.pos; i<mapping.size(); i++) {468rightCount += counts(i,split.dim);469rightBounds.extend(bounds(i,split.dim));470}471new (&info) SplitInfoT<BBox>(leftCount,leftBounds,rightCount,rightBounds);472}473474/*! gets the number of primitives left of the split */475__forceinline size_t getLeftCount(const BinMapping<BINS>& mapping, const Split& split) const476{477if (unlikely(split.dim == -1)) return -1;478479size_t leftCount = 0;480for (size_t i = 0; i < (size_t)split.pos; i++) {481leftCount += counts(i, split.dim);482}483return leftCount;484}485486/*! gets the number of primitives right of the split */487__forceinline size_t getRightCount(const BinMapping<BINS>& mapping, const Split& split) const488{489if (unlikely(split.dim == -1)) return -1;490491size_t rightCount = 0;492for (size_t i = (size_t)split.pos; i<mapping.size(); i++) {493rightCount += counts(i, split.dim);494}495return rightCount;496}497498private:499BBox _bounds[BINS][3]; //!< geometry bounds for each bin in each dimension500vuint4 _counts[BINS]; //!< counts number of primitives that map into the bins501};502}503504template<typename BinInfoT, typename BinMapping, typename PrimRef>505__forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping)506{507if (likely(end-begin < parallelThreshold)) {508binner.bin(prims,begin,end,mapping);509} else {510binner = parallel_reduce(begin,end,blockSize,binner,511[&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; },512[&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });513}514}515516template<typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef>517__forceinline void bin_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, size_t parallelThreshold, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter)518{519if (likely(end-begin < parallelThreshold)) {520binner.bin(prims,begin,end,mapping,binBoundsAndCenter);521} else {522binner = parallel_reduce(begin,end,blockSize,binner,523[&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; },524[&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });525}526}527528template<bool parallel, typename BinInfoT, typename BinMapping, typename PrimRef>529__forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping)530{531if (!parallel) {532binner.bin(prims,begin,end,mapping);533} else {534binner = parallel_reduce(begin,end,blockSize,binner,535[&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping); return binner; },536[&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });537}538}539540template<bool parallel, typename BinBoundsAndCenter, typename BinInfoT, typename BinMapping, typename PrimRef>541__forceinline void bin_serial_or_parallel(BinInfoT& binner, const PrimRef* prims, size_t begin, size_t end, size_t blockSize, const BinMapping& mapping, const BinBoundsAndCenter& binBoundsAndCenter)542{543if (!parallel) {544binner.bin(prims,begin,end,mapping,binBoundsAndCenter);545} else {546binner = parallel_reduce(begin,end,blockSize,binner,547[&](const range<size_t>& r) -> BinInfoT { BinInfoT binner(empty); binner.bin(prims + r.begin(), r.size(), mapping, binBoundsAndCenter); return binner; },548[&](const BinInfoT& b0, const BinInfoT& b1) -> BinInfoT { BinInfoT r = b0; r.merge(b1, mapping.size()); return r; });549}550}551}552553554