Path: blob/master/thirdparty/embree/kernels/builders/heuristic_spatial_array.h
9913 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "heuristic_binning.h"6#include "heuristic_spatial.h"78namespace embree9{10namespace isa11{12#if 013#define SPATIAL_ASPLIT_OVERLAP_THRESHOLD 0.2f14#define SPATIAL_ASPLIT_SAH_THRESHOLD 0.95f15#define SPATIAL_ASPLIT_AREA_THRESHOLD 0.0f16#else17#define SPATIAL_ASPLIT_OVERLAP_THRESHOLD 0.1f18#define SPATIAL_ASPLIT_SAH_THRESHOLD 0.99f19#define SPATIAL_ASPLIT_AREA_THRESHOLD 0.000005f20#endif2122struct PrimInfoExtRange : public CentGeomBBox3fa, public extended_range<size_t>23{24__forceinline PrimInfoExtRange() {25}2627__forceinline PrimInfoExtRange(EmptyTy)28: CentGeomBBox3fa(EmptyTy()), extended_range<size_t>(0,0,0) {}2930__forceinline PrimInfoExtRange(size_t begin, size_t end, size_t ext_end, const CentGeomBBox3fa& centGeomBounds)31: CentGeomBBox3fa(centGeomBounds), extended_range<size_t>(begin,end,ext_end) {}3233__forceinline float leafSAH() const {34return expectedApproxHalfArea(geomBounds)*float(size());35}3637__forceinline float leafSAH(size_t block_shift) const {38return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);39}40};4142template<typename ObjectSplit, typename SpatialSplit>43struct Split244{45__forceinline Split2 () {}4647__forceinline Split2 (const Split2& other)48{49spatial = other.spatial;50sah = other.sah;51if (spatial) spatialSplit() = other.spatialSplit();52else objectSplit() = other.objectSplit();53}5455__forceinline Split2& operator= (const Split2& other)56{57spatial = other.spatial;58sah = other.sah;59if (spatial) spatialSplit() = other.spatialSplit();60else objectSplit() = other.objectSplit();61return *this;62}6364__forceinline ObjectSplit& objectSplit() { return *( ObjectSplit*)data; }65__forceinline const ObjectSplit& objectSplit() const { return *(const ObjectSplit*)data; }6667__forceinline SpatialSplit& spatialSplit() { return *( SpatialSplit*)data; }68__forceinline const SpatialSplit& spatialSplit() const { return *(const SpatialSplit*)data; }6970__forceinline Split2 (const ObjectSplit& objectSplit, float sah)71: spatial(false), sah(sah)72{73new (data) ObjectSplit(objectSplit);74}7576__forceinline Split2 (const SpatialSplit& spatialSplit, float sah)77: spatial(true), sah(sah)78{79new (data) SpatialSplit(spatialSplit);80}8182__forceinline float splitSAH() const {83return sah;84}8586__forceinline bool valid() const {87return sah < float(inf);88}8990public:91__aligned(64) char data[sizeof(ObjectSplit) > sizeof(SpatialSplit) ? sizeof(ObjectSplit) : sizeof(SpatialSplit)];92bool spatial;93float sah;94};9596/*! Performs standard object binning */97template<typename PrimitiveSplitterFactory, typename PrimRef, size_t OBJECT_BINS, size_t SPATIAL_BINS>98struct HeuristicArraySpatialSAH99{100typedef BinSplit<OBJECT_BINS> ObjectSplit;101typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> ObjectBinner;102103typedef SpatialBinSplit<SPATIAL_BINS> SpatialSplit;104typedef SpatialBinInfo<SPATIAL_BINS,PrimRef> SpatialBinner;105106//typedef extended_range<size_t> Set;107typedef Split2<ObjectSplit,SpatialSplit> Split;108109static const size_t PARALLEL_THRESHOLD = 3*1024;110static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;111static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;112113static const size_t MOVE_STEP_SIZE = 64;114static const size_t CREATE_SPLITS_STEP_SIZE = 64;115116__forceinline HeuristicArraySpatialSAH ()117: prims0(nullptr) {}118119/*! remember prim array */120__forceinline HeuristicArraySpatialSAH (const PrimitiveSplitterFactory& splitterFactory, PrimRef* prims0, const CentGeomBBox3fa& root_info)121: prims0(prims0), splitterFactory(splitterFactory), root_info(root_info) {}122123124/*! compute extended ranges */125__noinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight)126{127assert(set.ext_range_size() > 0);128const float left_factor = (float)lweight / (lweight + rweight);129const size_t ext_range_size = set.ext_range_size();130const size_t left_ext_range_size = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size);131const size_t right_ext_range_size = ext_range_size - left_ext_range_size;132lset.set_ext_range(lset.end() + left_ext_range_size);133rset.set_ext_range(rset.end() + right_ext_range_size);134}135136/*! move ranges */137__noinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset)138{139const size_t left_ext_range_size = lset.ext_range_size();140const size_t right_size = rset.size();141142/* has the left child an extended range? */143if (left_ext_range_size > 0)144{145/* left extended range smaller than right range ? */146if (left_ext_range_size < right_size)147{148/* only move a small part of the beginning of the right range to the end */149parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) {150for (size_t i=r.begin(); i<r.end(); i++)151prims0[i+right_size] = prims0[i];152});153}154else155{156/* no overlap, move entire right range to new location, can be made fully parallel */157parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE, [&](const range<size_t>& r) {158for (size_t i=r.begin(); i<r.end(); i++)159prims0[i+left_ext_range_size] = prims0[i];160});161}162/* update right range */163assert(rset.ext_end() + left_ext_range_size == set.ext_end());164rset.move_right(left_ext_range_size);165}166}167168/*! finds the best split */169const Split find(const PrimInfoExtRange& set, const size_t logBlockSize)170{171SplitInfo oinfo;172const ObjectSplit object_split = object_find(set,logBlockSize,oinfo);173const float object_split_sah = object_split.splitSAH();174175if (unlikely(set.has_ext_range()))176{177const BBox3fa overlap = intersect(oinfo.leftBounds, oinfo.rightBounds);178179/* do only spatial splits if the child bounds overlap */180if (safeArea(overlap) >= SPATIAL_ASPLIT_AREA_THRESHOLD*safeArea(root_info.geomBounds) &&181safeArea(overlap) >= SPATIAL_ASPLIT_OVERLAP_THRESHOLD*safeArea(set.geomBounds))182{183const SpatialSplit spatial_split = spatial_find(set, logBlockSize);184const float spatial_split_sah = spatial_split.splitSAH();185186/* valid spatial split, better SAH and number of splits do not exceed extended range */187if (spatial_split_sah < SPATIAL_ASPLIT_SAH_THRESHOLD*object_split_sah &&188spatial_split.left + spatial_split.right - set.size() <= set.ext_range_size())189{190return Split(spatial_split,spatial_split_sah);191}192}193}194195return Split(object_split,object_split_sah);196}197198/*! finds the best object split */199__forceinline const ObjectSplit object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)200{201if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize,info);202else return parallel_object_find (set,logBlockSize,info);203}204205/*! finds the best object split */206__noinline const ObjectSplit sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)207{208ObjectBinner binner(empty);209const BinMapping<OBJECT_BINS> mapping(set);210binner.bin(prims0,set.begin(),set.end(),mapping);211ObjectSplit s = binner.best(mapping,logBlockSize);212binner.getSplitInfo(mapping, s, info);213return s;214}215216/*! finds the best split */217__noinline const ObjectSplit parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize, SplitInfo &info)218{219ObjectBinner binner(empty);220const BinMapping<OBJECT_BINS> mapping(set);221const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround222binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,223[&] (const range<size_t>& r) -> ObjectBinner { ObjectBinner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner; },224[&] (const ObjectBinner& b0, const ObjectBinner& b1) -> ObjectBinner { ObjectBinner r = b0; r.merge(b1,_mapping.size()); return r; });225ObjectSplit s = binner.best(mapping,logBlockSize);226binner.getSplitInfo(mapping, s, info);227return s;228}229230/*! finds the best spatial split */231__forceinline const SpatialSplit spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)232{233if (set.size() < PARALLEL_THRESHOLD) return sequential_spatial_find(set, logBlockSize);234else return parallel_spatial_find (set, logBlockSize);235}236237/*! finds the best spatial split */238__noinline const SpatialSplit sequential_spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)239{240SpatialBinner binner(empty);241const SpatialBinMapping<SPATIAL_BINS> mapping(set);242binner.bin2(splitterFactory,prims0,set.begin(),set.end(),mapping);243/* todo: best spatial split not exceeding the extended range does not provide any benefit ?*/244return binner.best(mapping,logBlockSize); //,set.ext_size());245}246247__noinline const SpatialSplit parallel_spatial_find(const PrimInfoExtRange& set, const size_t logBlockSize)248{249SpatialBinner binner(empty);250const SpatialBinMapping<SPATIAL_BINS> mapping(set);251const SpatialBinMapping<SPATIAL_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround252binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,253[&] (const range<size_t>& r) -> SpatialBinner {254SpatialBinner binner(empty);255binner.bin2(splitterFactory,prims0,r.begin(),r.end(),_mapping);256return binner; },257[&] (const SpatialBinner& b0, const SpatialBinner& b1) -> SpatialBinner { return SpatialBinner::reduce(b0,b1); });258/* todo: best spatial split not exceeding the extended range does not provide any benefit ?*/259return binner.best(mapping,logBlockSize); //,set.ext_size());260}261262263/*! subdivides primitives based on a spatial split */264__noinline void create_spatial_splits(PrimInfoExtRange& set, const SpatialSplit& split, const SpatialBinMapping<SPATIAL_BINS> &mapping)265{266assert(set.has_ext_range());267const size_t max_ext_range_size = set.ext_range_size();268const size_t ext_range_start = set.end();269270/* atomic counter for number of primref splits */271std::atomic<size_t> ext_elements;272ext_elements.store(0);273274const float fpos = split.mapping.pos(split.pos,split.dim);275276const unsigned int mask = 0xFFFFFFFF >> RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS;277278parallel_for( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, [&](const range<size_t>& r) {279for (size_t i=r.begin();i<r.end();i++)280{281const unsigned int splits = prims0[i].geomID() >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS);282283if (likely(splits <= 1)) continue; /* todo: does this ever happen ? */284285const int bin0 = split.mapping.bin(prims0[i].lower)[split.dim];286const int bin1 = split.mapping.bin(prims0[i].upper)[split.dim];287if (unlikely(bin0 < split.pos && bin1 >= split.pos))288{289assert(splits > 1);290291PrimRef left,right;292const auto splitter = splitterFactory(prims0[i]);293splitter(prims0[i],split.dim,fpos,left,right);294295// no empty splits296if (unlikely(left.bounds().empty() || right.bounds().empty())) continue;297298left.lower.u = (left.lower.u & mask) | ((splits-1) << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));299right.lower.u = (right.lower.u & mask) | ((splits-1) << (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));300301const size_t ID = ext_elements.fetch_add(1);302303/* break if the number of subdivided elements are greater than the maximum allowed size */304if (unlikely(ID >= max_ext_range_size))305break;306307/* only write within the correct bounds */308assert(ID < max_ext_range_size);309prims0[i] = left;310prims0[ext_range_start+ID] = right;311}312}313});314315const size_t numExtElements = min(max_ext_range_size,ext_elements.load());316assert(set.end()+numExtElements<=set.ext_end());317set._end += numExtElements;318}319320/*! array partitioning */321void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset)322{323PrimInfoExtRange set = set_i;324325/* valid split */326if (unlikely(!split.valid())) {327deterministic_order(set);328return splitFallback(set,lset,rset);329}330331std::pair<size_t,size_t> ext_weights(0,0);332333if (unlikely(split.spatial))334{335create_spatial_splits(set,split.spatialSplit(), split.spatialSplit().mapping);336337/* spatial split */338if (likely(set.size() < PARALLEL_THRESHOLD))339ext_weights = sequential_spatial_split(split.spatialSplit(),set,lset,rset);340else341ext_weights = parallel_spatial_split(split.spatialSplit(),set,lset,rset);342}343else344{345/* object split */346if (likely(set.size() < PARALLEL_THRESHOLD))347ext_weights = sequential_object_split(split.objectSplit(),set,lset,rset);348else349ext_weights = parallel_object_split(split.objectSplit(),set,lset,rset);350}351352/* if we have an extended range, set extended child ranges and move right split range */353if (unlikely(set.has_ext_range()))354{355setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second);356moveExtentedRange(set,lset,rset);357}358}359360/*! array partitioning */361std::pair<size_t,size_t> sequential_object_split(const ObjectSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)362{363const size_t begin = set.begin();364const size_t end = set.end();365PrimInfo local_left(empty);366PrimInfo local_right(empty);367const unsigned int splitPos = split.pos;368const unsigned int splitDim = split.dim;369const unsigned int splitDimMask = (unsigned int)1 << splitDim;370371const typename ObjectBinner::vint vSplitPos(splitPos);372const typename ObjectBinner::vbool vSplitMask(splitDimMask);373size_t center = serial_partitioning(prims0,374begin,end,local_left,local_right,375[&] (const PrimRef& ref) {376return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask);377},378[] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); });379const size_t left_weight = local_left.end;380const size_t right_weight = local_right.end;381382new (&lset) PrimInfoExtRange(begin,center,center,local_left);383new (&rset) PrimInfoExtRange(center,end,end,local_right);384385assert(!lset.geomBounds.empty() && area(lset.geomBounds) >= 0.0f);386assert(!rset.geomBounds.empty() && area(rset.geomBounds) >= 0.0f);387return std::pair<size_t,size_t>(left_weight,right_weight);388}389390391/*! array partitioning */392__noinline std::pair<size_t,size_t> sequential_spatial_split(const SpatialSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)393{394const size_t begin = set.begin();395const size_t end = set.end();396PrimInfo local_left(empty);397PrimInfo local_right(empty);398const unsigned int splitPos = split.pos;399const unsigned int splitDim = split.dim;400const unsigned int splitDimMask = (unsigned int)1 << splitDim;401402/* init spatial mapping */403const SpatialBinMapping<SPATIAL_BINS> &mapping = split.mapping;404const vint4 vSplitPos(splitPos);405const vbool4 vSplitMask( (int)splitDimMask );406407size_t center = serial_partitioning(prims0,408begin,end,local_left,local_right,409[&] (const PrimRef& ref) {410const Vec3fa c = ref.bounds().center();411return any(((vint4)mapping.bin(c) < vSplitPos) & vSplitMask);412},413[] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); });414415const size_t left_weight = local_left.end;416const size_t right_weight = local_right.end;417418new (&lset) PrimInfoExtRange(begin,center,center,local_left);419new (&rset) PrimInfoExtRange(center,end,end,local_right);420assert(!lset.geomBounds.empty() && area(lset.geomBounds) >= 0.0f);421assert(!rset.geomBounds.empty() && area(rset.geomBounds) >= 0.0f);422return std::pair<size_t,size_t>(left_weight,right_weight);423}424425426427/*! array partitioning */428__noinline std::pair<size_t,size_t> parallel_object_split(const ObjectSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)429{430const size_t begin = set.begin();431const size_t end = set.end();432PrimInfo left(empty);433PrimInfo right(empty);434const unsigned int splitPos = split.pos;435const unsigned int splitDim = split.dim;436const unsigned int splitDimMask = (unsigned int)1 << splitDim;437438const typename ObjectBinner::vint vSplitPos(splitPos);439const typename ObjectBinner::vbool vSplitMask(splitDimMask);440auto isLeft = [&] (const PrimRef &ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };441442const size_t center = parallel_partitioning(443prims0,begin,end,EmptyTy(),left,right,isLeft,444[] (PrimInfo &pinfo,const PrimRef &ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); },445[] (PrimInfo &pinfo0,const PrimInfo &pinfo1) { pinfo0.merge(pinfo1); },446PARALLEL_PARTITION_BLOCK_SIZE);447448const size_t left_weight = left.end;449const size_t right_weight = right.end;450451left.begin = begin; left.end = center;452right.begin = center; right.end = end;453454new (&lset) PrimInfoExtRange(begin,center,center,left);455new (&rset) PrimInfoExtRange(center,end,end,right);456457assert(area(left.geomBounds) >= 0.0f);458assert(area(right.geomBounds) >= 0.0f);459return std::pair<size_t,size_t>(left_weight,right_weight);460}461462/*! array partitioning */463__noinline std::pair<size_t,size_t> parallel_spatial_split(const SpatialSplit& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)464{465const size_t begin = set.begin();466const size_t end = set.end();467PrimInfo left(empty);468PrimInfo right(empty);469const unsigned int splitPos = split.pos;470const unsigned int splitDim = split.dim;471const unsigned int splitDimMask = (unsigned int)1 << splitDim;472473/* init spatial mapping */474const SpatialBinMapping<SPATIAL_BINS>& mapping = split.mapping;475const vint4 vSplitPos(splitPos);476const vbool4 vSplitMask( (int)splitDimMask );477478auto isLeft = [&] (const PrimRef &ref) {479const Vec3fa c = ref.bounds().center();480return any(((vint4)mapping.bin(c) < vSplitPos) & vSplitMask); };481482const size_t center = parallel_partitioning(483prims0,begin,end,EmptyTy(),left,right,isLeft,484[] (PrimInfo &pinfo,const PrimRef &ref) { pinfo.add_center2(ref,ref.lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS)); },485[] (PrimInfo &pinfo0,const PrimInfo &pinfo1) { pinfo0.merge(pinfo1); },486PARALLEL_PARTITION_BLOCK_SIZE);487488const size_t left_weight = left.end;489const size_t right_weight = right.end;490491left.begin = begin; left.end = center;492right.begin = center; right.end = end;493494new (&lset) PrimInfoExtRange(begin,center,center,left);495new (&rset) PrimInfoExtRange(center,end,end,right);496497assert(area(left.geomBounds) >= 0.0f);498assert(area(right.geomBounds) >= 0.0f);499return std::pair<size_t,size_t>(left_weight,right_weight);500}501502void deterministic_order(const PrimInfoExtRange& set)503{504/* required as parallel partition destroys original primitive order */505std::sort(&prims0[set.begin()],&prims0[set.end()]);506}507508void splitFallback(const PrimInfoExtRange& set,509PrimInfoExtRange& lset,510PrimInfoExtRange& rset)511{512const size_t begin = set.begin();513const size_t end = set.end();514const size_t center = (begin + end)/2;515516PrimInfo left(empty);517for (size_t i=begin; i<center; i++) {518left.add_center2(prims0[i],prims0[i].lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));519}520const size_t lweight = left.end;521522PrimInfo right(empty);523for (size_t i=center; i<end; i++) {524right.add_center2(prims0[i],prims0[i].lower.u >> (32-RESERVED_NUM_SPATIAL_SPLITS_GEOMID_BITS));525}526const size_t rweight = right.end;527528new (&lset) PrimInfoExtRange(begin,center,center,left);529new (&rset) PrimInfoExtRange(center,end,end,right);530531/* if we have an extended range */532if (set.has_ext_range()) {533setExtentedRanges(set,lset,rset,lweight,rweight);534moveExtentedRange(set,lset,rset);535}536}537538private:539PrimRef* const prims0;540const PrimitiveSplitterFactory& splitterFactory;541const CentGeomBBox3fa& root_info;542};543}544}545546547