Path: blob/master/thirdparty/embree/kernels/builders/heuristic_binning_array_unaligned.h
9906 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "heuristic_binning.h"67namespace embree8{9namespace isa10{11/*! Performs standard object binning */12template<typename PrimRef, size_t BINS>13struct UnalignedHeuristicArrayBinningSAH14{15typedef BinSplit<BINS> Split;16typedef BinInfoT<BINS,PrimRef,BBox3fa> Binner;17typedef range<size_t> Set;1819__forceinline UnalignedHeuristicArrayBinningSAH () // FIXME: required?20: scene(nullptr), prims(nullptr) {}2122/*! remember prim array */23__forceinline UnalignedHeuristicArrayBinningSAH (Scene* scene, PrimRef* prims)24: scene(scene), prims(prims) {}2526const LinearSpace3fa computeAlignedSpace(const range<size_t>& set)27{28Vec3fa axis(0,0,1);29uint64_t bestGeomPrimID = -1;3031/*! find curve with minimum ID that defines valid direction */32for (size_t i=set.begin(); i<set.end(); i++)33{34const unsigned int geomID = prims[i].geomID();35const unsigned int primID = prims[i].primID();36const uint64_t geomprimID = prims[i].ID64();37if (geomprimID >= bestGeomPrimID) continue;38const Vec3fa axis1 = scene->get(geomID)->computeDirection(primID);39if (sqr_length(axis1) > 1E-18f) {40axis = normalize(axis1);41bestGeomPrimID = geomprimID;42}43}44return frame(axis).transposed();45}4647const PrimInfo computePrimInfo(const range<size_t>& set, const LinearSpace3fa& space)48{49auto computeBounds = [&](const range<size_t>& r) -> CentGeomBBox3fa50{51CentGeomBBox3fa bounds(empty);52for (size_t i=r.begin(); i<r.end(); i++) {53Geometry* mesh = scene->get(prims[i].geomID());54bounds.extend(mesh->vbounds(space,prims[i].primID()));55}56return bounds;57};5859const CentGeomBBox3fa bounds = parallel_reduce(set.begin(), set.end(), size_t(1024), size_t(4096),60CentGeomBBox3fa(empty), computeBounds, CentGeomBBox3fa::merge2);6162return PrimInfo(set.begin(),set.end(),bounds);63}6465struct BinBoundsAndCenter66{67__forceinline BinBoundsAndCenter(Scene* scene, const LinearSpace3fa& space)68: scene(scene), space(space) {}6970/*! returns center for binning */71__forceinline Vec3fa binCenter(const PrimRef& ref) const72{73Geometry* mesh = (Geometry*) scene->get(ref.geomID());74BBox3fa bounds = mesh->vbounds(space,ref.primID());75return embree::center2(bounds);76}7778/*! returns bounds and centroid used for binning */79__forceinline void binBoundsAndCenter(const PrimRef& ref, BBox3fa& bounds_o, Vec3fa& center_o) const80{81Geometry* mesh = (Geometry*) scene->get(ref.geomID());82BBox3fa bounds = mesh->vbounds(space,ref.primID());83bounds_o = bounds;84center_o = embree::center2(bounds);85}8687private:88Scene* scene;89const LinearSpace3fa space;90};9192/*! finds the best split */93__forceinline const Split find(const PrimInfoRange& pinfo, const size_t logBlockSize, const LinearSpace3fa& space)94{95if (likely(pinfo.size() < 10000))96return find_template<false>(pinfo,logBlockSize,space);97else98return find_template<true>(pinfo,logBlockSize,space);99}100101/*! finds the best split */102template<bool parallel>103const Split find_template(const PrimInfoRange& set, const size_t logBlockSize, const LinearSpace3fa& space)104{105Binner binner(empty);106const BinMapping<BINS> mapping(set);107BinBoundsAndCenter binBoundsAndCenter(scene,space);108bin_serial_or_parallel<parallel>(binner,prims,set.begin(),set.end(),size_t(4096),mapping,binBoundsAndCenter);109return binner.best(mapping,logBlockSize);110}111112/*! array partitioning */113__forceinline void split(const Split& split, const LinearSpace3fa& space, const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)114{115if (likely(set.size() < 10000))116split_template<false>(split,space,set,lset,rset);117else118split_template<true>(split,space,set,lset,rset);119}120121/*! array partitioning */122template<bool parallel>123__forceinline void split_template(const Split& split, const LinearSpace3fa& space, const Set& set, PrimInfoRange& lset, PrimInfoRange& rset)124{125if (!split.valid()) {126deterministic_order(set);127return splitFallback(set,lset,rset);128}129130const size_t begin = set.begin();131const size_t end = set.end();132CentGeomBBox3fa local_left(empty);133CentGeomBBox3fa local_right(empty);134const int splitPos = split.pos;135const int splitDim = split.dim;136BinBoundsAndCenter binBoundsAndCenter(scene,space);137138size_t center = 0;139if (likely(set.size() < 10000))140center = serial_partitioning(prims,begin,end,local_left,local_right,141[&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,binBoundsAndCenter)[splitDim] < splitPos; },142[] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); });143else144center = parallel_partitioning(prims,begin,end,EmptyTy(),local_left,local_right,145[&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,binBoundsAndCenter)[splitDim] < splitPos; },146[] (CentGeomBBox3fa& pinfo,const PrimRef& ref) { pinfo.extend_center2(ref); },147[] (CentGeomBBox3fa& pinfo0,const CentGeomBBox3fa& pinfo1) { pinfo0.merge(pinfo1); },148128);149150new (&lset) PrimInfoRange(begin,center,local_left);151new (&rset) PrimInfoRange(center,end,local_right);152assert(area(lset.geomBounds) >= 0.0f);153assert(area(rset.geomBounds) >= 0.0f);154}155156void deterministic_order(const range<size_t>& set)157{158/* required as parallel partition destroys original primitive order */159std::sort(&prims[set.begin()],&prims[set.end()]);160}161162void splitFallback(const range<size_t>& set, PrimInfoRange& lset, PrimInfoRange& rset)163{164const size_t begin = set.begin();165const size_t end = set.end();166const size_t center = (begin + end)/2;167168CentGeomBBox3fa left(empty);169for (size_t i=begin; i<center; i++)170left.extend_center2(prims[i]);171new (&lset) PrimInfoRange(begin,center,left);172173CentGeomBBox3fa right(empty);174for (size_t i=center; i<end; i++)175right.extend_center2(prims[i]);176new (&rset) PrimInfoRange(center,end,right);177}178179private:180Scene* const scene;181PrimRef* const prims;182};183184/*! Performs standard object binning */185template<typename PrimRefMB, size_t BINS>186struct UnalignedHeuristicArrayBinningMB187{188typedef BinSplit<BINS> Split;189typedef typename PrimRefMB::BBox BBox;190typedef BinInfoT<BINS,PrimRefMB,BBox> ObjectBinner;191192static const size_t PARALLEL_THRESHOLD = 3 * 1024;193static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;194static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;195196UnalignedHeuristicArrayBinningMB(Scene* scene)197: scene(scene) {}198199const LinearSpace3fa computeAlignedSpaceMB(Scene* scene, const SetMB& set)200{201Vec3fa axis0(0,0,1);202uint64_t bestGeomPrimID = -1;203204/*! find curve with minimum ID that defines valid direction */205for (size_t i=set.begin(); i<set.end(); i++)206{207const PrimRefMB& prim = (*set.prims)[i];208const unsigned int geomID = prim.geomID();209const unsigned int primID = prim.primID();210const uint64_t geomprimID = prim.ID64();211if (geomprimID >= bestGeomPrimID) continue;212213const Geometry* mesh = scene->get(geomID);214const range<int> tbounds = mesh->timeSegmentRange(set.time_range);215if (tbounds.size() == 0) continue;216217const size_t t = (tbounds.begin()+tbounds.end())/2;218const Vec3fa axis1 = mesh->computeDirection(primID,t);219if (sqr_length(axis1) > 1E-18f) {220axis0 = normalize(axis1);221bestGeomPrimID = geomprimID;222}223}224225return frame(axis0).transposed();226}227228struct BinBoundsAndCenter229{230__forceinline BinBoundsAndCenter(Scene* scene, BBox1f time_range, const LinearSpace3fa& space)231: scene(scene), time_range(time_range), space(space) {}232233/*! returns center for binning */234template<typename PrimRef>235__forceinline Vec3fa binCenter(const PrimRef& ref) const236{237Geometry* mesh = scene->get(ref.geomID());238LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);239return center2(lbounds.interpolate(0.5f));240}241242/*! returns bounds and centroid used for binning */243__noinline void binBoundsAndCenter (const PrimRefMB& ref, BBox3fa& bounds_o, Vec3fa& center_o) const // __noinline is workaround for ICC16 bug under MacOSX244{245Geometry* mesh = scene->get(ref.geomID());246LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);247bounds_o = lbounds.interpolate(0.5f);248center_o = center2(bounds_o);249}250251/*! returns bounds and centroid used for binning */252__noinline void binBoundsAndCenter (const PrimRefMB& ref, LBBox3fa& bounds_o, Vec3fa& center_o) const // __noinline is workaround for ICC16 bug under MacOSX253{254Geometry* mesh = scene->get(ref.geomID());255LBBox3fa lbounds = mesh->vlinearBounds(space,ref.primID(),time_range);256bounds_o = lbounds;257center_o = center2(lbounds.interpolate(0.5f));258}259260private:261Scene* scene;262BBox1f time_range;263const LinearSpace3fa space;264};265266/*! finds the best split */267const Split find(const SetMB& set, const size_t logBlockSize, const LinearSpace3fa& space)268{269BinBoundsAndCenter binBoundsAndCenter(scene,set.time_range,space);270ObjectBinner binner(empty);271const BinMapping<BINS> mapping(set.size(),set.centBounds);272bin_parallel(binner,set.prims->data(),set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,mapping,binBoundsAndCenter);273Split osplit = binner.best(mapping,logBlockSize);274osplit.sah *= set.time_range.size();275if (!osplit.valid()) osplit.data = Split::SPLIT_FALLBACK; // use fallback split276return osplit;277}278279/*! array partitioning */280__forceinline void split(const Split& split, const LinearSpace3fa& space, const SetMB& set, SetMB& lset, SetMB& rset)281{282BinBoundsAndCenter binBoundsAndCenter(scene,set.time_range,space);283const size_t begin = set.begin();284const size_t end = set.end();285PrimInfoMB left = empty;286PrimInfoMB right = empty;287const vint4 vSplitPos(split.pos);288const vbool4 vSplitMask(1 << split.dim);289auto isLeft = [&] (const PrimRefMB &ref) { return any(((vint4)split.mapping.bin_unsafe(ref,binBoundsAndCenter) < vSplitPos) & vSplitMask); };290auto reduction = [] (PrimInfoMB& pinfo, const PrimRefMB& ref) { pinfo.add_primref(ref); };291auto reduction2 = [] (PrimInfoMB& pinfo0,const PrimInfoMB& pinfo1) { pinfo0.merge(pinfo1); };292size_t center = parallel_partitioning(set.prims->data(),begin,end,EmptyTy(),left,right,isLeft,reduction,reduction2,PARALLEL_PARTITION_BLOCK_SIZE,PARALLEL_THRESHOLD);293new (&lset) SetMB(left,set.prims,range<size_t>(begin,center),set.time_range);294new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);295}296297private:298Scene* scene;299};300}301}302303304