Path: blob/master/thirdparty/embree/kernels/builders/primrefgen_presplit.h
9906 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "../../common/algorithms/parallel_reduce.h"6#include "../../common/algorithms/parallel_sort.h"7#include "../builders/heuristic_spatial.h"8#include "../builders/splitter.h"910#include "../../common/algorithms/parallel_partition.h"11#include "../../common/algorithms/parallel_for_for.h"12#include "../../common/algorithms/parallel_for_for_prefix_sum.h"1314#define DBG_PRESPLIT(x)15#define CHECK_PRESPLIT(x)1617#define GRID_SIZE 102418//#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 619#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 520#define MAX_PRESPLITS_PER_PRIMITIVE (1<<MAX_PRESPLITS_PER_PRIMITIVE_LOG)21//#define PRIORITY_CUTOFF_THRESHOLD 2.0f22#define PRIORITY_SPLIT_POS_WEIGHT 1.5f2324namespace embree25{26namespace isa27{28struct SplittingGrid29{30__forceinline SplittingGrid(const BBox3fa& bounds)31{32base = bounds.lower;33const Vec3fa diag = bounds.size();34extend = max(diag.x,max(diag.y,diag.z));35scale = extend == 0.0f ? 0.0f : GRID_SIZE / extend;36}3738__forceinline bool split_pos(const PrimRef& prim, unsigned int& dim_o, float& fsplit_o) const39{40/* compute morton code */41const Vec3fa lower = prim.lower;42const Vec3fa upper = prim.upper;43const Vec3fa glower = (lower-base)*Vec3fa(scale)+Vec3fa(0.2f);44const Vec3fa gupper = (upper-base)*Vec3fa(scale)-Vec3fa(0.2f);45Vec3ia ilower(floor(glower));46Vec3ia iupper(floor(gupper));4748/* this ignores dimensions that are empty */49iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));5051/* compute a morton code for the lower and upper grid coordinates. */52const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);53const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);5455/* if all bits are equal then we cannot split */56if (unlikely(lower_code == upper_code))57return false;5859/* compute octree level and dimension to perform the split in */60const unsigned int diff = 31 - lzcnt(lower_code^upper_code);61const unsigned int level = diff / 3;62const unsigned int dim = diff % 3;6364/* now we compute the grid position of the split */65const unsigned int isplit = iupper[dim] & ~((1<<level)-1);6667/* compute world space position of split */68const float inv_grid_size = 1.0f / GRID_SIZE;69const float fsplit = base[dim] + isplit * inv_grid_size * extend;70assert(prim.lower[dim] <= fsplit && prim.upper[dim] >= fsplit);7172dim_o = dim;73fsplit_o = fsplit;74return true;75}7677__forceinline Vec2i computeMC(const PrimRef& ref) const78{79const Vec3fa lower = ref.lower;80const Vec3fa upper = ref.upper;81const Vec3fa glower = (lower-base)*Vec3fa(scale)+Vec3fa(0.2f);82const Vec3fa gupper = (upper-base)*Vec3fa(scale)-Vec3fa(0.2f);83Vec3ia ilower(floor(glower));84Vec3ia iupper(floor(gupper));8586/* this ignores dimensions that are empty */87iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));8889/* compute a morton code for the lower and upper grid coordinates. */90const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);91const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);92return Vec2i(lower_code,upper_code);93}9495Vec3fa base;96float scale;97float extend;98};99100struct PresplitItem101{102union {103float priority;104unsigned int data;105};106unsigned int index;107108__forceinline operator unsigned() const {109return data;110}111112template<typename ProjectedPrimitiveAreaFunc>113__forceinline static float compute_priority(const ProjectedPrimitiveAreaFunc& primitiveArea, const PrimRef &ref, const Vec2i &mc)114{115const float area_aabb = area(ref.bounds());116const float area_prim = primitiveArea(ref);117if (area_prim == 0.0f) return 0.0f;118const unsigned int diff = 31 - lzcnt(mc.x^mc.y);119//assert(area_prim <= area_aabb); // may trigger due to numerical issues120const float area_diff = max(0.0f, area_aabb - area_prim);121//const float priority = powf(area_diff * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff),1.0f/4.0f);122const float priority = sqrtf(sqrtf( area_diff * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff) ));123//const float priority = sqrtf(sqrtf( area_diff ) );124//const float priority = sqrtfarea_diff;125//const float priority = area_diff; // 104 fps !!!!!!!!!!126//const float priority = 0.2f*area_aabb + 0.8f*area_diff; // 104 fps127//const float priority = area_aabb * max(area_aabb/area_prim,32.0f);128//const float priority = area_prim;129assert(priority >= 0.0f && priority < FLT_LARGE);130return priority;131}132133};134135inline std::ostream &operator<<(std::ostream &cout, const PresplitItem& item) {136return cout << "index " << item.index << " priority " << item.priority;137};138139#if 1140141template<typename Splitter>142void splitPrimitive(const Splitter& splitter,143const PrimRef& prim,144const unsigned int splitprims,145const SplittingGrid& grid,146PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],147unsigned int& numSubPrims)148{149assert(splitprims > 0 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);150151if (splitprims == 1)152{153assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);154subPrims[numSubPrims++] = prim;155}156else157{158unsigned int dim; float fsplit;159if (!grid.split_pos(prim, dim, fsplit))160{161assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);162subPrims[numSubPrims++] = prim;163return;164}165166/* split primitive */167PrimRef left,right;168splitter(prim,dim,fsplit,left,right);169assert(!left.bounds().empty());170assert(!right.bounds().empty());171172const unsigned int splitprims_left = splitprims/2;173const unsigned int splitprims_right = splitprims - splitprims_left;174splitPrimitive(splitter,left,splitprims_left,grid,subPrims,numSubPrims);175splitPrimitive(splitter,right,splitprims_right,grid,subPrims,numSubPrims);176}177}178179#else180181template<typename Splitter>182void splitPrimitive(const Splitter& splitter,183const PrimRef& prim,184const unsigned int targetSubPrims,185const SplittingGrid& grid,186PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],187unsigned int& numSubPrims)188{189assert(targetSubPrims > 0 && targetSubPrims <= MAX_PRESPLITS_PER_PRIMITIVE);190191auto compare = [] ( const PrimRef& a, const PrimRef& b ) {192return area(a.bounds()) < area(b.bounds());193};194195subPrims[numSubPrims++] = prim;196197while (numSubPrims < targetSubPrims)198{199/* get top heap element */200std::pop_heap(subPrims+0,subPrims+numSubPrims, compare);201PrimRef top = subPrims[--numSubPrims];202203unsigned int dim; float fsplit;204if (!grid.split_pos(top, dim, fsplit))205{206assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);207subPrims[numSubPrims++] = top;208return;209}210211/* split primitive */212PrimRef left,right;213splitter(top,dim,fsplit,left,right);214assert(!left.bounds().empty());215assert(!right.bounds().empty());216217subPrims[numSubPrims++] = left;218std::push_heap(subPrims+0, subPrims+numSubPrims, compare);219220subPrims[numSubPrims++] = right;221std::push_heap(subPrims+0, subPrims+numSubPrims, compare);222}223}224225#endif226227#if !defined(RTHWIF_STANDALONE)228229template<typename Mesh, typename SplitterFactory>230PrimInfo createPrimRefArray_presplit(Geometry* geometry, unsigned int geomID, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)231{232ParallelPrefixSumState<PrimInfo> pstate;233234/* first try */235progressMonitor(0);236PrimInfo pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {237return geometry->createPrimRefArray(prims,r,r.begin(),geomID);238}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });239240/* if we need to filter out geometry, run again */241if (pinfo.size() != numPrimRefs)242{243progressMonitor(0);244pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {245return geometry->createPrimRefArray(prims,r,base.size(),geomID);246}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });247}248return pinfo;249}250#endif251252template<typename SplitPrimitiveFunc, typename ProjectedPrimitiveAreaFunc, typename PrimVector>253PrimInfo createPrimRefArray_presplit(size_t numPrimRefs,254PrimVector& prims,255const PrimInfo& pinfo,256const SplitPrimitiveFunc& splitPrimitive,257const ProjectedPrimitiveAreaFunc& primitiveArea)258{259static const size_t MIN_STEP_SIZE = 128;260261/* use correct number of primitives */262size_t numPrimitives = pinfo.size();263const size_t numPrimitivesExt = prims.size();264const size_t numSplitPrimitivesBudget = numPrimitivesExt - numPrimitives;265266/* allocate double buffer presplit items */267avector<PresplitItem> preSplitItem0(numPrimitivesExt);268avector<PresplitItem> preSplitItem1(numPrimitivesExt);269270/* compute grid */271SplittingGrid grid(pinfo.geomBounds);272273/* init presplit items and get total sum */274const float psum = parallel_reduce( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), 0.0f, [&](const range<size_t>& r) -> float {275float sum = 0.0f;276for (size_t i=r.begin(); i<r.end(); i++)277{278preSplitItem0[i].index = (unsigned int)i;279const Vec2i mc = grid.computeMC(prims[i]);280/* if all bits are equal then we cannot split */281preSplitItem0[i].priority = (mc.x != mc.y) ? PresplitItem::compute_priority(primitiveArea,prims[i],mc) : 0.0f;282/* FIXME: sum undeterministic */283sum += preSplitItem0[i].priority;284}285return sum;286},[](const float& a, const float& b) -> float { return a+b; });287288/* compute number of splits per primitive */289const float inv_psum = 1.0f / psum;290parallel_for( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {291for (size_t i=r.begin(); i<r.end(); i++)292{293if (preSplitItem0[i].priority <= 0.0f) {294preSplitItem0[i].data = 1;295continue;296}297298const float rel_p = (float)numSplitPrimitivesBudget * preSplitItem0[i].priority * inv_psum;299if (rel_p < 1) {300preSplitItem0[i].data = 1;301continue;302}303304//preSplitItem0[i].data = max(min(ceilf(rel_p),(float)MAX_PRESPLITS_PER_PRIMITIVE),1.0f);305preSplitItem0[i].data = max(min(ceilf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG),1.0f);306preSplitItem0[i].data = 1 << preSplitItem0[i].data;307assert(preSplitItem0[i].data <= MAX_PRESPLITS_PER_PRIMITIVE);308}309});310311auto isLeft = [&] (const PresplitItem &ref) { return ref.data <= 1; };312size_t center = parallel_partitioning(preSplitItem0.data(),0,numPrimitives,isLeft,1024);313assert(center <= numPrimitives);314315/* anything to split ? */316if (center >= numPrimitives)317return pinfo;318319size_t numPrimitivesToSplit = numPrimitives - center;320assert(preSplitItem0[center].data >= 1.0f);321322/* sort presplit items in ascending order */323radix_sort_u32(preSplitItem0.data() + center,preSplitItem1.data() + center,numPrimitivesToSplit,1024);324325CHECK_PRESPLIT(326parallel_for( size_t(center+1), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {327for (size_t i=r.begin(); i<r.end(); i++)328assert(preSplitItem0[i-1].data <= preSplitItem0[i].data);329});330);331332unsigned int* primOffset0 = (unsigned int*)preSplitItem1.data();333unsigned int* primOffset1 = (unsigned int*)preSplitItem1.data() + numPrimitivesToSplit;334335/* compute actual number of sub-primitives generated within the [center;numPrimitives-1] range */336const size_t totalNumSubPrims = parallel_reduce( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), size_t(0), [&](const range<size_t>& t) -> size_t {337size_t sum = 0;338for (size_t i=t.begin(); i<t.end(); i++)339{340const unsigned int primrefID = preSplitItem0[i].index;341const unsigned int splitprims = preSplitItem0[i].data;342assert(splitprims >= 1 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);343344unsigned int numSubPrims = 0;345PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];346splitPrimitive(prims[primrefID],splitprims,grid,subPrims,numSubPrims);347assert(numSubPrims);348349numSubPrims--; // can reuse slot350sum+=numSubPrims;351preSplitItem0[i].data = (numSubPrims << 16) | splitprims;352353primOffset0[i-center] = numSubPrims;354}355return sum;356},[](const size_t& a, const size_t& b) -> size_t { return a+b; });357358/* if we are over budget, need to shrink the range */359if (totalNumSubPrims > numSplitPrimitivesBudget)360{361size_t new_center = numPrimitives-1;362size_t sum = 0;363for (;new_center>=center;new_center--)364{365const unsigned int numSubPrims = preSplitItem0[new_center].data >> 16;366if (unlikely(sum + numSubPrims >= numSplitPrimitivesBudget)) break;367sum += numSubPrims;368}369new_center++;370371primOffset0 += new_center - center;372numPrimitivesToSplit -= new_center - center;373center = new_center;374assert(numPrimitivesToSplit == (numPrimitives - center));375}376377/* parallel prefix sum to compute offsets for storing sub-primitives */378const unsigned int offset = parallel_prefix_sum(primOffset0,primOffset1,numPrimitivesToSplit,(unsigned int)0,std::plus<unsigned int>());379assert(numPrimitives+offset <= numPrimitivesExt);380381/* iterate over range, and split primitives into sub primitives and append them to prims array */382parallel_for( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& rn) -> void {383for (size_t j=rn.begin(); j<rn.end(); j++)384{385const unsigned int primrefID = preSplitItem0[j].index;386const unsigned int splitprims = preSplitItem0[j].data & 0xFFFF;387assert(splitprims >= 1 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);388389unsigned int numSubPrims = 0;390PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];391splitPrimitive(prims[primrefID],splitprims,grid,subPrims,numSubPrims);392393const unsigned int numSubPrimsExpected MAYBE_UNUSED = preSplitItem0[j].data >> 16;394assert(numSubPrims-1 == numSubPrimsExpected);395396const size_t newID = numPrimitives + primOffset1[j-center];397assert(newID+numSubPrims-1 <= numPrimitivesExt);398399prims[primrefID] = subPrims[0];400for (size_t i=1;i<numSubPrims;i++)401prims[newID+i-1] = subPrims[i];402}403});404405numPrimitives += offset;406407/* recompute centroid bounding boxes */408const PrimInfo pinfo1 = parallel_reduce(size_t(0),numPrimitives,size_t(MIN_STEP_SIZE),PrimInfo(empty),[&] (const range<size_t>& r) -> PrimInfo {409PrimInfo p(empty);410for (size_t j=r.begin(); j<r.end(); j++)411p.add_center2(prims[j]);412return p;413}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });414415assert(pinfo1.size() == numPrimitives);416417return pinfo1;418}419420#if !defined(RTHWIF_STANDALONE)421422template<typename Mesh, typename SplitterFactory>423PrimInfo createPrimRefArray_presplit(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)424{425ParallelForForPrefixSumState<PrimInfo> pstate;426Scene::Iterator2 iter(scene,types,mblur);427428/* first try */429progressMonitor(0);430pstate.init(iter,size_t(1024));431PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {432return mesh->createPrimRefArray(prims,r,k,(unsigned)geomID);433}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });434435/* if we need to filter out geometry, run again */436if (pinfo.size() != numPrimRefs)437{438progressMonitor(0);439pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {440return mesh->createPrimRefArray(prims,r,base.size(),(unsigned)geomID);441}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });442}443444445SplitterFactory Splitter(scene);446447auto split_primitive = [&] (const PrimRef &prim,448const unsigned int splitprims,449const SplittingGrid& grid,450PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],451unsigned int& numSubPrims)452{453const auto splitter = Splitter(prim);454splitPrimitive(splitter,prim,splitprims,grid,subPrims,numSubPrims);455};456457auto primitiveArea = [&] (const PrimRef &ref) {458const unsigned int geomID = ref.geomID();459const unsigned int primID = ref.primID();460return ((Mesh*)scene->get(geomID))->projectedPrimitiveArea(primID);461};462463return createPrimRefArray_presplit(numPrimRefs,prims,pinfo,split_primitive,primitiveArea);464}465#endif466}467}468469470