Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/kernels/builders/primrefgen_presplit.h
9906 views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
#pragma once
5
6
#include "../../common/algorithms/parallel_reduce.h"
7
#include "../../common/algorithms/parallel_sort.h"
8
#include "../builders/heuristic_spatial.h"
9
#include "../builders/splitter.h"
10
11
#include "../../common/algorithms/parallel_partition.h"
12
#include "../../common/algorithms/parallel_for_for.h"
13
#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
14
15
#define DBG_PRESPLIT(x)
16
#define CHECK_PRESPLIT(x)
17
18
#define GRID_SIZE 1024
19
//#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 6
20
#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 5
21
#define MAX_PRESPLITS_PER_PRIMITIVE (1<<MAX_PRESPLITS_PER_PRIMITIVE_LOG)
22
//#define PRIORITY_CUTOFF_THRESHOLD 2.0f
23
#define PRIORITY_SPLIT_POS_WEIGHT 1.5f
24
25
namespace embree
26
{
27
namespace isa
28
{
29
struct SplittingGrid
30
{
31
__forceinline SplittingGrid(const BBox3fa& bounds)
32
{
33
base = bounds.lower;
34
const Vec3fa diag = bounds.size();
35
extend = max(diag.x,max(diag.y,diag.z));
36
scale = extend == 0.0f ? 0.0f : GRID_SIZE / extend;
37
}
38
39
__forceinline bool split_pos(const PrimRef& prim, unsigned int& dim_o, float& fsplit_o) const
40
{
41
/* compute morton code */
42
const Vec3fa lower = prim.lower;
43
const Vec3fa upper = prim.upper;
44
const Vec3fa glower = (lower-base)*Vec3fa(scale)+Vec3fa(0.2f);
45
const Vec3fa gupper = (upper-base)*Vec3fa(scale)-Vec3fa(0.2f);
46
Vec3ia ilower(floor(glower));
47
Vec3ia iupper(floor(gupper));
48
49
/* this ignores dimensions that are empty */
50
iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));
51
52
/* compute a morton code for the lower and upper grid coordinates. */
53
const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
54
const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
55
56
/* if all bits are equal then we cannot split */
57
if (unlikely(lower_code == upper_code))
58
return false;
59
60
/* compute octree level and dimension to perform the split in */
61
const unsigned int diff = 31 - lzcnt(lower_code^upper_code);
62
const unsigned int level = diff / 3;
63
const unsigned int dim = diff % 3;
64
65
/* now we compute the grid position of the split */
66
const unsigned int isplit = iupper[dim] & ~((1<<level)-1);
67
68
/* compute world space position of split */
69
const float inv_grid_size = 1.0f / GRID_SIZE;
70
const float fsplit = base[dim] + isplit * inv_grid_size * extend;
71
assert(prim.lower[dim] <= fsplit && prim.upper[dim] >= fsplit);
72
73
dim_o = dim;
74
fsplit_o = fsplit;
75
return true;
76
}
77
78
__forceinline Vec2i computeMC(const PrimRef& ref) const
79
{
80
const Vec3fa lower = ref.lower;
81
const Vec3fa upper = ref.upper;
82
const Vec3fa glower = (lower-base)*Vec3fa(scale)+Vec3fa(0.2f);
83
const Vec3fa gupper = (upper-base)*Vec3fa(scale)-Vec3fa(0.2f);
84
Vec3ia ilower(floor(glower));
85
Vec3ia iupper(floor(gupper));
86
87
/* this ignores dimensions that are empty */
88
iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));
89
90
/* compute a morton code for the lower and upper grid coordinates. */
91
const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
92
const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
93
return Vec2i(lower_code,upper_code);
94
}
95
96
Vec3fa base;
97
float scale;
98
float extend;
99
};
100
101
struct PresplitItem
102
{
103
union {
104
float priority;
105
unsigned int data;
106
};
107
unsigned int index;
108
109
__forceinline operator unsigned() const {
110
return data;
111
}
112
113
template<typename ProjectedPrimitiveAreaFunc>
114
__forceinline static float compute_priority(const ProjectedPrimitiveAreaFunc& primitiveArea, const PrimRef &ref, const Vec2i &mc)
115
{
116
const float area_aabb = area(ref.bounds());
117
const float area_prim = primitiveArea(ref);
118
if (area_prim == 0.0f) return 0.0f;
119
const unsigned int diff = 31 - lzcnt(mc.x^mc.y);
120
//assert(area_prim <= area_aabb); // may trigger due to numerical issues
121
const float area_diff = max(0.0f, area_aabb - area_prim);
122
//const float priority = powf(area_diff * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff),1.0f/4.0f);
123
const float priority = sqrtf(sqrtf( area_diff * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff) ));
124
//const float priority = sqrtf(sqrtf( area_diff ) );
125
//const float priority = sqrtfarea_diff;
126
//const float priority = area_diff; // 104 fps !!!!!!!!!!
127
//const float priority = 0.2f*area_aabb + 0.8f*area_diff; // 104 fps
128
//const float priority = area_aabb * max(area_aabb/area_prim,32.0f);
129
//const float priority = area_prim;
130
assert(priority >= 0.0f && priority < FLT_LARGE);
131
return priority;
132
}
133
134
};
135
136
inline std::ostream &operator<<(std::ostream &cout, const PresplitItem& item) {
137
return cout << "index " << item.index << " priority " << item.priority;
138
};
139
140
#if 1
141
142
template<typename Splitter>
143
void splitPrimitive(const Splitter& splitter,
144
const PrimRef& prim,
145
const unsigned int splitprims,
146
const SplittingGrid& grid,
147
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
148
unsigned int& numSubPrims)
149
{
150
assert(splitprims > 0 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);
151
152
if (splitprims == 1)
153
{
154
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
155
subPrims[numSubPrims++] = prim;
156
}
157
else
158
{
159
unsigned int dim; float fsplit;
160
if (!grid.split_pos(prim, dim, fsplit))
161
{
162
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
163
subPrims[numSubPrims++] = prim;
164
return;
165
}
166
167
/* split primitive */
168
PrimRef left,right;
169
splitter(prim,dim,fsplit,left,right);
170
assert(!left.bounds().empty());
171
assert(!right.bounds().empty());
172
173
const unsigned int splitprims_left = splitprims/2;
174
const unsigned int splitprims_right = splitprims - splitprims_left;
175
splitPrimitive(splitter,left,splitprims_left,grid,subPrims,numSubPrims);
176
splitPrimitive(splitter,right,splitprims_right,grid,subPrims,numSubPrims);
177
}
178
}
179
180
#else
181
182
template<typename Splitter>
183
void splitPrimitive(const Splitter& splitter,
184
const PrimRef& prim,
185
const unsigned int targetSubPrims,
186
const SplittingGrid& grid,
187
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
188
unsigned int& numSubPrims)
189
{
190
assert(targetSubPrims > 0 && targetSubPrims <= MAX_PRESPLITS_PER_PRIMITIVE);
191
192
auto compare = [] ( const PrimRef& a, const PrimRef& b ) {
193
return area(a.bounds()) < area(b.bounds());
194
};
195
196
subPrims[numSubPrims++] = prim;
197
198
while (numSubPrims < targetSubPrims)
199
{
200
/* get top heap element */
201
std::pop_heap(subPrims+0,subPrims+numSubPrims, compare);
202
PrimRef top = subPrims[--numSubPrims];
203
204
unsigned int dim; float fsplit;
205
if (!grid.split_pos(top, dim, fsplit))
206
{
207
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
208
subPrims[numSubPrims++] = top;
209
return;
210
}
211
212
/* split primitive */
213
PrimRef left,right;
214
splitter(top,dim,fsplit,left,right);
215
assert(!left.bounds().empty());
216
assert(!right.bounds().empty());
217
218
subPrims[numSubPrims++] = left;
219
std::push_heap(subPrims+0, subPrims+numSubPrims, compare);
220
221
subPrims[numSubPrims++] = right;
222
std::push_heap(subPrims+0, subPrims+numSubPrims, compare);
223
}
224
}
225
226
#endif
227
228
#if !defined(RTHWIF_STANDALONE)
229
230
template<typename Mesh, typename SplitterFactory>
231
PrimInfo createPrimRefArray_presplit(Geometry* geometry, unsigned int geomID, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
232
{
233
ParallelPrefixSumState<PrimInfo> pstate;
234
235
/* first try */
236
progressMonitor(0);
237
PrimInfo pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
238
return geometry->createPrimRefArray(prims,r,r.begin(),geomID);
239
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
240
241
/* if we need to filter out geometry, run again */
242
if (pinfo.size() != numPrimRefs)
243
{
244
progressMonitor(0);
245
pinfo = parallel_prefix_sum( pstate, size_t(0), geometry->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
246
return geometry->createPrimRefArray(prims,r,base.size(),geomID);
247
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
248
}
249
return pinfo;
250
}
251
#endif
252
253
template<typename SplitPrimitiveFunc, typename ProjectedPrimitiveAreaFunc, typename PrimVector>
254
PrimInfo createPrimRefArray_presplit(size_t numPrimRefs,
255
PrimVector& prims,
256
const PrimInfo& pinfo,
257
const SplitPrimitiveFunc& splitPrimitive,
258
const ProjectedPrimitiveAreaFunc& primitiveArea)
259
{
260
static const size_t MIN_STEP_SIZE = 128;
261
262
/* use correct number of primitives */
263
size_t numPrimitives = pinfo.size();
264
const size_t numPrimitivesExt = prims.size();
265
const size_t numSplitPrimitivesBudget = numPrimitivesExt - numPrimitives;
266
267
/* allocate double buffer presplit items */
268
avector<PresplitItem> preSplitItem0(numPrimitivesExt);
269
avector<PresplitItem> preSplitItem1(numPrimitivesExt);
270
271
/* compute grid */
272
SplittingGrid grid(pinfo.geomBounds);
273
274
/* init presplit items and get total sum */
275
const float psum = parallel_reduce( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), 0.0f, [&](const range<size_t>& r) -> float {
276
float sum = 0.0f;
277
for (size_t i=r.begin(); i<r.end(); i++)
278
{
279
preSplitItem0[i].index = (unsigned int)i;
280
const Vec2i mc = grid.computeMC(prims[i]);
281
/* if all bits are equal then we cannot split */
282
preSplitItem0[i].priority = (mc.x != mc.y) ? PresplitItem::compute_priority(primitiveArea,prims[i],mc) : 0.0f;
283
/* FIXME: sum undeterministic */
284
sum += preSplitItem0[i].priority;
285
}
286
return sum;
287
},[](const float& a, const float& b) -> float { return a+b; });
288
289
/* compute number of splits per primitive */
290
const float inv_psum = 1.0f / psum;
291
parallel_for( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
292
for (size_t i=r.begin(); i<r.end(); i++)
293
{
294
if (preSplitItem0[i].priority <= 0.0f) {
295
preSplitItem0[i].data = 1;
296
continue;
297
}
298
299
const float rel_p = (float)numSplitPrimitivesBudget * preSplitItem0[i].priority * inv_psum;
300
if (rel_p < 1) {
301
preSplitItem0[i].data = 1;
302
continue;
303
}
304
305
//preSplitItem0[i].data = max(min(ceilf(rel_p),(float)MAX_PRESPLITS_PER_PRIMITIVE),1.0f);
306
preSplitItem0[i].data = max(min(ceilf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG),1.0f);
307
preSplitItem0[i].data = 1 << preSplitItem0[i].data;
308
assert(preSplitItem0[i].data <= MAX_PRESPLITS_PER_PRIMITIVE);
309
}
310
});
311
312
auto isLeft = [&] (const PresplitItem &ref) { return ref.data <= 1; };
313
size_t center = parallel_partitioning(preSplitItem0.data(),0,numPrimitives,isLeft,1024);
314
assert(center <= numPrimitives);
315
316
/* anything to split ? */
317
if (center >= numPrimitives)
318
return pinfo;
319
320
size_t numPrimitivesToSplit = numPrimitives - center;
321
assert(preSplitItem0[center].data >= 1.0f);
322
323
/* sort presplit items in ascending order */
324
radix_sort_u32(preSplitItem0.data() + center,preSplitItem1.data() + center,numPrimitivesToSplit,1024);
325
326
CHECK_PRESPLIT(
327
parallel_for( size_t(center+1), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
328
for (size_t i=r.begin(); i<r.end(); i++)
329
assert(preSplitItem0[i-1].data <= preSplitItem0[i].data);
330
});
331
);
332
333
unsigned int* primOffset0 = (unsigned int*)preSplitItem1.data();
334
unsigned int* primOffset1 = (unsigned int*)preSplitItem1.data() + numPrimitivesToSplit;
335
336
/* compute actual number of sub-primitives generated within the [center;numPrimitives-1] range */
337
const size_t totalNumSubPrims = parallel_reduce( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), size_t(0), [&](const range<size_t>& t) -> size_t {
338
size_t sum = 0;
339
for (size_t i=t.begin(); i<t.end(); i++)
340
{
341
const unsigned int primrefID = preSplitItem0[i].index;
342
const unsigned int splitprims = preSplitItem0[i].data;
343
assert(splitprims >= 1 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);
344
345
unsigned int numSubPrims = 0;
346
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
347
splitPrimitive(prims[primrefID],splitprims,grid,subPrims,numSubPrims);
348
assert(numSubPrims);
349
350
numSubPrims--; // can reuse slot
351
sum+=numSubPrims;
352
preSplitItem0[i].data = (numSubPrims << 16) | splitprims;
353
354
primOffset0[i-center] = numSubPrims;
355
}
356
return sum;
357
},[](const size_t& a, const size_t& b) -> size_t { return a+b; });
358
359
/* if we are over budget, need to shrink the range */
360
if (totalNumSubPrims > numSplitPrimitivesBudget)
361
{
362
size_t new_center = numPrimitives-1;
363
size_t sum = 0;
364
for (;new_center>=center;new_center--)
365
{
366
const unsigned int numSubPrims = preSplitItem0[new_center].data >> 16;
367
if (unlikely(sum + numSubPrims >= numSplitPrimitivesBudget)) break;
368
sum += numSubPrims;
369
}
370
new_center++;
371
372
primOffset0 += new_center - center;
373
numPrimitivesToSplit -= new_center - center;
374
center = new_center;
375
assert(numPrimitivesToSplit == (numPrimitives - center));
376
}
377
378
/* parallel prefix sum to compute offsets for storing sub-primitives */
379
const unsigned int offset = parallel_prefix_sum(primOffset0,primOffset1,numPrimitivesToSplit,(unsigned int)0,std::plus<unsigned int>());
380
assert(numPrimitives+offset <= numPrimitivesExt);
381
382
/* iterate over range, and split primitives into sub primitives and append them to prims array */
383
parallel_for( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& rn) -> void {
384
for (size_t j=rn.begin(); j<rn.end(); j++)
385
{
386
const unsigned int primrefID = preSplitItem0[j].index;
387
const unsigned int splitprims = preSplitItem0[j].data & 0xFFFF;
388
assert(splitprims >= 1 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);
389
390
unsigned int numSubPrims = 0;
391
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
392
splitPrimitive(prims[primrefID],splitprims,grid,subPrims,numSubPrims);
393
394
const unsigned int numSubPrimsExpected MAYBE_UNUSED = preSplitItem0[j].data >> 16;
395
assert(numSubPrims-1 == numSubPrimsExpected);
396
397
const size_t newID = numPrimitives + primOffset1[j-center];
398
assert(newID+numSubPrims-1 <= numPrimitivesExt);
399
400
prims[primrefID] = subPrims[0];
401
for (size_t i=1;i<numSubPrims;i++)
402
prims[newID+i-1] = subPrims[i];
403
}
404
});
405
406
numPrimitives += offset;
407
408
/* recompute centroid bounding boxes */
409
const PrimInfo pinfo1 = parallel_reduce(size_t(0),numPrimitives,size_t(MIN_STEP_SIZE),PrimInfo(empty),[&] (const range<size_t>& r) -> PrimInfo {
410
PrimInfo p(empty);
411
for (size_t j=r.begin(); j<r.end(); j++)
412
p.add_center2(prims[j]);
413
return p;
414
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
415
416
assert(pinfo1.size() == numPrimitives);
417
418
return pinfo1;
419
}
420
421
#if !defined(RTHWIF_STANDALONE)
422
423
template<typename Mesh, typename SplitterFactory>
424
PrimInfo createPrimRefArray_presplit(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
425
{
426
ParallelForForPrefixSumState<PrimInfo> pstate;
427
Scene::Iterator2 iter(scene,types,mblur);
428
429
/* first try */
430
progressMonitor(0);
431
pstate.init(iter,size_t(1024));
432
PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
433
return mesh->createPrimRefArray(prims,r,k,(unsigned)geomID);
434
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
435
436
/* if we need to filter out geometry, run again */
437
if (pinfo.size() != numPrimRefs)
438
{
439
progressMonitor(0);
440
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
441
return mesh->createPrimRefArray(prims,r,base.size(),(unsigned)geomID);
442
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
443
}
444
445
446
SplitterFactory Splitter(scene);
447
448
auto split_primitive = [&] (const PrimRef &prim,
449
const unsigned int splitprims,
450
const SplittingGrid& grid,
451
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
452
unsigned int& numSubPrims)
453
{
454
const auto splitter = Splitter(prim);
455
splitPrimitive(splitter,prim,splitprims,grid,subPrims,numSubPrims);
456
};
457
458
auto primitiveArea = [&] (const PrimRef &ref) {
459
const unsigned int geomID = ref.geomID();
460
const unsigned int primID = ref.primID();
461
return ((Mesh*)scene->get(geomID))->projectedPrimitiveArea(primID);
462
};
463
464
return createPrimRefArray_presplit(numPrimRefs,prims,pinfo,split_primitive,primitiveArea);
465
}
466
#endif
467
}
468
}
469
470