Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/kernels/builders/bvh_builder_msmblur_hair.h
9906 views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
#pragma once
5
6
#include "../bvh/bvh.h"
7
#include "../geometry/primitive.h"
8
#include "../builders/bvh_builder_msmblur.h"
9
#include "../builders/heuristic_binning_array_aligned.h"
10
#include "../builders/heuristic_binning_array_unaligned.h"
11
#include "../builders/heuristic_timesplit_array.h"
12
13
namespace embree
14
{
15
namespace isa
16
{
17
struct BVHBuilderHairMSMBlur
18
{
19
/*! settings for msmblur builder */
20
struct Settings
21
{
22
/*! default settings */
23
Settings ()
24
: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8) {}
25
26
public:
27
size_t branchingFactor; //!< branching factor of BVH to build
28
size_t maxDepth; //!< maximum depth of BVH to build
29
size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
30
size_t minLeafSize; //!< minimum size of a leaf
31
size_t maxLeafSize; //!< maximum size of a leaf
32
};
33
34
struct BuildRecord
35
{
36
public:
37
__forceinline BuildRecord () {}
38
39
__forceinline BuildRecord (size_t depth)
40
: depth(depth) {}
41
42
__forceinline BuildRecord (const SetMB& prims, size_t depth)
43
: depth(depth), prims(prims) {}
44
45
__forceinline size_t size() const {
46
return prims.size();
47
}
48
49
public:
50
size_t depth; //!< depth of the root of this subtree
51
SetMB prims; //!< the list of primitives
52
};
53
54
template<typename NodeRef,
55
typename RecalculatePrimRef,
56
typename CreateAllocFunc,
57
typename CreateAABBNodeMBFunc,
58
typename SetAABBNodeMBFunc,
59
typename CreateOBBNodeMBFunc,
60
typename SetOBBNodeMBFunc,
61
typename CreateLeafFunc,
62
typename ProgressMonitor>
63
64
class BuilderT
65
{
66
ALIGNED_CLASS_(16);
67
68
static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
69
static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
70
static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build
71
72
typedef BVHNodeRecordMB<NodeRef> NodeRecordMB;
73
typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
74
75
typedef FastAllocator::CachedAllocator Allocator;
76
typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;
77
78
typedef HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> HeuristicTemporal;
79
typedef HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> HeuristicBinning;
80
typedef UnalignedHeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> UnalignedHeuristicBinning;
81
82
public:
83
84
BuilderT (Scene* scene,
85
const RecalculatePrimRef& recalculatePrimRef,
86
const CreateAllocFunc& createAlloc,
87
const CreateAABBNodeMBFunc& createAABBNodeMB,
88
const SetAABBNodeMBFunc& setAABBNodeMB,
89
const CreateOBBNodeMBFunc& createOBBNodeMB,
90
const SetOBBNodeMBFunc& setOBBNodeMB,
91
const CreateLeafFunc& createLeaf,
92
const ProgressMonitor& progressMonitor,
93
const Settings settings)
94
95
: cfg(settings),
96
scene(scene),
97
recalculatePrimRef(recalculatePrimRef),
98
createAlloc(createAlloc),
99
createAABBNodeMB(createAABBNodeMB), setAABBNodeMB(setAABBNodeMB),
100
createOBBNodeMB(createOBBNodeMB), setOBBNodeMB(setOBBNodeMB),
101
createLeaf(createLeaf),
102
progressMonitor(progressMonitor),
103
unalignedHeuristic(scene),
104
temporalSplitHeuristic(scene->device,recalculatePrimRef) {}
105
106
private:
107
108
/*! checks if all primitives are from the same geometry */
109
__forceinline bool sameGeometry(const SetMB& set)
110
{
111
mvector<PrimRefMB>& prims = *set.prims;
112
unsigned int firstGeomID = prims[set.begin()].geomID();
113
for (size_t i=set.begin()+1; i<set.end(); i++) {
114
if (prims[i].geomID() != firstGeomID){
115
return false;
116
}
117
}
118
return true;
119
}
120
121
/*! performs some split if SAH approaches fail */
122
void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
123
{
124
mvector<PrimRefMB>& prims = *set.prims;
125
126
const size_t begin = set.begin();
127
const size_t end = set.end();
128
const size_t center = (begin + end)/2;
129
130
PrimInfoMB linfo = empty;
131
for (size_t i=begin; i<center; i++)
132
linfo.add_primref(prims[i]);
133
134
PrimInfoMB rinfo = empty;
135
for (size_t i=center; i<end; i++)
136
rinfo.add_primref(prims[i]);
137
138
new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
139
new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
140
}
141
142
void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
143
{
144
assert(set.size() > 1);
145
const size_t begin = set.begin();
146
const size_t end = set.end();
147
PrimInfoMB linfo(empty);
148
PrimInfoMB rinfo(empty);
149
unsigned int geomID = (*set.prims)[begin].geomID();
150
size_t center = serial_partitioning(set.prims->data(),begin,end,linfo,rinfo,
151
[&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },
152
[ ] ( PrimInfoMB& a, const PrimRefMB& ref ) { a.add_primref(ref); });
153
154
new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
155
new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
156
}
157
158
/*! creates a large leaf that could be larger than supported by the BVH */
159
NodeRecordMB4D createLargeLeaf(BuildRecord& current, Allocator alloc)
160
{
161
/* this should never occur but is a fatal error */
162
if (current.depth > cfg.maxDepth)
163
throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
164
165
/* special case when directly creating leaf without any splits that could shrink time_range */
166
bool force_split = false;
167
if (current.depth == 1 && current.size() > 0)
168
{
169
BBox1f c = empty;
170
BBox1f p = current.prims.time_range;
171
for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {
172
mvector<PrimRefMB>& prims = *current.prims.prims;
173
c.extend(prims[i].time_range);
174
}
175
176
force_split = c.lower > p.lower || c.upper < p.upper;
177
}
178
179
/* create leaf for few primitives */
180
if (current.size() <= cfg.maxLeafSize && sameGeometry(current.prims) && !force_split)
181
return createLeaf(current.prims,alloc);
182
183
/* fill all children by always splitting the largest one */
184
LocalChildList children(current);
185
NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
186
187
do {
188
189
/* find best child with largest bounding box area */
190
int bestChild = -1;
191
size_t bestSize = 0;
192
for (unsigned i=0; i<children.size(); i++)
193
{
194
/* ignore leaves as they cannot get split */
195
if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i].prims) && !force_split)
196
continue;
197
198
force_split = false;
199
200
/* remember child with largest size */
201
if (children[i].size() > bestSize) {
202
bestSize = children[i].size();
203
bestChild = i;
204
}
205
}
206
if (bestChild == -1) break;
207
208
/*! split best child into left and right child */
209
BuildRecord left(current.depth+1);
210
BuildRecord right(current.depth+1);
211
if (!sameGeometry(children[bestChild].prims)) {
212
splitByGeometry(children[bestChild].prims,left.prims,right.prims);
213
} else {
214
splitFallback(children[bestChild].prims,left.prims,right.prims);
215
}
216
children.split(bestChild,left,right,std::unique_ptr<mvector<PrimRefMB>>());
217
218
} while (children.size() < cfg.branchingFactor);
219
220
221
/* detect time_ranges that have shrunken */
222
bool timesplit = false;
223
for (size_t i=0; i<children.size(); i++) {
224
const BBox1f c = children[i].prims.time_range;
225
const BBox1f p = current.prims.time_range;
226
timesplit |= c.lower > p.lower || c.upper < p.upper;
227
}
228
229
/* create node */
230
NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,timesplit);
231
232
LBBox3fa bounds = empty;
233
for (size_t i=0; i<children.size(); i++) {
234
values[i] = createLargeLeaf(children[i],alloc);
235
bounds.extend(values[i].lbounds);
236
}
237
238
setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
239
240
if (timesplit)
241
bounds = current.prims.linearBounds(recalculatePrimRef);
242
243
return NodeRecordMB4D(node,bounds,current.prims.time_range);
244
}
245
246
/*! performs split */
247
std::unique_ptr<mvector<PrimRefMB>> split(const BuildRecord& current, BuildRecord& lrecord, BuildRecord& rrecord, bool& aligned, bool& timesplit)
248
{
249
/* variable to track the SAH of the best splitting approach */
250
float bestSAH = inf;
251
const float leafSAH = current.prims.leafSAH(cfg.logBlockSize);
252
253
/* perform standard binning in aligned space */
254
HeuristicBinning::Split alignedObjectSplit = alignedHeuristic.find(current.prims,cfg.logBlockSize);
255
float alignedObjectSAH = alignedObjectSplit.splitSAH();
256
bestSAH = min(alignedObjectSAH,bestSAH);
257
258
/* perform standard binning in unaligned space */
259
UnalignedHeuristicBinning::Split unalignedObjectSplit;
260
LinearSpace3fa uspace;
261
float unalignedObjectSAH = inf;
262
if (alignedObjectSAH > 0.7f*leafSAH) {
263
uspace = unalignedHeuristic.computeAlignedSpaceMB(scene,current.prims);
264
const SetMB sset = current.prims.primInfo(recalculatePrimRef,uspace);
265
unalignedObjectSplit = unalignedHeuristic.find(sset,cfg.logBlockSize,uspace);
266
unalignedObjectSAH = 1.3f*unalignedObjectSplit.splitSAH(); // makes unaligned splits more expensive
267
bestSAH = min(unalignedObjectSAH,bestSAH);
268
}
269
270
/* do temporal splits only if previous approaches failed to produce good SAH and the the time range is large enough */
271
float temporal_split_sah = inf;
272
typename HeuristicTemporal::Split temporal_split;
273
if (bestSAH > 0.5f*leafSAH) {
274
if (current.prims.time_range.size() > 1.01f/float(current.prims.max_num_time_segments)) {
275
temporal_split = temporalSplitHeuristic.find(current.prims,cfg.logBlockSize);
276
temporal_split_sah = temporal_split.splitSAH();
277
bestSAH = min(temporal_split_sah,bestSAH);
278
}
279
}
280
281
/* perform fallback split if SAH heuristics failed */
282
if (unlikely(!std::isfinite(bestSAH))) {
283
current.prims.deterministic_order();
284
splitFallback(current.prims,lrecord.prims,rrecord.prims);
285
}
286
/* perform aligned split if this is best */
287
else if (likely(bestSAH == alignedObjectSAH)) {
288
alignedHeuristic.split(alignedObjectSplit,current.prims,lrecord.prims,rrecord.prims);
289
}
290
/* perform unaligned split if this is best */
291
else if (likely(bestSAH == unalignedObjectSAH)) {
292
unalignedHeuristic.split(unalignedObjectSplit,uspace,current.prims,lrecord.prims,rrecord.prims);
293
aligned = false;
294
}
295
/* perform temporal split if this is best */
296
else if (likely(bestSAH == temporal_split_sah)) {
297
timesplit = true;
298
return temporalSplitHeuristic.split(temporal_split,current.prims,lrecord.prims,rrecord.prims);
299
}
300
else
301
assert(false);
302
303
return std::unique_ptr<mvector<PrimRefMB>>();
304
}
305
306
/*! recursive build */
307
NodeRecordMB4D recurse(BuildRecord& current, Allocator alloc, bool toplevel)
308
{
309
/* get thread local allocator */
310
if (!alloc)
311
alloc = createAlloc();
312
313
/* call memory monitor function to signal progress */
314
if (toplevel && current.size() <= SINGLE_THREADED_THRESHOLD)
315
progressMonitor(current.size());
316
317
/* create leaf node */
318
if (current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || current.size() <= cfg.minLeafSize) {
319
current.prims.deterministic_order();
320
return createLargeLeaf(current,alloc);
321
}
322
323
/* fill all children by always splitting the one with the largest surface area */
324
NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
325
LocalChildList children(current);
326
bool aligned = true;
327
bool timesplit = false;
328
329
do {
330
331
/* find best child with largest bounding box area */
332
ssize_t bestChild = -1;
333
float bestArea = neg_inf;
334
for (size_t i=0; i<children.size(); i++)
335
{
336
/* ignore leaves as they cannot get split */
337
if (children[i].size() <= cfg.minLeafSize)
338
continue;
339
340
/* remember child with largest area */
341
const float A = children[i].prims.halfArea();
342
if (A > bestArea) {
343
bestArea = children[i].prims.halfArea();
344
bestChild = i;
345
}
346
}
347
if (bestChild == -1) break;
348
349
/*! split best child into left and right child */
350
BuildRecord left(current.depth+1);
351
BuildRecord right(current.depth+1);
352
std::unique_ptr<mvector<PrimRefMB>> new_vector = split(children[bestChild],left,right,aligned,timesplit);
353
children.split(bestChild,left,right,std::move(new_vector));
354
355
} while (children.size() < cfg.branchingFactor);
356
357
/* detect time_ranges that have shrunken */
358
for (size_t i=0; i<children.size(); i++) {
359
const BBox1f c = children[i].prims.time_range;
360
const BBox1f p = current.prims.time_range;
361
timesplit |= c.lower > p.lower || c.upper < p.upper;
362
}
363
364
/* create time split node */
365
if (timesplit)
366
{
367
const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true);
368
369
/* spawn tasks or ... */
370
if (current.size() > SINGLE_THREADED_THRESHOLD)
371
{
372
parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
373
for (size_t i=r.begin(); i<r.end(); i++) {
374
values[i] = recurse(children[i],nullptr,true);
375
_mm_mfence(); // to allow non-temporal stores during build
376
}
377
});
378
}
379
/* ... continue sequential */
380
else {
381
for (size_t i=0; i<children.size(); i++) {
382
values[i] = recurse(children[i],alloc,false);
383
}
384
}
385
386
setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
387
388
const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef);
389
return NodeRecordMB4D(node,bounds,current.prims.time_range);
390
}
391
392
/* create aligned node */
393
else if (aligned)
394
{
395
const NodeRef node = createAABBNodeMB(children.children.data(),children.numChildren,alloc,true);
396
397
/* spawn tasks or ... */
398
if (current.size() > SINGLE_THREADED_THRESHOLD)
399
{
400
LBBox3fa cbounds[MAX_BRANCHING_FACTOR];
401
parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
402
for (size_t i=r.begin(); i<r.end(); i++) {
403
values[i] = recurse(children[i],nullptr,true);
404
cbounds[i] = values[i].lbounds;
405
_mm_mfence(); // to allow non-temporal stores during build
406
}
407
});
408
409
LBBox3fa bounds = empty;
410
for (size_t i=0; i<children.size(); i++)
411
bounds.extend(cbounds[i]);
412
setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
413
return NodeRecordMB4D(node,bounds,current.prims.time_range);
414
}
415
/* ... continue sequentially */
416
else
417
{
418
LBBox3fa bounds = empty;
419
for (size_t i=0; i<children.size(); i++) {
420
values[i] = recurse(children[i],alloc,false);
421
bounds.extend(values[i].lbounds);
422
}
423
setAABBNodeMB(current,children.children.data(),node,values,children.numChildren);
424
return NodeRecordMB4D(node,bounds,current.prims.time_range);
425
}
426
}
427
428
/* create unaligned node */
429
else
430
{
431
const NodeRef node = createOBBNodeMB(alloc);
432
433
/* spawn tasks or ... */
434
if (current.size() > SINGLE_THREADED_THRESHOLD)
435
{
436
parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
437
for (size_t i=r.begin(); i<r.end(); i++) {
438
const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims);
439
const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space);
440
const auto child = recurse(children[i],nullptr,true);
441
setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range);
442
_mm_mfence(); // to allow non-temporal stores during build
443
}
444
});
445
}
446
/* ... continue sequentially */
447
else
448
{
449
for (size_t i=0; i<children.size(); i++) {
450
const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpaceMB(scene,children[i].prims);
451
const LBBox3fa lbounds = children[i].prims.linearBounds(recalculatePrimRef,space);
452
const auto child = recurse(children[i],alloc,false);
453
setOBBNodeMB(node,i,child.ref,space,lbounds,children[i].prims.time_range);
454
}
455
}
456
457
const LBBox3fa bounds = current.prims.linearBounds(recalculatePrimRef);
458
return NodeRecordMB4D(node,bounds,current.prims.time_range);
459
}
460
}
461
462
public:
463
464
/*! entry point into builder */
465
NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
466
{
467
BuildRecord record(SetMB(pinfo,&prims),1);
468
auto root = recurse(record,nullptr,true);
469
_mm_mfence(); // to allow non-temporal stores during build
470
return root;
471
}
472
473
private:
474
Settings cfg;
475
Scene* scene;
476
const RecalculatePrimRef& recalculatePrimRef;
477
const CreateAllocFunc& createAlloc;
478
const CreateAABBNodeMBFunc& createAABBNodeMB;
479
const SetAABBNodeMBFunc& setAABBNodeMB;
480
const CreateOBBNodeMBFunc& createOBBNodeMB;
481
const SetOBBNodeMBFunc& setOBBNodeMB;
482
const CreateLeafFunc& createLeaf;
483
const ProgressMonitor& progressMonitor;
484
485
private:
486
HeuristicBinning alignedHeuristic;
487
UnalignedHeuristicBinning unalignedHeuristic;
488
HeuristicTemporal temporalSplitHeuristic;
489
};
490
491
template<typename NodeRef,
492
typename RecalculatePrimRef,
493
typename CreateAllocFunc,
494
typename CreateAABBNodeMBFunc,
495
typename SetAABBNodeMBFunc,
496
typename CreateOBBNodeMBFunc,
497
typename SetOBBNodeMBFunc,
498
typename CreateLeafFunc,
499
typename ProgressMonitor>
500
501
static BVHNodeRecordMB4D<NodeRef> build (Scene* scene, mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo,
502
const RecalculatePrimRef& recalculatePrimRef,
503
const CreateAllocFunc& createAlloc,
504
const CreateAABBNodeMBFunc& createAABBNodeMB,
505
const SetAABBNodeMBFunc& setAABBNodeMB,
506
const CreateOBBNodeMBFunc& createOBBNodeMB,
507
const SetOBBNodeMBFunc& setOBBNodeMB,
508
const CreateLeafFunc& createLeaf,
509
const ProgressMonitor& progressMonitor,
510
const Settings settings)
511
{
512
typedef BuilderT<NodeRef,RecalculatePrimRef,CreateAllocFunc,
513
CreateAABBNodeMBFunc,SetAABBNodeMBFunc,
514
CreateOBBNodeMBFunc,SetOBBNodeMBFunc,
515
CreateLeafFunc,ProgressMonitor> Builder;
516
517
Builder builder(scene,recalculatePrimRef,createAlloc,
518
createAABBNodeMB,setAABBNodeMB,
519
createOBBNodeMB,setOBBNodeMB,
520
createLeaf,progressMonitor,settings);
521
522
return builder(prims,pinfo);
523
}
524
};
525
}
526
}
527
528