// Source: godotengine/godot — thirdparty/embree/kernels/builders/bvh_builder_msmblur.h
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
#pragma once
5
6
#define MBLUR_NUM_TEMPORAL_BINS 2
7
#define MBLUR_NUM_OBJECT_BINS 32
8
9
#include "../bvh/bvh.h"
10
#include "../builders/primref_mb.h"
11
#include "heuristic_binning_array_aligned.h"
12
#include "heuristic_timesplit_array.h"
13
14
namespace embree
15
{
16
namespace isa
17
{
18
/*! Minimal reference-counted wrapper around a heap-allocated vector that
 *  may be shared by several build records. The count is a plain size_t
 *  (not atomic); instances live in a LocalChildListT pool below. */
template<typename T>
struct SharedVector
{
  __forceinline SharedVector() {}

  /*! wraps ptr; the wrapper takes ownership and deletes it on last decRef */
  __forceinline SharedVector(T* ptr, size_t refCount = 1)
    : prims(ptr), refCount(refCount) {}

  /*! adds one reference */
  __forceinline void incRef() {
    refCount++;
  }

  /*! drops one reference and frees the vector when none remain */
  __forceinline void decRef()
  {
    if (--refCount == 0)
      delete prims;
  }

  T* prims;        //!< the shared, owned vector
  size_t refCount; //!< number of outstanding references
};
39
40
/*! Per-node list of child build records together with the reference-counted
 *  primitive vectors that back each record. Shared vectors are constructed
 *  with placement new inside the fixed sharedPrimVecs pool, so no heap
 *  allocation happens for the wrappers themselves. */
template<typename BuildRecord, int MAX_BRANCHING_FACTOR>
struct LocalChildListT
{
  typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;

  __forceinline LocalChildListT (const BuildRecord& record)
    : numChildren(1), numSharedPrimVecs(1)
  {
    /* the local root will be freed in the ancestor where it was created (thus refCount is 2) */
    children[0] = record;
    primvecs[0] = new (&sharedPrimVecs[0]) SharedPrimRefVector(record.prims.prims, 2);
  }

  __forceinline ~LocalChildListT()
  {
    /* drop one reference per child; a vector shared by several children
       is freed only when the last reference goes away */
    for (size_t i = 0; i < numChildren; i++)
      primvecs[i]->decRef();
  }

  __forceinline BuildRecord& operator[] ( const size_t i ) {
    return children[i];
  }

  /*! current number of children */
  __forceinline size_t size() const {
    return numChildren;
  }

  /*! replaces child bestChild by lrecord and appends rrecord; new_vector, if
   *  non-null, is the vector a temporal split produced and is handed over to
   *  a SharedPrimRefVector below */
  __forceinline void split(ssize_t bestChild, const BuildRecord& lrecord, const BuildRecord& rrecord, std::unique_ptr<mvector<PrimRefMB>> new_vector)
  {
    SharedPrimRefVector* bsharedPrimVec = primvecs[bestChild];
    /* left child: keep sharing the old vector when it still points into it,
       otherwise wrap the (new) vector it points into */
    if (lrecord.prims.prims == bsharedPrimVec->prims) {
      primvecs[bestChild] = bsharedPrimVec;
      bsharedPrimVec->incRef();
    }
    else {
      primvecs[bestChild] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(lrecord.prims.prims);
    }

    /* right child: same policy */
    if (rrecord.prims.prims == bsharedPrimVec->prims) {
      primvecs[numChildren] = bsharedPrimVec;
      bsharedPrimVec->incRef();
    }
    else {
      primvecs[numChildren] = new (&sharedPrimVecs[numSharedPrimVecs++]) SharedPrimRefVector(rrecord.prims.prims);
    }
    bsharedPrimVec->decRef();  // the replaced child no longer references the old vector
    new_vector.release();      // released, not deleted: ownership now lives in a SharedPrimRefVector

    children[bestChild] = lrecord;
    children[numChildren] = rrecord;
    numChildren++;
  }

public:
  array_t<BuildRecord,MAX_BRANCHING_FACTOR> children;          //!< build record per child
  array_t<SharedPrimRefVector*,MAX_BRANCHING_FACTOR> primvecs; //!< primitive vector backing each child
  size_t numChildren;                                          //!< number of valid children

  array_t<SharedPrimRefVector,2*MAX_BRANCHING_FACTOR> sharedPrimVecs; //!< placement-new pool for the wrappers
  size_t numSharedPrimVecs;                                           //!< used entries of sharedPrimVecs
};
101
102
template<typename Mesh>
103
struct RecalculatePrimRef
104
{
105
Scene* scene;
106
107
__forceinline RecalculatePrimRef (Scene* scene)
108
: scene(scene) {}
109
110
__forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
111
{
112
const unsigned geomID = prim.geomID();
113
const unsigned primID = prim.primID();
114
const Mesh* mesh = scene->get<Mesh>(geomID);
115
const LBBox3fa lbounds = mesh->linearBounds(primID, time_range);
116
const range<int> tbounds = mesh->timeSegmentRange(time_range);
117
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
118
}
119
120
// __noinline is workaround for ICC16 bug under MacOSX
121
__noinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
122
{
123
const unsigned geomID = prim.geomID();
124
const unsigned primID = prim.primID();
125
const Mesh* mesh = scene->get<Mesh>(geomID);
126
const LBBox3fa lbounds = mesh->linearBounds(space, primID, time_range);
127
const range<int> tbounds = mesh->timeSegmentRange(time_range);
128
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
129
}
130
131
__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
132
return scene->get<Mesh>(prim.geomID())->linearBounds(prim.primID(), time_range);
133
}
134
135
// __noinline is workaround for ICC16 bug under MacOSX
136
__noinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
137
return scene->get<Mesh>(prim.geomID())->linearBounds(space, prim.primID(), time_range);
138
}
139
};
140
141
/*! Same job as RecalculatePrimRef, but goes through the virtual Geometry
 *  interface (vlinearBounds) instead of a statically known mesh type. */
struct VirtualRecalculatePrimRef
{
  Scene* scene;
  const SubGridBuildData * const sgrids; //!< optional sub-grid data forwarded to vlinearBounds

  __forceinline VirtualRecalculatePrimRef (Scene* scene, const SubGridBuildData * const sgrids = nullptr)
    : scene(scene), sgrids(sgrids) {}

  /*! rebuilds the primitive reference over the reduced time range */
  __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
  {
    const unsigned geomID = prim.geomID();
    const unsigned primID = prim.primID();
    const Geometry* mesh = scene->get(geomID);
    const LBBox3fa lbounds = mesh->vlinearBounds(primID, time_range, sgrids);
    const range<int> tbounds = mesh->timeSegmentRange(time_range);
    return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
  }

  /*! as above, but bounds are evaluated in the given coordinate space.
   *  NOTE(review): unlike the overload above, sgrids is not forwarded here —
   *  confirm this asymmetry is intended for the space-transformed path */
  __forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const
  {
    const unsigned geomID = prim.geomID();
    const unsigned primID = prim.primID();
    const Geometry* mesh = scene->get(geomID);
    const LBBox3fa lbounds = mesh->vlinearBounds(space, primID, time_range);
    const range<int> tbounds = mesh->timeSegmentRange(time_range);
    return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
  }

  /*! linear bounds of a primitive over the given time range */
  __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
    return scene->get(prim.geomID())->vlinearBounds(prim.primID(), time_range, sgrids);
  }

  /*! linear bounds evaluated in the given coordinate space (no sgrids — see note above) */
  __forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {
    return scene->get(prim.geomID())->vlinearBounds(space, prim.primID(), time_range);
  }
};
177
178
struct BVHBuilderMSMBlur
179
{
180
/*! settings for msmblur builder */
181
struct Settings
182
{
183
/*! default settings */
184
Settings ()
185
: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(8),
186
travCost(1.0f), intCost(1.0f), singleLeafTimeSegment(false),
187
singleThreadThreshold(1024) {}
188
189
190
Settings (size_t sahBlockSize, size_t minLeafSize, size_t maxLeafSize, float travCost, float intCost, size_t singleThreadThreshold)
191
: branchingFactor(2), maxDepth(32), logBlockSize(bsr(sahBlockSize)), minLeafSize(minLeafSize), maxLeafSize(maxLeafSize),
192
travCost(travCost), intCost(intCost), singleThreadThreshold(singleThreadThreshold)
193
{
194
minLeafSize = min(minLeafSize,maxLeafSize);
195
}
196
197
public:
198
size_t branchingFactor; //!< branching factor of BVH to build
199
size_t maxDepth; //!< maximum depth of BVH to build
200
size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
201
size_t minLeafSize; //!< minimum size of a leaf
202
size_t maxLeafSize; //!< maximum size of a leaf
203
float travCost; //!< estimated cost of one traversal step
204
float intCost; //!< estimated cost of one primitive intersection
205
bool singleLeafTimeSegment; //!< split time to single time range
206
size_t singleThreadThreshold; //!< threshold when we switch to single threaded build
207
};
208
209
/*! A subtree to build: a primitive set plus the depth of its root. */
struct BuildRecord
{
public:
  __forceinline BuildRecord () {}

  __forceinline BuildRecord (size_t depth)
    : depth(depth) {}

  __forceinline BuildRecord (const SetMB& prims, size_t depth)
    : depth(depth), prims(prims) {}

  /*! orders records by the number of primitives they contain */
  __forceinline friend bool operator< (const BuildRecord& a, const BuildRecord& b) {
    return a.prims.size() < b.prims.size();
  }

  /*! number of primitives in this record */
  __forceinline size_t size() const {
    return prims.size();
  }

public:
  size_t depth; //!< Depth of the root of this subtree.
  SetMB prims;  //!< The list of primitives.
};
232
233
/*! A build record bundled with the split already chosen for it. */
struct BuildRecordSplit : public BuildRecord
{
  __forceinline BuildRecordSplit () {}

  __forceinline BuildRecordSplit (size_t depth)
    : BuildRecord(depth) {}

  __forceinline BuildRecordSplit (const BuildRecord& record, const BinSplit<MBLUR_NUM_OBJECT_BINS>& split)
    : BuildRecord(record), split(split) {}

  BinSplit<MBLUR_NUM_OBJECT_BINS> split; //!< the split to apply to this record
};
245
246
template<
247
typename NodeRef,
248
typename RecalculatePrimRef,
249
typename Allocator,
250
typename CreateAllocFunc,
251
typename CreateNodeFunc,
252
typename SetNodeFunc,
253
typename CreateLeafFunc,
254
typename ProgressMonitor>
255
256
class BuilderT
257
{
258
ALIGNED_CLASS_(16);
259
static const size_t MAX_BRANCHING_FACTOR = 16; //!< maximum supported BVH branching factor
260
static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
261
262
typedef BVHNodeRecordMB4D<NodeRef> NodeRecordMB4D;
263
typedef BinSplit<MBLUR_NUM_OBJECT_BINS> Split;
264
typedef mvector<PrimRefMB>* PrimRefVector;
265
typedef SharedVector<mvector<PrimRefMB>> SharedPrimRefVector;
266
typedef LocalChildListT<BuildRecord,MAX_BRANCHING_FACTOR> LocalChildList;
267
typedef LocalChildListT<BuildRecordSplit,MAX_BRANCHING_FACTOR> LocalChildListSplit;
268
269
public:
270
271
/*! stores settings, split heuristics, and the user-supplied callbacks */
BuilderT (MemoryMonitorInterface* device,
          const RecalculatePrimRef recalculatePrimRef,
          const CreateAllocFunc createAlloc,
          const CreateNodeFunc createNode,
          const SetNodeFunc setNode,
          const CreateLeafFunc createLeaf,
          const ProgressMonitor progressMonitor,
          const Settings& settings)
  : cfg(settings),
    heuristicObjectSplit(),
    heuristicTemporalSplit(device, recalculatePrimRef),
    recalculatePrimRef(recalculatePrimRef), createAlloc(createAlloc), createNode(createNode), setNode(setNode), createLeaf(createLeaf),
    progressMonitor(progressMonitor)
{
  /* child arrays are statically sized, so unsupported branching factors are a hard error */
  if (cfg.branchingFactor > MAX_BRANCHING_FACTOR)
    throw_RTCError(RTC_ERROR_UNKNOWN,"bvh_builder: branching factor too large");
}
288
289
/*! finds the best split for the set: object split by default, temporal
 *  split when the object split is poor and time can still be subdivided */
const Split find(const SetMB& set)
{
  /* first try standard object split */
  const Split object_split = heuristicObjectSplit.find(set,cfg.logBlockSize);
  const float object_split_sah = object_split.splitSAH();

  /* test temporal splits only when object split was bad */
  const float leaf_sah = set.leafSAH(cfg.logBlockSize);
  if (object_split_sah < 0.50f*leaf_sah)
    return object_split;

  /* do temporal splits only if the time range is big enough */
  if (set.time_range.size() > 1.01f/float(set.max_num_time_segments))
  {
    const Split temporal_split = heuristicTemporalSplit.find(set,cfg.logBlockSize);
    const float temporal_split_sah = temporal_split.splitSAH();

    /* take temporal split if it improved SAH */
    if (temporal_split_sah < object_split_sah)
      return temporal_split;
  }

  return object_split;
}
314
315
/*! array partitioning: dispatches on split.data to the matching heuristic.
 *  Only a temporal split allocates and returns a new primitive vector;
 *  every other case partitions in place and returns a null pointer. */
__forceinline std::unique_ptr<mvector<PrimRefMB>> split(const Split& split, const SetMB& set, SetMB& lset, SetMB& rset)
{
  /* perform object split */
  if (likely(split.data == Split::SPLIT_OBJECT)) {
    heuristicObjectSplit.split(split,set,lset,rset);
  }
  /* perform temporal split */
  else if (likely(split.data == Split::SPLIT_TEMPORAL)) {
    return heuristicTemporalSplit.split(split,set,lset,rset);
  }
  /* perform fallback split */
  else if (unlikely(split.data == Split::SPLIT_FALLBACK)) {
    set.deterministic_order();
    splitFallback(set,lset,rset);
  }
  /* split by geometry */
  else if (unlikely(split.data == Split::SPLIT_GEOMID)) {
    set.deterministic_order();
    splitByGeometry(set,lset,rset);
  }
  else
    assert(false);

  /* in-place splits produced no new vector */
  return std::unique_ptr<mvector<PrimRefMB>>();
}
341
342
/*! finds the best fallback split: split off foreign geometries first, then
 *  (when leaves must be single-segment) split time, otherwise split in half */
__noinline Split findFallback(const SetMB& set)
{
  /* split if primitives are not from same geometry */
  if (!sameGeometry(set))
    return Split(0.0f,Split::SPLIT_GEOMID);

  /* if a leaf can only hold a single time-segment, we might have to do additional temporal splits */
  if (cfg.singleLeafTimeSegment)
  {
    /* test if one primitive has more than one time segment in time range, if so split time */
    for (size_t i=set.begin(); i<set.end(); i++)
    {
      const PrimRefMB& prim = (*set.prims)[i];
      const range<int> itime_range = prim.timeSegmentRange(set.time_range);
      const int localTimeSegments = itime_range.size();
      assert(localTimeSegments > 0);
      if (localTimeSegments > 1) {
        /* split at the boundary of the middle time segment of this primitive */
        const int icenter = (itime_range.begin() + itime_range.end())/2;
        const float splitTime = prim.timeStep(icenter);
        return Split(0.0f,(unsigned)Split::SPLIT_TEMPORAL,0,splitTime);
      }
    }
  }

  /* otherwise return fallback split */
  return Split(0.0f,Split::SPLIT_FALLBACK);
}
370
371
/*! performs fallback split */
372
void splitFallback(const SetMB& set, SetMB& lset, SetMB& rset)
373
{
374
mvector<PrimRefMB>& prims = *set.prims;
375
376
const size_t begin = set.begin();
377
const size_t end = set.end();
378
const size_t center = (begin + end + 1) / 2;
379
380
PrimInfoMB linfo = empty;
381
for (size_t i=begin; i<center; i++)
382
linfo.add_primref(prims[i]);
383
384
PrimInfoMB rinfo = empty;
385
for (size_t i=center; i<end; i++)
386
rinfo.add_primref(prims[i]);
387
388
new (&lset) SetMB(linfo,set.prims,range<size_t>(begin,center),set.time_range);
389
new (&rset) SetMB(rinfo,set.prims,range<size_t>(center,end ),set.time_range);
390
}
391
392
/*! checks if all primitives are from the same geometry */
393
__forceinline bool sameGeometry(const SetMB& set)
394
{
395
if (set.size() == 0) return true;
396
mvector<PrimRefMB>& prims = *set.prims;
397
const size_t begin = set.begin();
398
const size_t end = set.end();
399
unsigned int firstGeomID = prims[begin].geomID();
400
for (size_t i=begin+1; i<end; i++) {
401
if (prims[i].geomID() != firstGeomID){
402
return false;
403
}
404
}
405
return true;
406
}
407
408
/* split by geometry ID: primitives matching the geomID of the first
   primitive go left, all others go right */
void splitByGeometry(const SetMB& set, SetMB& lset, SetMB& rset)
{
  assert(set.size() > 1);

  mvector<PrimRefMB>& prims = *set.prims;
  const size_t begin = set.begin();
  const size_t end = set.end();

  PrimInfoMB left(empty);
  PrimInfoMB right(empty);
  unsigned int geomID = prims[begin].geomID();
  size_t center = serial_partitioning(prims.data(),begin,end,left,right,
                                      [&] ( const PrimRefMB& prim ) { return prim.geomID() == geomID; },
                                      [ ] ( PrimInfoMB& dst, const PrimRefMB& prim ) { dst.add_primref(prim); });

  /* rebuild the two sets over the partitioned sub-ranges */
  new (&lset) SetMB(left, set.prims,range<size_t>(begin,center),set.time_range);
  new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
}
427
428
/*! creates a leaf, or a small subtree of fallback-split nodes when the
 *  record holds more primitives than a single leaf may contain */
const NodeRecordMB4D createLargeLeaf(const BuildRecord& in, Allocator alloc)
{
  /* this should never occur but is a fatal error */
  if (in.depth > cfg.maxDepth)
    throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");

  /* replace already found split by fallback split */
  const BuildRecordSplit current(BuildRecord(in.prims,in.depth),findFallback(in.prims));

  /* special case when directly creating leaf without any splits that could shrink time_range */
  bool force_split = false;
  if (current.depth == 1 && current.size() > 0)
  {
    BBox1f c = empty;
    BBox1f p = current.prims.time_range;
    /* c = union of the primitives' time ranges */
    for (size_t i=current.prims.begin(); i<current.prims.end(); i++) {
      mvector<PrimRefMB>& prims = *current.prims.prims;
      c.extend(prims[i].time_range);
    }

    /* force a split when the primitives cover less time than the set claims */
    force_split = c.lower > p.lower || c.upper < p.upper;
  }

  /* create leaf for few primitives */
  if (current.size() <= cfg.maxLeafSize && current.split.data < Split::SPLIT_ENFORCE && !force_split)
    return createLeaf(current,alloc);

  /* fill all children by always splitting the largest one */
  bool hasTimeSplits = false;
  NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
  LocalChildListSplit children(current);

  do {
    /* find best child with largest bounding box area */
    size_t bestChild = -1;   // size_t(-1) sentinel: no splittable child found
    size_t bestSize = 0;
    for (size_t i=0; i<children.size(); i++)
    {
      /* ignore leaves as they cannot get split */
      if (children[i].size() <= cfg.maxLeafSize && children[i].split.data < Split::SPLIT_ENFORCE && !force_split)
        continue;

      /* force_split only needs to trigger one split, then it is consumed */
      force_split = false;

      /* remember child with largest size */
      if (children[i].size() > bestSize) {
        bestSize = children[i].size();
        bestChild = i;
      }
    }
    if (bestChild == -1) break;

    /* perform best found split */
    BuildRecordSplit& brecord = children[bestChild];
    BuildRecordSplit lrecord(current.depth+1);
    BuildRecordSplit rrecord(current.depth+1);
    std::unique_ptr<mvector<PrimRefMB>> new_vector = split(brecord.split,brecord.prims,lrecord.prims,rrecord.prims);
    hasTimeSplits |= new_vector != nullptr;   // only temporal splits return a new vector

    /* find new splits */
    lrecord.split = findFallback(lrecord.prims);
    rrecord.split = findFallback(rrecord.prims);
    children.split(bestChild,lrecord,rrecord,std::move(new_vector));

  } while (children.size() < cfg.branchingFactor);

  /* detect time_ranges that have shrunken */
  for (size_t i=0; i<children.size(); i++) {
    const BBox1f c = children[i].prims.time_range;
    const BBox1f p = in.prims.time_range;
    hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
  }

  /* create node */
  auto node = createNode(children.children.data(),children.numChildren,alloc,hasTimeSplits);

  /* recurse into each child and perform reduction */
  LBBox3fa gbounds = empty;
  for (size_t i=0; i<children.size(); i++) {
    values[i] = createLargeLeaf(children[i],alloc);
    gbounds.extend(values[i].lbounds);
  }

  setNode(current,children.children.data(),node,values,children.numChildren);

  /* calculate geometry bounds of this node; with time splits the merged
     child bounds are not valid for the full range, so recompute */
  if (hasTimeSplits)
    return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
  else
    return NodeRecordMB4D(node,gbounds,current.prims.time_range);
}
519
520
/*! recursively builds the subtree for the given record; parallelizes over
 *  children while the record is large, then switches to serial recursion */
const NodeRecordMB4D recurse(const BuildRecord& current, Allocator alloc, bool toplevel)
{
  /* get thread local allocator */
  if (!alloc)
    alloc = createAlloc();

  /* call memory monitor function to signal progress */
  if (toplevel && current.size() <= cfg.singleThreadThreshold)
    progressMonitor(current.size());

  /*! find best split */
  const Split csplit = find(current.prims);

  /*! compute leaf and split cost */
  const float leafSAH = cfg.intCost*current.prims.leafSAH(cfg.logBlockSize);
  const float splitSAH = cfg.travCost*current.prims.halfArea()+cfg.intCost*csplit.splitSAH();
  assert((current.size() == 0) || ((leafSAH >= 0) && (splitSAH >= 0)));

  /*! create a leaf node when threshold reached or SAH tells us to stop */
  if (current.size() <= cfg.minLeafSize || current.depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || (current.size() <= cfg.maxLeafSize && leafSAH <= splitSAH)) {
    current.prims.deterministic_order();
    return createLargeLeaf(current,alloc);
  }

  /*! perform initial split */
  SetMB lprims,rprims;
  std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,current.prims,lprims,rprims);
  bool hasTimeSplits = new_vector != nullptr;  // only temporal splits return a new vector
  NodeRecordMB4D values[MAX_BRANCHING_FACTOR];
  LocalChildList children(current);
  {
    BuildRecord lrecord(lprims,current.depth+1);
    BuildRecord rrecord(rprims,current.depth+1);
    children.split(0,lrecord,rrecord,std::move(new_vector));
  }

  /*! split until node is full or SAH tells us to stop */
  while (children.size() < cfg.branchingFactor)
  {
    /*! find best child to split: largest expected surface area */
    float bestArea = neg_inf;
    ssize_t bestChild = -1;
    for (size_t i=0; i<children.size(); i++)
    {
      if (children[i].size() <= cfg.minLeafSize) continue;
      if (expectedApproxHalfArea(children[i].prims.geomBounds) > bestArea) {
        bestChild = i; bestArea = expectedApproxHalfArea(children[i].prims.geomBounds);
      }
    }
    if (bestChild == -1) break;

    /* perform split */
    BuildRecord& brecord = children[bestChild];
    BuildRecord lrecord(current.depth+1);
    BuildRecord rrecord(current.depth+1);
    Split csplit = find(brecord.prims);
    std::unique_ptr<mvector<PrimRefMB>> new_vector = split(csplit,brecord.prims,lrecord.prims,rrecord.prims);
    hasTimeSplits |= new_vector != nullptr;
    children.split(bestChild,lrecord,rrecord,std::move(new_vector));
  }

  /* detect time_ranges that have shrunken */
  for (size_t i=0; i<children.size(); i++) {
    const BBox1f c = children[i].prims.time_range;
    const BBox1f p = current.prims.time_range;
    hasTimeSplits |= c.lower > p.lower || c.upper < p.upper;
  }

  /* sort buildrecords for simpler shadow ray traversal */
  //std::sort(&children[0],&children[children.size()],std::greater<BuildRecord>()); // FIXME: reduces traversal performance of bvh8.triangle4 (need to verified) !!

  /*! create an inner node */
  auto node = createNode(children.children.data(), children.numChildren, alloc, hasTimeSplits);
  LBBox3fa gbounds = empty;

  /* spawn tasks */
  if (unlikely(current.size() > cfg.singleThreadThreshold))
  {
    /*! parallel_for is faster than spawning sub-tasks */
    parallel_for(size_t(0), children.size(), [&] (const range<size_t>& r) {
      for (size_t i=r.begin(); i<r.end(); i++) {
        /* child tasks create their own thread-local allocator (nullptr) */
        values[i] = recurse(children[i],nullptr,true);
        _mm_mfence(); // to allow non-temporal stores during build
      }
    });

    /*! merge bounding boxes */
    for (size_t i=0; i<children.size(); i++)
      gbounds.extend(values[i].lbounds);
  }
  /* recurse into each child */
  else
  {
    //for (size_t i=0; i<children.size(); i++)
    for (ssize_t i=children.size()-1; i>=0; i--) {
      values[i] = recurse(children[i],alloc,false);
      gbounds.extend(values[i].lbounds);
    }
  }

  setNode(current,children.children.data(),node,values,children.numChildren);

  /* calculate geometry bounds of this node; with time splits the merged
     child bounds are not valid for the full range, so recompute */
  if (unlikely(hasTimeSplits))
    return NodeRecordMB4D(node,current.prims.linearBounds(recalculatePrimRef),current.prims.time_range);
  else
    return NodeRecordMB4D(node,gbounds,current.prims.time_range);
}
628
629
/*! builder entry function: builds the whole BVH over prims and returns the root record */
__forceinline const NodeRecordMB4D operator() (mvector<PrimRefMB>& prims, const PrimInfoMB& pinfo)
{
  const SetMB set(pinfo,&prims);
  auto ret = recurse(BuildRecord(set,1),nullptr,true);  // root is at depth 1
  _mm_mfence(); // to allow non-temporal stores during build
  return ret;
}
637
638
private:
639
Settings cfg;
640
HeuristicArrayBinningMB<PrimRefMB,MBLUR_NUM_OBJECT_BINS> heuristicObjectSplit;
641
HeuristicMBlurTemporalSplit<PrimRefMB,RecalculatePrimRef,MBLUR_NUM_TEMPORAL_BINS> heuristicTemporalSplit;
642
const RecalculatePrimRef recalculatePrimRef;
643
const CreateAllocFunc createAlloc;
644
const CreateNodeFunc createNode;
645
const SetNodeFunc setNode;
646
const CreateLeafFunc createLeaf;
647
const ProgressMonitor progressMonitor;
648
};
649
650
/*! convenience entry point: instantiates a BuilderT for the given callbacks
 *  and runs it over the primitive array */
template<typename NodeRef,
         typename RecalculatePrimRef,
         typename CreateAllocFunc,
         typename CreateNodeFunc,
         typename SetNodeFunc,
         typename CreateLeafFunc,
         typename ProgressMonitorFunc>

static const BVHNodeRecordMB4D<NodeRef> build(mvector<PrimRefMB>& prims,
                                              const PrimInfoMB& pinfo,
                                              MemoryMonitorInterface* device,
                                              const RecalculatePrimRef recalculatePrimRef,
                                              const CreateAllocFunc createAlloc,
                                              const CreateNodeFunc createNode,
                                              const SetNodeFunc setNode,
                                              const CreateLeafFunc createLeaf,
                                              const ProgressMonitorFunc progressMonitor,
                                              const Settings& settings)
{
  /* the thread-local allocator type is deduced from the createAlloc callback */
  typedef BuilderT<
    NodeRef,
    RecalculatePrimRef,
    decltype(createAlloc()),
    CreateAllocFunc,
    CreateNodeFunc,
    SetNodeFunc,
    CreateLeafFunc,
    ProgressMonitorFunc> Builder;

  Builder builder(device,
                  recalculatePrimRef,
                  createAlloc,
                  createNode,
                  setNode,
                  createLeaf,
                  progressMonitor,
                  settings);

  return builder(prims,pinfo);
}
691
};
692
}
693
}
694
695