Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/kernels/builders/bvh_builder_hair.h
9906 views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
#pragma once
5
6
#include "../bvh/bvh.h"
7
#include "../geometry/primitive.h"
8
#include "../builders/bvh_builder_sah.h"
9
#include "../builders/heuristic_binning_array_aligned.h"
10
#include "../builders/heuristic_binning_array_unaligned.h"
11
#include "../builders/heuristic_strand_array.h"
12
13
#define NUM_HAIR_OBJECT_BINS 32
14
15
namespace embree
16
{
17
namespace isa
18
{
19
struct BVHBuilderHair
20
{
21
/*! settings for builder */
22
struct Settings
23
{
24
/*! default settings */
25
Settings ()
26
: branchingFactor(2), maxDepth(32), logBlockSize(0), minLeafSize(1), maxLeafSize(7), finished_range_threshold(inf) {}
27
28
public:
29
size_t branchingFactor; //!< branching factor of BVH to build
30
size_t maxDepth; //!< maximum depth of BVH to build
31
size_t logBlockSize; //!< log2 of blocksize for SAH heuristic
32
size_t minLeafSize; //!< minimum size of a leaf
33
size_t maxLeafSize; //!< maximum size of a leaf
34
size_t finished_range_threshold; //!< finished range threshold
35
};
36
37
template<typename NodeRef,
38
typename CreateAllocFunc,
39
typename CreateAABBNodeFunc,
40
typename SetAABBNodeFunc,
41
typename CreateOBBNodeFunc,
42
typename SetOBBNodeFunc,
43
typename CreateLeafFunc,
44
typename ProgressMonitor,
45
typename ReportFinishedRangeFunc>
46
47
class BuilderT
48
{
49
ALIGNED_CLASS_(16);
50
friend struct BVHBuilderHair;
51
52
typedef FastAllocator::CachedAllocator Allocator;
53
typedef HeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> HeuristicBinningSAH;
54
typedef UnalignedHeuristicArrayBinningSAH<PrimRef,NUM_HAIR_OBJECT_BINS> UnalignedHeuristicBinningSAH;
55
typedef HeuristicStrandSplit HeuristicStrandSplitSAH;
56
57
static const size_t MAX_BRANCHING_FACTOR = 8; //!< maximum supported BVH branching factor
58
static const size_t MIN_LARGE_LEAF_LEVELS = 8; //!< create balanced tree if we are that many levels before the maximum tree depth
59
static const size_t SINGLE_THREADED_THRESHOLD = 4096; //!< threshold to switch to single threaded build
60
61
static const size_t travCostAligned = 1;
62
static const size_t travCostUnaligned = 5;
63
static const size_t intCost = 6;
64
65
BuilderT (Scene* scene,
66
PrimRef* prims,
67
const CreateAllocFunc& createAlloc,
68
const CreateAABBNodeFunc& createAABBNode,
69
const SetAABBNodeFunc& setAABBNode,
70
const CreateOBBNodeFunc& createOBBNode,
71
const SetOBBNodeFunc& setOBBNode,
72
const CreateLeafFunc& createLeaf,
73
const ProgressMonitor& progressMonitor,
74
const ReportFinishedRangeFunc& reportFinishedRange,
75
const Settings settings)
76
77
: cfg(settings),
78
prims(prims),
79
createAlloc(createAlloc),
80
createAABBNode(createAABBNode),
81
setAABBNode(setAABBNode),
82
createOBBNode(createOBBNode),
83
setOBBNode(setOBBNode),
84
createLeaf(createLeaf),
85
progressMonitor(progressMonitor),
86
reportFinishedRange(reportFinishedRange),
87
alignedHeuristic(prims), unalignedHeuristic(scene,prims), strandHeuristic(scene,prims) {}
88
89
/*! checks if all primitives are from the same geometry */
90
__forceinline bool sameGeometry(const PrimInfoRange& range)
91
{
92
if (range.size() == 0) return true;
93
unsigned int firstGeomID = prims[range.begin()].geomID();
94
for (size_t i=range.begin()+1; i<range.end(); i++) {
95
if (prims[i].geomID() != firstGeomID){
96
return false;
97
}
98
}
99
return true;
100
}
101
102
/*! creates a large leaf that could be larger than supported by the BVH */
103
NodeRef createLargeLeaf(size_t depth, const PrimInfoRange& pinfo, Allocator alloc)
104
{
105
/* this should never occur but is a fatal error */
106
if (depth > cfg.maxDepth)
107
throw_RTCError(RTC_ERROR_UNKNOWN,"depth limit reached");
108
109
/* create leaf for few primitives */
110
if (pinfo.size() <= cfg.maxLeafSize && sameGeometry(pinfo))
111
return createLeaf(prims,pinfo,alloc);
112
113
/* fill all children by always splitting the largest one */
114
PrimInfoRange children[MAX_BRANCHING_FACTOR];
115
unsigned numChildren = 1;
116
children[0] = pinfo;
117
118
do {
119
120
/* find best child with largest bounding box area */
121
int bestChild = -1;
122
size_t bestSize = 0;
123
for (unsigned i=0; i<numChildren; i++)
124
{
125
/* ignore leaves as they cannot get split */
126
if (children[i].size() <= cfg.maxLeafSize && sameGeometry(children[i]))
127
continue;
128
129
/* remember child with largest size */
130
if (children[i].size() > bestSize) {
131
bestSize = children[i].size();
132
bestChild = i;
133
}
134
}
135
if (bestChild == -1) break;
136
137
/*! split best child into left and right child */
138
__aligned(64) PrimInfoRange left, right;
139
if (!sameGeometry(children[bestChild])) {
140
alignedHeuristic.splitByGeometry(children[bestChild],left,right);
141
} else {
142
alignedHeuristic.splitFallback(children[bestChild],left,right);
143
}
144
145
/* add new children left and right */
146
children[bestChild] = children[numChildren-1];
147
children[numChildren-1] = left;
148
children[numChildren+0] = right;
149
numChildren++;
150
151
} while (numChildren < cfg.branchingFactor);
152
153
/* create node */
154
auto node = createAABBNode(alloc);
155
156
for (size_t i=0; i<numChildren; i++) {
157
const NodeRef child = createLargeLeaf(depth+1,children[i],alloc);
158
setAABBNode(node,i,child,children[i].geomBounds);
159
}
160
161
return node;
162
}
163
164
/*! performs split */
165
__noinline void split(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo, bool& aligned) // FIXME: not inlined as ICC otherwise uses much stack
166
{
167
/* variable to track the SAH of the best splitting approach */
168
float bestSAH = inf;
169
const size_t blocks = (pinfo.size()+(1ull<<cfg.logBlockSize)-1ull) >> cfg.logBlockSize;
170
const float leafSAH = intCost*float(blocks)*halfArea(pinfo.geomBounds);
171
172
/* try standard binning in aligned space */
173
float alignedObjectSAH = inf;
174
HeuristicBinningSAH::Split alignedObjectSplit;
175
if (aligned) {
176
alignedObjectSplit = alignedHeuristic.find(pinfo,cfg.logBlockSize);
177
alignedObjectSAH = travCostAligned*halfArea(pinfo.geomBounds) + intCost*alignedObjectSplit.splitSAH();
178
bestSAH = min(alignedObjectSAH,bestSAH);
179
}
180
181
/* try standard binning in unaligned space */
182
UnalignedHeuristicBinningSAH::Split unalignedObjectSplit;
183
LinearSpace3fa uspace;
184
float unalignedObjectSAH = inf;
185
if (bestSAH > 0.7f*leafSAH) {
186
uspace = unalignedHeuristic.computeAlignedSpace(pinfo);
187
const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(pinfo,uspace);
188
unalignedObjectSplit = unalignedHeuristic.find(sinfo,cfg.logBlockSize,uspace);
189
unalignedObjectSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*unalignedObjectSplit.splitSAH();
190
bestSAH = min(unalignedObjectSAH,bestSAH);
191
}
192
193
/* try splitting into two strands */
194
HeuristicStrandSplitSAH::Split strandSplit;
195
float strandSAH = inf;
196
if (bestSAH > 0.7f*leafSAH && pinfo.size() <= 256) {
197
strandSplit = strandHeuristic.find(pinfo,cfg.logBlockSize);
198
strandSAH = travCostUnaligned*halfArea(pinfo.geomBounds) + intCost*strandSplit.splitSAH();
199
bestSAH = min(strandSAH,bestSAH);
200
}
201
202
/* fallback if SAH heuristics failed */
203
if (unlikely(!std::isfinite(bestSAH)))
204
{
205
alignedHeuristic.deterministic_order(pinfo);
206
alignedHeuristic.splitFallback(pinfo,linfo,rinfo);
207
}
208
209
/* perform aligned split if this is best */
210
else if (bestSAH == alignedObjectSAH) {
211
alignedHeuristic.split(alignedObjectSplit,pinfo,linfo,rinfo);
212
}
213
214
/* perform unaligned split if this is best */
215
else if (bestSAH == unalignedObjectSAH) {
216
unalignedHeuristic.split(unalignedObjectSplit,uspace,pinfo,linfo,rinfo);
217
aligned = false;
218
}
219
220
/* perform strand split if this is best */
221
else if (bestSAH == strandSAH) {
222
strandHeuristic.split(strandSplit,pinfo,linfo,rinfo);
223
aligned = false;
224
}
225
226
/* can never happen */
227
else
228
assert(false);
229
}
230
231
/*! recursive build */
232
NodeRef recurse(size_t depth, const PrimInfoRange& pinfo, Allocator alloc, bool toplevel, bool alloc_barrier)
233
{
234
/* get thread local allocator */
235
if (!alloc)
236
alloc = createAlloc();
237
238
/* call memory monitor function to signal progress */
239
if (toplevel && pinfo.size() <= SINGLE_THREADED_THRESHOLD)
240
progressMonitor(pinfo.size());
241
242
PrimInfoRange children[MAX_BRANCHING_FACTOR];
243
244
/* create leaf node */
245
if (depth+MIN_LARGE_LEAF_LEVELS >= cfg.maxDepth || pinfo.size() <= cfg.minLeafSize) {
246
alignedHeuristic.deterministic_order(pinfo);
247
return createLargeLeaf(depth,pinfo,alloc);
248
}
249
250
/* fill all children by always splitting the one with the largest surface area */
251
size_t numChildren = 1;
252
children[0] = pinfo;
253
bool aligned = true;
254
255
do {
256
257
/* find best child with largest bounding box area */
258
ssize_t bestChild = -1;
259
float bestArea = neg_inf;
260
for (size_t i=0; i<numChildren; i++)
261
{
262
/* ignore leaves as they cannot get split */
263
if (children[i].size() <= cfg.minLeafSize)
264
continue;
265
266
/* remember child with largest area */
267
if (area(children[i].geomBounds) > bestArea) {
268
bestArea = area(children[i].geomBounds);
269
bestChild = i;
270
}
271
}
272
if (bestChild == -1) break;
273
274
/*! split best child into left and right child */
275
PrimInfoRange left, right;
276
split(children[bestChild],left,right,aligned);
277
278
/* add new children left and right */
279
children[bestChild] = children[numChildren-1];
280
children[numChildren-1] = left;
281
children[numChildren+0] = right;
282
numChildren++;
283
284
} while (numChildren < cfg.branchingFactor);
285
286
NodeRef node;
287
288
/* create aligned node */
289
if (aligned)
290
{
291
node = createAABBNode(alloc);
292
293
/* spawn tasks or ... */
294
if (pinfo.size() > SINGLE_THREADED_THRESHOLD)
295
{
296
parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
297
for (size_t i=r.begin(); i<r.end(); i++) {
298
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
299
setAABBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),children[i].geomBounds);
300
_mm_mfence(); // to allow non-temporal stores during build
301
}
302
});
303
}
304
/* ... continue sequentially */
305
else {
306
for (size_t i=0; i<numChildren; i++) {
307
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
308
setAABBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),children[i].geomBounds);
309
}
310
}
311
}
312
313
/* create unaligned node */
314
else
315
{
316
node = createOBBNode(alloc);
317
318
/* spawn tasks or ... */
319
if (pinfo.size() > SINGLE_THREADED_THRESHOLD)
320
{
321
parallel_for(size_t(0), numChildren, [&] (const range<size_t>& r) {
322
for (size_t i=r.begin(); i<r.end(); i++) {
323
const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]);
324
const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space);
325
const OBBox3fa obounds(space,sinfo.geomBounds);
326
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
327
setOBBNode(node,i,recurse(depth+1,children[i],nullptr,true,child_alloc_barrier),obounds);
328
_mm_mfence(); // to allow non-temporal stores during build
329
}
330
});
331
}
332
/* ... continue sequentially */
333
else
334
{
335
for (size_t i=0; i<numChildren; i++) {
336
const LinearSpace3fa space = unalignedHeuristic.computeAlignedSpace(children[i]);
337
const PrimInfoRange sinfo = unalignedHeuristic.computePrimInfo(children[i],space);
338
const OBBox3fa obounds(space,sinfo.geomBounds);
339
const bool child_alloc_barrier = pinfo.size() > cfg.finished_range_threshold && children[i].size() <= cfg.finished_range_threshold;
340
setOBBNode(node,i,recurse(depth+1,children[i],alloc,false,child_alloc_barrier),obounds);
341
}
342
}
343
}
344
345
/* reports a finished range of primrefs */
346
if (unlikely(alloc_barrier))
347
reportFinishedRange(pinfo);
348
349
return node;
350
}
351
352
private:
353
Settings cfg;
354
PrimRef* prims;
355
const CreateAllocFunc& createAlloc;
356
const CreateAABBNodeFunc& createAABBNode;
357
const SetAABBNodeFunc& setAABBNode;
358
const CreateOBBNodeFunc& createOBBNode;
359
const SetOBBNodeFunc& setOBBNode;
360
const CreateLeafFunc& createLeaf;
361
const ProgressMonitor& progressMonitor;
362
const ReportFinishedRangeFunc& reportFinishedRange;
363
364
private:
365
HeuristicBinningSAH alignedHeuristic;
366
UnalignedHeuristicBinningSAH unalignedHeuristic;
367
HeuristicStrandSplitSAH strandHeuristic;
368
};
369
370
template<typename NodeRef,
371
typename CreateAllocFunc,
372
typename CreateAABBNodeFunc,
373
typename SetAABBNodeFunc,
374
typename CreateOBBNodeFunc,
375
typename SetOBBNodeFunc,
376
typename CreateLeafFunc,
377
typename ProgressMonitor,
378
typename ReportFinishedRangeFunc>
379
380
static NodeRef build (const CreateAllocFunc& createAlloc,
381
const CreateAABBNodeFunc& createAABBNode,
382
const SetAABBNodeFunc& setAABBNode,
383
const CreateOBBNodeFunc& createOBBNode,
384
const SetOBBNodeFunc& setOBBNode,
385
const CreateLeafFunc& createLeaf,
386
const ProgressMonitor& progressMonitor,
387
const ReportFinishedRangeFunc& reportFinishedRange,
388
Scene* scene,
389
PrimRef* prims,
390
const PrimInfo& pinfo,
391
const Settings settings)
392
{
393
typedef BuilderT<NodeRef,
394
CreateAllocFunc,
395
CreateAABBNodeFunc,SetAABBNodeFunc,
396
CreateOBBNodeFunc,SetOBBNodeFunc,
397
CreateLeafFunc,ProgressMonitor,
398
ReportFinishedRangeFunc> Builder;
399
400
Builder builder(scene,prims,createAlloc,
401
createAABBNode,setAABBNode,
402
createOBBNode,setOBBNode,
403
createLeaf,progressMonitor,reportFinishedRange,settings);
404
405
NodeRef root = builder.recurse(1,pinfo,nullptr,true,false);
406
_mm_mfence(); // to allow non-temporal stores during build
407
return root;
408
}
409
};
410
}
411
}
412
413