Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/kernels/builders/heuristic_openmerge_array.h
9906 views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
// TODO:
5
// - adjust parallel build thresholds
6
// - openNodesBasedOnExtend should consider max extended size
7
8
#pragma once
9
10
#include "heuristic_binning.h"
11
#include "heuristic_spatial.h"
12
13
/* stop opening of all bref.geomIDs are the same */
14
#define EQUAL_GEOMID_STOP_CRITERIA 1
15
16
/* 10% spatial extend threshold */
17
#define MAX_EXTEND_THRESHOLD 0.1f
18
19
/* maximum is 8 children */
20
#define MAX_OPENED_CHILD_NODES 8
21
22
/* open until all build refs are below threshold size in one step */
23
#define USE_LOOP_OPENING 0
24
25
namespace embree
26
{
27
namespace isa
28
{
29
/*! Performs standard object binning */
30
template<typename NodeOpenerFunc, typename PrimRef, size_t OBJECT_BINS>
31
struct HeuristicArrayOpenMergeSAH
32
{
33
typedef BinSplit<OBJECT_BINS> Split;
34
typedef BinInfoT<OBJECT_BINS,PrimRef,BBox3fa> Binner;
35
36
static const size_t PARALLEL_THRESHOLD = 1024;
37
static const size_t PARALLEL_FIND_BLOCK_SIZE = 512;
38
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
39
40
static const size_t MOVE_STEP_SIZE = 64;
41
static const size_t CREATE_SPLITS_STEP_SIZE = 128;
42
43
__forceinline HeuristicArrayOpenMergeSAH ()
44
: prims0(nullptr) {}
45
46
/*! remember prim array */
47
__forceinline HeuristicArrayOpenMergeSAH (const NodeOpenerFunc& nodeOpenerFunc, PrimRef* prims0, size_t max_open_size)
48
: prims0(prims0), nodeOpenerFunc(nodeOpenerFunc), max_open_size(max_open_size)
49
{
50
assert(max_open_size <= MAX_OPENED_CHILD_NODES);
51
}
52
53
struct OpenHeuristic
54
{
55
__forceinline OpenHeuristic( const PrimInfoExtRange& pinfo )
56
{
57
const Vec3fa diag = pinfo.geomBounds.size();
58
dim = maxDim(diag);
59
assert(diag[dim] > 0.0f);
60
inv_max_extend = 1.0f / diag[dim];
61
}
62
63
__forceinline bool operator () ( PrimRef& prim ) const {
64
return !prim.node.isLeaf() && prim.bounds().size()[dim] * inv_max_extend > MAX_EXTEND_THRESHOLD;
65
}
66
67
private:
68
size_t dim;
69
float inv_max_extend;
70
};
71
72
/*! compute extended ranges */
73
__forceinline void setExtentedRanges(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset, const size_t lweight, const size_t rweight)
74
{
75
assert(set.ext_range_size() > 0);
76
const float left_factor = (float)lweight / (lweight + rweight);
77
const size_t ext_range_size = set.ext_range_size();
78
const size_t left_ext_range_size = min((size_t)(floorf(left_factor * ext_range_size)),ext_range_size);
79
const size_t right_ext_range_size = ext_range_size - left_ext_range_size;
80
lset.set_ext_range(lset.end() + left_ext_range_size);
81
rset.set_ext_range(rset.end() + right_ext_range_size);
82
}
83
84
/*! move ranges */
85
__forceinline void moveExtentedRange(const PrimInfoExtRange& set, const PrimInfoExtRange& lset, PrimInfoExtRange& rset)
86
{
87
const size_t left_ext_range_size = lset.ext_range_size();
88
const size_t right_size = rset.size();
89
90
/* has the left child an extended range? */
91
if (left_ext_range_size > 0)
92
{
93
/* left extended range smaller than right range ? */
94
if (left_ext_range_size < right_size)
95
{
96
/* only move a small part of the beginning of the right range to the end */
97
parallel_for( rset.begin(), rset.begin()+left_ext_range_size, MOVE_STEP_SIZE, [&](const range<size_t>& r) {
98
for (size_t i=r.begin(); i<r.end(); i++)
99
prims0[i+right_size] = prims0[i];
100
});
101
}
102
else
103
{
104
/* no overlap, move entire right range to new location, can be made fully parallel */
105
parallel_for( rset.begin(), rset.end(), MOVE_STEP_SIZE, [&](const range<size_t>& r) {
106
for (size_t i=r.begin(); i<r.end(); i++)
107
prims0[i+left_ext_range_size] = prims0[i];
108
});
109
}
110
/* update right range */
111
assert(rset.ext_end() + left_ext_range_size == set.ext_end());
112
rset.move_right(left_ext_range_size);
113
}
114
}
115
116
/* estimates the extra space required when opening, and checks if all primitives are from same geometry */
117
__noinline std::pair<size_t,bool> getProperties(const PrimInfoExtRange& set)
118
{
119
const OpenHeuristic heuristic(set);
120
const unsigned int geomID = prims0[set.begin()].geomID();
121
122
auto body = [&] (const range<size_t>& r) -> std::pair<size_t,bool> {
123
bool commonGeomID = true;
124
size_t opens = 0;
125
for (size_t i=r.begin(); i<r.end(); i++) {
126
commonGeomID &= prims0[i].geomID() == geomID;
127
if (heuristic(prims0[i]))
128
opens += prims0[i].node.getN()-1; // coarse approximation
129
}
130
return std::pair<size_t,bool>(opens,commonGeomID);
131
};
132
auto reduction = [&] (const std::pair<size_t,bool>& b0, const std::pair<size_t,bool>& b1) -> std::pair<size_t,bool> {
133
return std::pair<size_t,bool>(b0.first+b1.first,b0.second && b1.second);
134
};
135
return parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,PARALLEL_THRESHOLD,std::pair<size_t,bool>(0,true),body,reduction);
136
}
137
138
// FIXME: should consider maximum available extended size
139
__noinline void openNodesBasedOnExtend(PrimInfoExtRange& set)
140
{
141
const OpenHeuristic heuristic(set);
142
const size_t ext_range_start = set.end();
143
144
if (false && set.size() < PARALLEL_THRESHOLD)
145
{
146
size_t extra_elements = 0;
147
for (size_t i=set.begin(); i<set.end(); i++)
148
{
149
if (heuristic(prims0[i]))
150
{
151
PrimRef tmp[MAX_OPENED_CHILD_NODES];
152
const size_t n = nodeOpenerFunc(prims0[i],tmp);
153
assert(extra_elements + n-1 <= set.ext_range_size());
154
for (size_t j=0; j<n; j++)
155
set.extend_center2(tmp[j]);
156
157
prims0[i] = tmp[0];
158
for (size_t j=1; j<n; j++)
159
prims0[ext_range_start+extra_elements+j-1] = tmp[j];
160
extra_elements += n-1;
161
}
162
}
163
set._end += extra_elements;
164
}
165
else
166
{
167
std::atomic<size_t> ext_elements;
168
ext_elements.store(0);
169
PrimInfo info = parallel_reduce( set.begin(), set.end(), CREATE_SPLITS_STEP_SIZE, PrimInfo(empty), [&](const range<size_t>& r) -> PrimInfo {
170
PrimInfo info(empty);
171
for (size_t i=r.begin(); i<r.end(); i++)
172
if (heuristic(prims0[i]))
173
{
174
PrimRef tmp[MAX_OPENED_CHILD_NODES];
175
const size_t n = nodeOpenerFunc(prims0[i],tmp);
176
const size_t ID = ext_elements.fetch_add(n-1);
177
assert(ID + n-1 <= set.ext_range_size());
178
179
for (size_t j=0; j<n; j++)
180
info.extend_center2(tmp[j]);
181
182
prims0[i] = tmp[0];
183
for (size_t j=1; j<n; j++)
184
prims0[ext_range_start+ID+j-1] = tmp[j];
185
}
186
return info;
187
}, [] (const PrimInfo& a, const PrimInfo& b) { return PrimInfo::merge(a,b); });
188
set.centBounds.extend(info.centBounds);
189
assert(ext_elements.load() <= set.ext_range_size());
190
set._end += ext_elements.load();
191
}
192
}
193
194
__noinline void openNodesBasedOnExtendLoop(PrimInfoExtRange& set, const size_t est_new_elements)
195
{
196
const OpenHeuristic heuristic(set);
197
size_t next_iteration_extra_elements = est_new_elements;
198
199
while (next_iteration_extra_elements <= set.ext_range_size())
200
{
201
next_iteration_extra_elements = 0;
202
size_t extra_elements = 0;
203
const size_t ext_range_start = set.end();
204
205
for (size_t i=set.begin(); i<set.end(); i++)
206
{
207
if (heuristic(prims0[i]))
208
{
209
PrimRef tmp[MAX_OPENED_CHILD_NODES];
210
const size_t n = nodeOpenerFunc(prims0[i],tmp);
211
assert(extra_elements + n-1 <= set.ext_range_size());
212
for (size_t j=0;j<n;j++)
213
set.extend_center2(tmp[j]);
214
215
prims0[i] = tmp[0];
216
for (size_t j=1;j<n;j++)
217
prims0[ext_range_start+extra_elements+j-1] = tmp[j];
218
extra_elements += n-1;
219
220
for (size_t j=0; j<n; j++)
221
if (heuristic(tmp[j]))
222
next_iteration_extra_elements += tmp[j].node.getN()-1; // coarse approximation
223
224
}
225
}
226
assert( extra_elements <= set.ext_range_size());
227
set._end += extra_elements;
228
229
for (size_t i=set.begin();i<set.end();i++)
230
assert(prims0[i].numPrimitives() > 0);
231
232
if (unlikely(next_iteration_extra_elements == 0)) break;
233
}
234
}
235
236
__noinline const Split find(PrimInfoExtRange& set, const size_t logBlockSize)
237
{
238
/* single element */
239
if (set.size() <= 1)
240
return Split();
241
242
/* disable opening if there is no overlap */
243
const size_t D = 4;
244
if (unlikely(set.has_ext_range() && set.size() <= D))
245
{
246
bool disjoint = true;
247
for (size_t j=set.begin(); j<set.end()-1; j++) {
248
for (size_t i=set.begin()+1; i<set.end(); i++) {
249
if (conjoint(prims0[j].bounds(),prims0[i].bounds())) {
250
disjoint = false; break;
251
}
252
}
253
}
254
if (disjoint) set.set_ext_range(set.end()); /* disables opening */
255
}
256
257
std::pair<size_t,bool> p(0,false);
258
259
/* disable opening when all primitives are from same geometry */
260
if (unlikely(set.has_ext_range()))
261
{
262
p = getProperties(set);
263
#if EQUAL_GEOMID_STOP_CRITERIA == 1
264
if (p.second) set.set_ext_range(set.end()); /* disable opening */
265
#endif
266
}
267
268
/* open nodes when we have sufficient space available */
269
if (unlikely(set.has_ext_range()))
270
{
271
#if USE_LOOP_OPENING == 1
272
openNodesBasedOnExtendLoop(set,p.first);
273
#else
274
if (p.first <= set.ext_range_size())
275
openNodesBasedOnExtend(set);
276
#endif
277
278
/* disable opening when insufficient space for opening a node available */
279
if (set.ext_range_size() < max_open_size-1)
280
set.set_ext_range(set.end()); /* disable opening */
281
}
282
283
/* find best split */
284
return object_find(set,logBlockSize);
285
}
286
287
288
/*! finds the best object split */
289
__forceinline const Split object_find(const PrimInfoExtRange& set,const size_t logBlockSize)
290
{
291
if (set.size() < PARALLEL_THRESHOLD) return sequential_object_find(set,logBlockSize);
292
else return parallel_object_find (set,logBlockSize);
293
}
294
295
/*! finds the best object split */
296
__noinline const Split sequential_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
297
{
298
Binner binner(empty);
299
const BinMapping<OBJECT_BINS> mapping(set.centBounds);
300
binner.bin(prims0,set.begin(),set.end(),mapping);
301
return binner.best(mapping,logBlockSize);
302
}
303
304
/*! finds the best split */
305
__noinline const Split parallel_object_find(const PrimInfoExtRange& set, const size_t logBlockSize)
306
{
307
Binner binner(empty);
308
const BinMapping<OBJECT_BINS> mapping(set.centBounds);
309
const BinMapping<OBJECT_BINS>& _mapping = mapping; // CLANG 3.4 parser bug workaround
310
auto body = [&] (const range<size_t>& r) -> Binner {
311
Binner binner(empty); binner.bin(prims0+r.begin(),r.size(),_mapping); return binner;
312
};
313
auto reduction = [&] (const Binner& b0, const Binner& b1) -> Binner {
314
Binner r = b0; r.merge(b1,_mapping.size()); return r;
315
};
316
binner = parallel_reduce(set.begin(),set.end(),PARALLEL_FIND_BLOCK_SIZE,binner,body,reduction);
317
return binner.best(mapping,logBlockSize);
318
}
319
320
/*! array partitioning */
321
__noinline void split(const Split& split, const PrimInfoExtRange& set_i, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
322
{
323
PrimInfoExtRange set = set_i;
324
325
/* valid split */
326
if (unlikely(!split.valid())) {
327
deterministic_order(set);
328
splitFallback(set,lset,rset);
329
return;
330
}
331
332
std::pair<size_t,size_t> ext_weights(0,0);
333
334
/* object split */
335
if (likely(set.size() < PARALLEL_THRESHOLD))
336
ext_weights = sequential_object_split(split,set,lset,rset);
337
else
338
ext_weights = parallel_object_split(split,set,lset,rset);
339
340
/* if we have an extended range, set extended child ranges and move right split range */
341
if (unlikely(set.has_ext_range()))
342
{
343
setExtentedRanges(set,lset,rset,ext_weights.first,ext_weights.second);
344
moveExtentedRange(set,lset,rset);
345
}
346
}
347
348
/*! array partitioning */
349
std::pair<size_t,size_t> sequential_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
350
{
351
const size_t begin = set.begin();
352
const size_t end = set.end();
353
PrimInfo local_left(empty);
354
PrimInfo local_right(empty);
355
const unsigned int splitPos = split.pos;
356
const unsigned int splitDim = split.dim;
357
const unsigned int splitDimMask = (unsigned int)1 << splitDim;
358
359
const vint4 vSplitPos(splitPos);
360
const vbool4 vSplitMask( (int)splitDimMask );
361
362
size_t center = serial_partitioning(prims0,
363
begin,end,local_left,local_right,
364
[&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); },
365
[] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); });
366
367
new (&lset) PrimInfoExtRange(begin,center,center,local_left);
368
new (&rset) PrimInfoExtRange(center,end,end,local_right);
369
assert(area(lset.geomBounds) >= 0.0f);
370
assert(area(rset.geomBounds) >= 0.0f);
371
return std::pair<size_t,size_t>(local_left.size(),local_right.size());
372
}
373
374
/*! array partitioning */
375
__noinline std::pair<size_t,size_t> parallel_object_split(const Split& split, const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
376
{
377
const size_t begin = set.begin();
378
const size_t end = set.end();
379
PrimInfo left(empty);
380
PrimInfo right(empty);
381
const unsigned int splitPos = split.pos;
382
const unsigned int splitDim = split.dim;
383
const unsigned int splitDimMask = (unsigned int)1 << splitDim;
384
385
const vint4 vSplitPos(splitPos);
386
const vbool4 vSplitMask( (int)splitDimMask );
387
auto isLeft = [&] (const PrimRef& ref) { return split.mapping.bin_unsafe(ref,vSplitPos,vSplitMask); };
388
389
const size_t center = parallel_partitioning(
390
prims0,begin,end,EmptyTy(),left,right,isLeft,
391
[] (PrimInfo& pinfo,const PrimRef& ref) { pinfo.add_center2(ref); },
392
[] (PrimInfo& pinfo0,const PrimInfo& pinfo1) { pinfo0.merge(pinfo1); },
393
PARALLEL_PARTITION_BLOCK_SIZE);
394
395
new (&lset) PrimInfoExtRange(begin,center,center,left);
396
new (&rset) PrimInfoExtRange(center,end,end,right);
397
assert(area(lset.geomBounds) >= 0.0f);
398
assert(area(rset.geomBounds) >= 0.0f);
399
400
return std::pair<size_t,size_t>(left.size(),right.size());
401
}
402
403
void deterministic_order(const extended_range<size_t>& set)
404
{
405
/* required as parallel partition destroys original primitive order */
406
std::sort(&prims0[set.begin()],&prims0[set.end()]);
407
}
408
409
__forceinline void splitFallback(const PrimInfoExtRange& set, PrimInfoExtRange& lset, PrimInfoExtRange& rset)
410
{
411
const size_t begin = set.begin();
412
const size_t end = set.end();
413
const size_t center = (begin + end)/2;
414
415
PrimInfo left(empty);
416
for (size_t i=begin; i<center; i++)
417
left.add_center2(prims0[i]);
418
419
const size_t lweight = left.end;
420
421
PrimInfo right(empty);
422
for (size_t i=center; i<end; i++)
423
right.add_center2(prims0[i]);
424
425
const size_t rweight = right.end;
426
new (&lset) PrimInfoExtRange(begin,center,center,left);
427
new (&rset) PrimInfoExtRange(center,end,end,right);
428
429
/* if we have an extended range */
430
if (set.has_ext_range())
431
{
432
setExtentedRanges(set,lset,rset,lweight,rweight);
433
moveExtentedRange(set,lset,rset);
434
}
435
}
436
437
private:
438
PrimRef* const prims0;
439
const NodeOpenerFunc& nodeOpenerFunc;
440
size_t max_open_size;
441
};
442
}
443
}
444
445