Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/common/algorithms/parallel_partition.h
9912 views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
#pragma once
5
6
#include "parallel_for.h"
7
#include "../math/range.h"
8
9
namespace embree
10
{
11
/* serial partitioning */
12
template<typename T, typename V, typename IsLeft, typename Reduction_T>
13
__forceinline size_t serial_partitioning(T* array,
14
const size_t begin,
15
const size_t end,
16
V& leftReduction,
17
V& rightReduction,
18
const IsLeft& is_left,
19
const Reduction_T& reduction_t)
20
{
21
T* l = array + begin;
22
T* r = array + end - 1;
23
24
while(1)
25
{
26
/* *l < pivot */
27
while (likely(l <= r && is_left(*l) ))
28
{
29
//prefetchw(l+4); // FIXME: enable?
30
reduction_t(leftReduction,*l);
31
++l;
32
}
33
/* *r >= pivot) */
34
while (likely(l <= r && !is_left(*r)))
35
{
36
//prefetchw(r-4); FIXME: enable?
37
reduction_t(rightReduction,*r);
38
--r;
39
}
40
if (r<l) break;
41
42
reduction_t(leftReduction ,*r);
43
reduction_t(rightReduction,*l);
44
xchg(*l,*r);
45
l++; r--;
46
}
47
48
return l - array;
49
}
50
51
template<typename T, typename V, typename Vi, typename IsLeft, typename Reduction_T, typename Reduction_V>
52
class __aligned(64) parallel_partition_task
53
{
54
ALIGNED_CLASS_(64);
55
private:
56
57
static const size_t MAX_TASKS = 64;
58
59
T* array;
60
size_t N;
61
const IsLeft& is_left;
62
const Reduction_T& reduction_t;
63
const Reduction_V& reduction_v;
64
const Vi& identity;
65
66
size_t numTasks;
67
__aligned(64) size_t counter_start[MAX_TASKS+1];
68
__aligned(64) size_t counter_left[MAX_TASKS+1];
69
__aligned(64) range<ssize_t> leftMisplacedRanges[MAX_TASKS];
70
__aligned(64) range<ssize_t> rightMisplacedRanges[MAX_TASKS];
71
__aligned(64) V leftReductions[MAX_TASKS];
72
__aligned(64) V rightReductions[MAX_TASKS];
73
74
public:
75
76
__forceinline parallel_partition_task(T* array,
77
const size_t N,
78
const Vi& identity,
79
const IsLeft& is_left,
80
const Reduction_T& reduction_t,
81
const Reduction_V& reduction_v,
82
const size_t BLOCK_SIZE)
83
84
: array(array), N(N), is_left(is_left), reduction_t(reduction_t), reduction_v(reduction_v), identity(identity),
85
numTasks(min((N+BLOCK_SIZE-1)/BLOCK_SIZE,min(TaskScheduler::threadCount(),MAX_TASKS))) {}
86
87
__forceinline const range<ssize_t>* findStartRange(size_t& index, const range<ssize_t>* const r, const size_t numRanges)
88
{
89
size_t i = 0;
90
while(index >= (size_t)r[i].size())
91
{
92
assert(i < numRanges);
93
index -= (size_t)r[i].size();
94
i++;
95
}
96
return &r[i];
97
}
98
99
__forceinline void swapItemsInMisplacedRanges(const size_t numLeftMisplacedRanges,
100
const size_t numRightMisplacedRanges,
101
const size_t startID,
102
const size_t endID)
103
{
104
size_t leftLocalIndex = startID;
105
size_t rightLocalIndex = startID;
106
const range<ssize_t>* l_range = findStartRange(leftLocalIndex,leftMisplacedRanges,numLeftMisplacedRanges);
107
const range<ssize_t>* r_range = findStartRange(rightLocalIndex,rightMisplacedRanges,numRightMisplacedRanges);
108
109
size_t l_left = l_range->size() - leftLocalIndex;
110
size_t r_left = r_range->size() - rightLocalIndex;
111
T *__restrict__ l = &array[l_range->begin() + leftLocalIndex];
112
T *__restrict__ r = &array[r_range->begin() + rightLocalIndex];
113
size_t size = endID - startID;
114
size_t items = min(size,min(l_left,r_left));
115
116
while (size)
117
{
118
if (unlikely(l_left == 0))
119
{
120
l_range++;
121
l_left = l_range->size();
122
l = &array[l_range->begin()];
123
items = min(size,min(l_left,r_left));
124
}
125
126
if (unlikely(r_left == 0))
127
{
128
r_range++;
129
r_left = r_range->size();
130
r = &array[r_range->begin()];
131
items = min(size,min(l_left,r_left));
132
}
133
134
size -= items;
135
l_left -= items;
136
r_left -= items;
137
138
while(items) {
139
items--;
140
xchg(*l++,*r++);
141
}
142
}
143
}
144
145
__forceinline size_t partition(V& leftReduction, V& rightReduction)
146
{
147
/* partition the individual ranges for each task */
148
parallel_for(numTasks,[&] (const size_t taskID) {
149
const size_t startID = (taskID+0)*N/numTasks;
150
const size_t endID = (taskID+1)*N/numTasks;
151
V local_left(identity);
152
V local_right(identity);
153
const size_t mid = serial_partitioning(array,startID,endID,local_left,local_right,is_left,reduction_t);
154
counter_start[taskID] = startID;
155
counter_left [taskID] = mid-startID;
156
leftReductions[taskID] = local_left;
157
rightReductions[taskID] = local_right;
158
});
159
counter_start[numTasks] = N;
160
counter_left[numTasks] = 0;
161
162
/* finalize the reductions */
163
for (size_t i=0; i<numTasks; i++) {
164
reduction_v(leftReduction,leftReductions[i]);
165
reduction_v(rightReduction,rightReductions[i]);
166
}
167
168
/* calculate mid point for partitioning */
169
size_t mid = counter_left[0];
170
for (size_t i=1; i<numTasks; i++)
171
mid += counter_left[i];
172
const range<ssize_t> globalLeft (0,mid);
173
const range<ssize_t> globalRight(mid,N);
174
175
/* calculate all left and right ranges that are on the wrong global side */
176
size_t numMisplacedRangesLeft = 0;
177
size_t numMisplacedRangesRight = 0;
178
size_t numMisplacedItemsLeft MAYBE_UNUSED = 0;
179
size_t numMisplacedItemsRight MAYBE_UNUSED = 0;
180
181
for (size_t i=0; i<numTasks; i++)
182
{
183
const range<ssize_t> left_range (counter_start[i], counter_start[i] + counter_left[i]);
184
const range<ssize_t> right_range(counter_start[i] + counter_left[i], counter_start[i+1]);
185
const range<ssize_t> left_misplaced = globalLeft. intersect(right_range);
186
const range<ssize_t> right_misplaced = globalRight.intersect(left_range);
187
188
if (!left_misplaced.empty())
189
{
190
numMisplacedItemsLeft += left_misplaced.size();
191
leftMisplacedRanges[numMisplacedRangesLeft++] = left_misplaced;
192
}
193
194
if (!right_misplaced.empty())
195
{
196
numMisplacedItemsRight += right_misplaced.size();
197
rightMisplacedRanges[numMisplacedRangesRight++] = right_misplaced;
198
}
199
}
200
assert( numMisplacedItemsLeft == numMisplacedItemsRight );
201
202
/* if no items are misplaced we are done */
203
if (numMisplacedItemsLeft == 0)
204
return mid;
205
206
/* otherwise we copy the items to the right place in parallel */
207
parallel_for(numTasks,[&] (const size_t taskID) {
208
const size_t startID = (taskID+0)*numMisplacedItemsLeft/numTasks;
209
const size_t endID = (taskID+1)*numMisplacedItemsLeft/numTasks;
210
swapItemsInMisplacedRanges(numMisplacedRangesLeft,numMisplacedRangesRight,startID,endID);
211
});
212
213
return mid;
214
}
215
};
216
217
template<typename T, typename V, typename Vi, typename IsLeft, typename Reduction_T, typename Reduction_V>
218
__noinline size_t parallel_partitioning(T* array,
219
const size_t begin,
220
const size_t end,
221
const Vi &identity,
222
V &leftReduction,
223
V &rightReduction,
224
const IsLeft& is_left,
225
const Reduction_T& reduction_t,
226
const Reduction_V& reduction_v,
227
size_t BLOCK_SIZE = 128)
228
{
229
/* fall back to single threaded partitioning for small N */
230
if (unlikely(end-begin < BLOCK_SIZE))
231
return serial_partitioning(array,begin,end,leftReduction,rightReduction,is_left,reduction_t);
232
233
/* otherwise use parallel code */
234
else {
235
typedef parallel_partition_task<T,V,Vi,IsLeft,Reduction_T,Reduction_V> partition_task;
236
std::unique_ptr<partition_task> p(new partition_task(&array[begin],end-begin,identity,is_left,reduction_t,reduction_v,BLOCK_SIZE));
237
return begin+p->partition(leftReduction,rightReduction);
238
}
239
}
240
241
template<typename T, typename V, typename Vi, typename IsLeft, typename Reduction_T, typename Reduction_V>
242
__noinline size_t parallel_partitioning(T* array,
243
const size_t begin,
244
const size_t end,
245
const Vi &identity,
246
V &leftReduction,
247
V &rightReduction,
248
const IsLeft& is_left,
249
const Reduction_T& reduction_t,
250
const Reduction_V& reduction_v,
251
size_t BLOCK_SIZE,
252
size_t PARALLEL_THRESHOLD)
253
{
254
/* fall back to single threaded partitioning for small N */
255
if (unlikely(end-begin < PARALLEL_THRESHOLD))
256
return serial_partitioning(array,begin,end,leftReduction,rightReduction,is_left,reduction_t);
257
258
/* otherwise use parallel code */
259
else {
260
typedef parallel_partition_task<T,V,Vi,IsLeft,Reduction_T,Reduction_V> partition_task;
261
std::unique_ptr<partition_task> p(new partition_task(&array[begin],end-begin,identity,is_left,reduction_t,reduction_v,BLOCK_SIZE));
262
return begin+p->partition(leftReduction,rightReduction);
263
}
264
}
265
266
267
template<typename T, typename IsLeft>
268
inline size_t parallel_partitioning(T* array,
269
const size_t begin,
270
const size_t end,
271
const IsLeft& is_left,
272
size_t BLOCK_SIZE = 128)
273
{
274
size_t leftReduction = 0;
275
size_t rightReduction = 0;
276
return parallel_partitioning(
277
array,begin,end,0,leftReduction,rightReduction,is_left,
278
[] (size_t& t,const T& ref) { },
279
[] (size_t& t0,size_t& t1) { },
280
BLOCK_SIZE);
281
}
282
283
}
284
285