Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/common/algorithms/parallel_prefix_sum.h
9912 views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
#pragma once
5
6
#include "parallel_for.h"
7
8
namespace embree
9
{
10
template<typename Value>
11
struct ParallelPrefixSumState
12
{
13
enum { MAX_TASKS = 64 };
14
Value counts[MAX_TASKS];
15
Value sums [MAX_TASKS];
16
};
17
18
template<typename Index, typename Value, typename Func, typename Reduction>
19
__forceinline Value parallel_prefix_sum( ParallelPrefixSumState<Value>& state, Index first, Index last, Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction)
20
{
21
/* calculate number of tasks to use */
22
const size_t numThreads = TaskScheduler::threadCount();
23
const size_t numBlocks = (last-first+minStepSize-1)/minStepSize;
24
const size_t taskCount = min(numThreads,numBlocks,size_t(ParallelPrefixSumState<Value>::MAX_TASKS));
25
26
/* perform parallel prefix sum */
27
parallel_for(taskCount, [&](const size_t taskIndex)
28
{
29
const size_t i0 = first+(taskIndex+0)*(last-first)/taskCount;
30
const size_t i1 = first+(taskIndex+1)*(last-first)/taskCount;
31
state.counts[taskIndex] = func(range<size_t>(i0,i1),state.sums[taskIndex]);
32
});
33
34
/* calculate prefix sum */
35
Value sum=identity;
36
for (size_t i=0; i<taskCount; i++)
37
{
38
const Value c = state.counts[i];
39
state.sums[i] = sum;
40
sum=reduction(sum,c);
41
}
42
43
return sum;
44
}
45
46
/*! parallel calculation of prefix sums */
47
template<typename SrcArray, typename DstArray, typename Value, typename Add>
48
__forceinline Value parallel_prefix_sum(const SrcArray& src, DstArray& dst, size_t N, const Value& identity, const Add& add, const size_t SINGLE_THREAD_THRESHOLD = 4096)
49
{
50
/* perform single threaded prefix operation for small N */
51
if (N < SINGLE_THREAD_THRESHOLD)
52
{
53
Value sum=identity;
54
for (size_t i=0; i<N; sum=add(sum,src[i++])) dst[i] = sum;
55
return sum;
56
}
57
58
/* perform parallel prefix operation for large N */
59
else
60
{
61
ParallelPrefixSumState<Value> state;
62
63
/* initial run just sets up start values for subtasks */
64
parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value {
65
66
Value s = identity;
67
for (size_t i=r.begin(); i<r.end(); i++) s = add(s,src[i]);
68
return s;
69
70
}, add);
71
72
/* final run calculates prefix sum */
73
return parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value {
74
75
Value s = identity;
76
for (size_t i=r.begin(); i<r.end(); i++) {
77
dst[i] = add(sum,s);
78
s = add(s,src[i]);
79
}
80
return s;
81
82
}, add);
83
}
84
}
85
}
86
87