Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
godotengine
GitHub Repository: godotengine/godot
Path: blob/master/thirdparty/embree/common/algorithms/parallel_reduce.h
9912 views
1
// Copyright 2009-2021 Intel Corporation
2
// SPDX-License-Identifier: Apache-2.0
3
4
#pragma once
5
6
#include "parallel_for.h"
7
8
namespace embree
9
{
10
template<typename Index, typename Value, typename Func, typename Reduction>
11
__forceinline Value sequential_reduce( const Index first, const Index last, const Value& identity, const Func& func, const Reduction& reduction )
12
{
13
return func(range<Index>(first,last));
14
}
15
16
template<typename Index, typename Value, typename Func, typename Reduction>
17
__forceinline Value sequential_reduce( const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
18
{
19
return func(range<Index>(first,last));
20
}
21
22
template<typename Index, typename Value, typename Func, typename Reduction>
23
__noinline Value parallel_reduce_internal( Index taskCount, const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
24
{
25
const Index maxTasks = 512;
26
const Index threadCount = (Index) TaskScheduler::threadCount();
27
taskCount = min(taskCount,threadCount,maxTasks);
28
29
/* parallel invocation of all tasks */
30
dynamic_large_stack_array(Value,values,taskCount,8192); // consumes at most 8192 bytes on the stack
31
parallel_for(taskCount, [&](const Index taskIndex) {
32
const Index k0 = first+(taskIndex+0)*(last-first)/taskCount;
33
const Index k1 = first+(taskIndex+1)*(last-first)/taskCount;
34
values[taskIndex] = func(range<Index>(k0,k1));
35
});
36
37
/* perform reduction over all tasks */
38
Value v = identity;
39
for (Index i=0; i<taskCount; i++) v = reduction(v,values[i]);
40
return v;
41
}
42
43
template<typename Index, typename Value, typename Func, typename Reduction>
44
__forceinline Value parallel_reduce( const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
45
{
46
#if defined(TASKING_INTERNAL) && !defined(TASKING_TBB)
47
48
/* fast path for small number of iterations */
49
Index taskCount = (last-first+minStepSize-1)/minStepSize;
50
if (likely(taskCount == 1)) {
51
return func(range<Index>(first,last));
52
}
53
return parallel_reduce_internal(taskCount,first,last,minStepSize,identity,func,reduction);
54
55
#elif defined(TASKING_TBB)
56
#if TBB_INTERFACE_VERSION >= 12002
57
tbb::task_group_context context;
58
const Value v = tbb::parallel_reduce(tbb::blocked_range<Index>(first,last,minStepSize),identity,
59
[&](const tbb::blocked_range<Index>& r, const Value& start) { return reduction(start,func(range<Index>(r.begin(),r.end()))); },
60
reduction,context);
61
//if (context.is_group_execution_cancelled())
62
// throw std::runtime_error("task cancelled");
63
return v;
64
#else
65
const Value v = tbb::parallel_reduce(tbb::blocked_range<Index>(first,last,minStepSize),identity,
66
[&](const tbb::blocked_range<Index>& r, const Value& start) { return reduction(start,func(range<Index>(r.begin(),r.end()))); },
67
reduction);
68
//if (tbb::task::self().is_cancelled())
69
// throw std::runtime_error("task cancelled");
70
return v;
71
#endif
72
#else // TASKING_PPL
73
struct AlignedValue
74
{
75
char storage[__alignof(Value)+sizeof(Value)];
76
static uintptr_t alignUp(uintptr_t p, size_t a) { return p + (~(p - 1) % a); };
77
Value* getValuePtr() { return reinterpret_cast<Value*>(alignUp(uintptr_t(storage), __alignof(Value))); }
78
const Value* getValuePtr() const { return reinterpret_cast<Value*>(alignUp(uintptr_t(storage), __alignof(Value))); }
79
AlignedValue(const Value& v) { new(getValuePtr()) Value(v); }
80
AlignedValue(const AlignedValue& v) { new(getValuePtr()) Value(*v.getValuePtr()); }
81
AlignedValue(const AlignedValue&& v) { new(getValuePtr()) Value(*v.getValuePtr()); };
82
AlignedValue& operator = (const AlignedValue& v) { *getValuePtr() = *v.getValuePtr(); return *this; };
83
AlignedValue& operator = (const AlignedValue&& v) { *getValuePtr() = *v.getValuePtr(); return *this; };
84
operator Value() const { return *getValuePtr(); }
85
};
86
87
struct Iterator_Index
88
{
89
Index v;
90
typedef std::forward_iterator_tag iterator_category;
91
typedef AlignedValue value_type;
92
typedef Index difference_type;
93
typedef Index distance_type;
94
typedef AlignedValue* pointer;
95
typedef AlignedValue& reference;
96
__forceinline Iterator_Index() {}
97
__forceinline Iterator_Index(Index v) : v(v) {}
98
__forceinline bool operator== (Iterator_Index other) { return v == other.v; }
99
__forceinline bool operator!= (Iterator_Index other) { return v != other.v; }
100
__forceinline Iterator_Index operator++() { return Iterator_Index(++v); }
101
__forceinline Iterator_Index operator++(int) { return Iterator_Index(v++); }
102
};
103
104
auto range_reduction = [&](Iterator_Index begin, Iterator_Index end, const AlignedValue& start) {
105
assert(begin.v < end.v);
106
return reduction(start, func(range<Index>(begin.v, end.v)));
107
};
108
const Value v = concurrency::parallel_reduce(Iterator_Index(first), Iterator_Index(last), AlignedValue(identity), range_reduction, reduction);
109
return v;
110
#endif
111
}
112
113
template<typename Index, typename Value, typename Func, typename Reduction>
114
__forceinline Value parallel_reduce( const Index first, const Index last, const Index minStepSize, const Index parallel_threshold, const Value& identity, const Func& func, const Reduction& reduction )
115
{
116
if (likely(last-first < parallel_threshold)) {
117
return func(range<Index>(first,last));
118
} else {
119
return parallel_reduce(first,last,minStepSize,identity,func,reduction);
120
}
121
}
122
123
template<typename Index, typename Value, typename Func, typename Reduction>
124
__forceinline Value parallel_reduce( const range<Index> range, const Index minStepSize, const Index parallel_threshold, const Value& identity, const Func& func, const Reduction& reduction )
125
{
126
return parallel_reduce(range.begin(),range.end(),minStepSize,parallel_threshold,identity,func,reduction);
127
}
128
129
template<typename Index, typename Value, typename Func, typename Reduction>
130
__forceinline Value parallel_reduce( const Index first, const Index last, const Value& identity, const Func& func, const Reduction& reduction )
131
{
132
auto funcr = [&] ( const range<Index> r ) {
133
Value v = identity;
134
for (Index i=r.begin(); i<r.end(); i++)
135
v = reduction(v,func(i));
136
return v;
137
};
138
return parallel_reduce(first,last,Index(1),identity,funcr,reduction);
139
}
140
141
template<typename Index, typename Value, typename Func, typename Reduction>
142
__forceinline Value parallel_reduce( const range<Index> range, const Value& identity, const Func& func, const Reduction& reduction )
143
{
144
return parallel_reduce(range.begin(),range.end(),Index(1),identity,func,reduction);
145
}
146
}
147
148