Path: blob/master/thirdparty/embree/common/algorithms/parallel_prefix_sum.h
9912 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#pragma once45#include "parallel_for.h"67namespace embree8{9template<typename Value>10struct ParallelPrefixSumState11{12enum { MAX_TASKS = 64 };13Value counts[MAX_TASKS];14Value sums [MAX_TASKS];15};1617template<typename Index, typename Value, typename Func, typename Reduction>18__forceinline Value parallel_prefix_sum( ParallelPrefixSumState<Value>& state, Index first, Index last, Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction)19{20/* calculate number of tasks to use */21const size_t numThreads = TaskScheduler::threadCount();22const size_t numBlocks = (last-first+minStepSize-1)/minStepSize;23const size_t taskCount = min(numThreads,numBlocks,size_t(ParallelPrefixSumState<Value>::MAX_TASKS));2425/* perform parallel prefix sum */26parallel_for(taskCount, [&](const size_t taskIndex)27{28const size_t i0 = first+(taskIndex+0)*(last-first)/taskCount;29const size_t i1 = first+(taskIndex+1)*(last-first)/taskCount;30state.counts[taskIndex] = func(range<size_t>(i0,i1),state.sums[taskIndex]);31});3233/* calculate prefix sum */34Value sum=identity;35for (size_t i=0; i<taskCount; i++)36{37const Value c = state.counts[i];38state.sums[i] = sum;39sum=reduction(sum,c);40}4142return sum;43}4445/*! parallel calculation of prefix sums */46template<typename SrcArray, typename DstArray, typename Value, typename Add>47__forceinline Value parallel_prefix_sum(const SrcArray& src, DstArray& dst, size_t N, const Value& identity, const Add& add, const size_t SINGLE_THREAD_THRESHOLD = 4096)48{49/* perform single threaded prefix operation for small N */50if (N < SINGLE_THREAD_THRESHOLD)51{52Value sum=identity;53for (size_t i=0; i<N; sum=add(sum,src[i++])) dst[i] = sum;54return sum;55}5657/* perform parallel prefix operation for large N */58else59{60ParallelPrefixSumState<Value> state;6162/* initial run just sets up start values for subtasks */63parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value {6465Value s = identity;66for (size_t i=r.begin(); i<r.end(); i++) s = add(s,src[i]);67return s;6869}, add);7071/* final run calculates prefix sum */72return parallel_prefix_sum( state, size_t(0), size_t(N), size_t(1024), identity, [&](const range<size_t>& r, const Value& sum) -> Value {7374Value s = identity;75for (size_t i=r.begin(); i<r.end(); i++) {76dst[i] = add(sum,s);77s = add(s,src[i]);78}79return s;8081}, add);82}83}84}858687