Path: blob/master/thirdparty/embree/common/sys/barrier.cpp
9912 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#include "barrier.h"4#include "condition.h"5#include "regression.h"6#include "thread.h"78#if defined (__WIN32__)910#define WIN32_LEAN_AND_MEAN11#include <windows.h>1213namespace embree14{15struct BarrierSysImplementation16{17__forceinline BarrierSysImplementation (size_t N)18: i(0), enterCount(0), exitCount(0), barrierSize(0)19{20events[0] = CreateEvent(nullptr, TRUE, FALSE, nullptr);21events[1] = CreateEvent(nullptr, TRUE, FALSE, nullptr);22init(N);23}2425__forceinline ~BarrierSysImplementation ()26{27CloseHandle(events[0]);28CloseHandle(events[1]);29}3031__forceinline void init(size_t N)32{33barrierSize = N;34enterCount.store(N);35exitCount.store(N);36}3738__forceinline void wait()39{40/* every thread entering the barrier decrements this count */41size_t i0 = i;42size_t cnt0 = enterCount--;4344/* all threads except the last one are wait in the barrier */45if (cnt0 > 1)46{47if (WaitForSingleObject(events[i0], INFINITE) != WAIT_OBJECT_0)48THROW_RUNTIME_ERROR("WaitForSingleObjects failed");49}5051/* the last thread starts all threads waiting at the barrier */52else53{54i = 1-i;55enterCount.store(barrierSize);56if (SetEvent(events[i0]) == 0)57THROW_RUNTIME_ERROR("SetEvent failed");58}5960/* every thread leaving the barrier decrements this count */61size_t cnt1 = exitCount--;6263/* the last thread that left the barrier resets the event again */64if (cnt1 == 1)65{66exitCount.store(barrierSize);67if (ResetEvent(events[i0]) == 0)68THROW_RUNTIME_ERROR("ResetEvent failed");69}70}7172public:73HANDLE events[2];74atomic<size_t> i;75atomic<size_t> enterCount;76atomic<size_t> exitCount;77size_t barrierSize;78};79}8081#else8283namespace embree84{85struct BarrierSysImplementation86{87__forceinline BarrierSysImplementation (size_t N)88: count(0), barrierSize(0)89{90init(N);91}9293__forceinline void init(size_t N)94{95assert(count == 0);96count = 0;97barrierSize = N;98}99100__forceinline void wait()101{102mutex.lock();103count++;104105if (count == barrierSize) {106count = 0;107cond.notify_all();108mutex.unlock();109return;110}111112cond.wait(mutex);113mutex.unlock();114return;115}116117public:118MutexSys mutex;119ConditionSys cond;120volatile size_t count;121volatile size_t barrierSize;122};123}124125#endif126127namespace embree128{129BarrierSys::BarrierSys (size_t N) {130opaque = new BarrierSysImplementation(N);131}132133BarrierSys::~BarrierSys () {134delete (BarrierSysImplementation*) opaque;135}136137void BarrierSys::init(size_t count) {138((BarrierSysImplementation*) opaque)->init(count);139}140141void BarrierSys::wait() {142((BarrierSysImplementation*) opaque)->wait();143}144145LinearBarrierActive::LinearBarrierActive (size_t N)146: count0(nullptr), count1(nullptr), mode(0), flag0(0), flag1(0), threadCount(0)147{148if (N == 0) N = getNumberOfLogicalThreads();149init(N);150}151152LinearBarrierActive::~LinearBarrierActive()153{154delete[] count0;155delete[] count1;156}157158void LinearBarrierActive::init(size_t N)159{160if (threadCount != N) {161threadCount = N;162if (count0) delete[] count0; count0 = new unsigned char[N];163if (count1) delete[] count1; count1 = new unsigned char[N];164}165mode = 0;166flag0 = 0;167flag1 = 0;168for (size_t i=0; i<N; i++) count0[i] = 0;169for (size_t i=0; i<N; i++) count1[i] = 0;170}171172void LinearBarrierActive::wait (const size_t threadIndex)173{174if (mode == 0)175{176if (threadIndex == 0)177{178for (size_t i=0; i<threadCount; i++)179count1[i] = 0;180181for (size_t i=1; i<threadCount; i++)182{183while (likely(count0[i] == 0))184pause_cpu();185}186mode = 1;187flag1 = 0;188__memory_barrier();189flag0 = 1;190}191else192{193count0[threadIndex] = 1;194{195while (likely(flag0 == 0))196pause_cpu();197}198199}200}201else202{203if (threadIndex == 0)204{205for (size_t i=0; i<threadCount; i++)206count0[i] = 0;207208for (size_t i=1; i<threadCount; i++)209{210while (likely(count1[i] == 0))211pause_cpu();212}213214mode = 0;215flag0 = 0;216__memory_barrier();217flag1 = 1;218}219else220{221count1[threadIndex] = 1;222{223while (likely(flag1 == 0))224pause_cpu();225}226}227}228}229230struct barrier_sys_regression_test : public RegressionTest231{232BarrierSys barrier;233std::atomic<size_t> threadID;234std::atomic<size_t> numFailed;235std::vector<size_t> threadResults;236237barrier_sys_regression_test()238: RegressionTest("barrier_sys_regression_test"), threadID(0), numFailed(0)239{240registerRegressionTest(this);241}242243static void thread_alloc(barrier_sys_regression_test* This)244{245size_t tid = This->threadID++;246for (size_t j=0; j<1000; j++)247{248This->barrier.wait();249This->threadResults[tid] = tid;250This->barrier.wait();251}252}253254bool run ()255{256threadID.store(0);257numFailed.store(0);258259size_t numThreads = getNumberOfLogicalThreads();260threadResults.resize(numThreads);261barrier.init(numThreads+1);262263/* create threads */264std::vector<thread_t> threads;265for (size_t i=0; i<numThreads; i++)266threads.push_back(createThread((thread_func)thread_alloc,this));267268/* run test */269for (size_t i=0; i<1000; i++)270{271for (size_t i=0; i<numThreads; i++) threadResults[i] = 0;272barrier.wait();273barrier.wait();274for (size_t i=0; i<numThreads; i++) numFailed += threadResults[i] != i;275}276277/* destroy threads */278for (size_t i=0; i<numThreads; i++)279join(threads[i]);280281return numFailed == 0;282}283};284285barrier_sys_regression_test barrier_sys_regression_test;286}287288289290291