Path: blob/master/thirdparty/embree/common/sys/thread.cpp
9912 views
// Copyright 2009-2021 Intel Corporation1// SPDX-License-Identifier: Apache-2.023#include "thread.h"4#include "sysinfo.h"5#include "estring.h"67#include <iostream>8#if defined(__ARM_NEON)9#include "../simd/arm/emulation.h"10#else11#include <xmmintrin.h>12#if defined(__EMSCRIPTEN__)13#include "../simd/wasm/emulation.h"14#endif15#endif1617#if defined(PTHREADS_WIN32)18#pragma comment (lib, "pthreadVC.lib")19#endif2021////////////////////////////////////////////////////////////////////////////////22/// Windows Platform23////////////////////////////////////////////////////////////////////////////////2425#if defined(__WIN32__)2627#define WIN32_LEAN_AND_MEAN28#include <windows.h>2930namespace embree31{32/*! set the affinity of a given thread */33void setAffinity(HANDLE thread, ssize_t affinity)34{35typedef WORD (WINAPI *GetActiveProcessorGroupCountFunc)();36typedef DWORD (WINAPI *GetActiveProcessorCountFunc)(WORD);37typedef BOOL (WINAPI *SetThreadGroupAffinityFunc)(HANDLE, const GROUP_AFFINITY *, PGROUP_AFFINITY);38typedef BOOL (WINAPI *SetThreadIdealProcessorExFunc)(HANDLE, PPROCESSOR_NUMBER, PPROCESSOR_NUMBER);39HMODULE hlib = LoadLibrary("Kernel32");40GetActiveProcessorGroupCountFunc pGetActiveProcessorGroupCount = (GetActiveProcessorGroupCountFunc)GetProcAddress(hlib, "GetActiveProcessorGroupCount");41GetActiveProcessorCountFunc pGetActiveProcessorCount = (GetActiveProcessorCountFunc)GetProcAddress(hlib, "GetActiveProcessorCount");42SetThreadGroupAffinityFunc pSetThreadGroupAffinity = (SetThreadGroupAffinityFunc)GetProcAddress(hlib, "SetThreadGroupAffinity");43SetThreadIdealProcessorExFunc pSetThreadIdealProcessorEx = (SetThreadIdealProcessorExFunc)GetProcAddress(hlib, "SetThreadIdealProcessorEx");44if (pGetActiveProcessorGroupCount && pGetActiveProcessorCount && pSetThreadGroupAffinity && pSetThreadIdealProcessorEx)45{46int groups = pGetActiveProcessorGroupCount();47int totalProcessors = 0, group = 0, number = 0;48for (int i = 0; i<groups; i++) {49int processors = pGetActiveProcessorCount(i);50if (totalProcessors + processors > affinity) {51group = i;52number = (int)affinity - totalProcessors;53break;54}55totalProcessors += processors;56}5758GROUP_AFFINITY groupAffinity;59groupAffinity.Group = (WORD)group;60groupAffinity.Mask = (KAFFINITY)(uint64_t(1) << number);61groupAffinity.Reserved[0] = 0;62groupAffinity.Reserved[1] = 0;63groupAffinity.Reserved[2] = 0;64if (!pSetThreadGroupAffinity(thread, &groupAffinity, nullptr))65WARNING("SetThreadGroupAffinity failed"); // on purpose only a warning6667PROCESSOR_NUMBER processorNumber;68processorNumber.Group = group;69processorNumber.Number = number;70processorNumber.Reserved = 0;71if (!pSetThreadIdealProcessorEx(thread, &processorNumber, nullptr))72WARNING("SetThreadIdealProcessorEx failed"); // on purpose only a warning73}74else75{76if (!SetThreadAffinityMask(thread, DWORD_PTR(uint64_t(1) << affinity)))77WARNING("SetThreadAffinityMask failed"); // on purpose only a warning78if (SetThreadIdealProcessor(thread, (DWORD)affinity) == (DWORD)-1)79WARNING("SetThreadIdealProcessor failed"); // on purpose only a warning80}81}8283/*! set affinity of the calling thread */84void setAffinity(ssize_t affinity) {85setAffinity(GetCurrentThread(), affinity);86}8788struct ThreadStartupData89{90public:91ThreadStartupData (thread_func f, void* arg)92: f(f), arg(arg) {}93public:94thread_func f;95void* arg;96};9798DWORD WINAPI threadStartup(LPVOID ptr)99{100ThreadStartupData* parg = (ThreadStartupData*) ptr;101_mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));102parg->f(parg->arg);103delete parg;104return 0;105}106107#if !defined(PTHREADS_WIN32)108109/*! creates a hardware thread running on specific core */110thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)111{112HANDLE thread = CreateThread(nullptr, stack_size, threadStartup, new ThreadStartupData(f,arg), 0, nullptr);113if (thread == nullptr) FATAL("CreateThread failed");114if (threadID >= 0) setAffinity(thread, threadID);115return thread_t(thread);116}117118/*! the thread calling this function gets yielded */119void yield() {120SwitchToThread();121}122123/*! waits until the given thread has terminated */124void join(thread_t tid) {125WaitForSingleObject(HANDLE(tid), INFINITE);126CloseHandle(HANDLE(tid));127}128129/*! destroy a hardware thread by its handle */130void destroyThread(thread_t tid) {131TerminateThread(HANDLE(tid),0);132CloseHandle(HANDLE(tid));133}134135/*! creates thread local storage */136tls_t createTls() {137return tls_t(size_t(TlsAlloc()));138}139140/*! set the thread local storage pointer */141void setTls(tls_t tls, void* const ptr) {142TlsSetValue(DWORD(size_t(tls)), ptr);143}144145/*! return the thread local storage pointer */146void* getTls(tls_t tls) {147return TlsGetValue(DWORD(size_t(tls)));148}149150/*! destroys thread local storage identifier */151void destroyTls(tls_t tls) {152TlsFree(DWORD(size_t(tls)));153}154#endif155}156157#endif158159////////////////////////////////////////////////////////////////////////////////160/// Linux Platform161////////////////////////////////////////////////////////////////////////////////162163#if defined(__LINUX__) && !defined(__ANDROID__)164165#include <fstream>166#include <sstream>167#include <algorithm>168169namespace embree170{171static MutexSys mutex;172static std::vector<size_t> threadIDs;173174/* changes thread ID mapping such that we first fill up all thread on one core */175size_t mapThreadID(size_t threadID)176{177Lock<MutexSys> lock(mutex);178179if (threadIDs.size() == 0)180{181/* parse thread/CPU topology */182for (size_t cpuID=0;;cpuID++)183{184std::fstream fs;185std::string cpu = std::string("/sys/devices/system/cpu/cpu") + std::to_string((long long)cpuID) + std::string("/topology/thread_siblings_list");186fs.open (cpu.c_str(), std::fstream::in);187if (fs.fail()) break;188189int i;190while (fs >> i)191{192if (std::none_of(threadIDs.begin(),threadIDs.end(),[&] (int id) { return id == i; }))193threadIDs.push_back(i);194if (fs.peek() == ',')195fs.ignore();196}197fs.close();198}199200#if 0201for (size_t i=0;i<threadIDs.size();i++)202std::cout << i << " -> " << threadIDs[i] << std::endl;203#endif204205/* verify the mapping and do not use it if the mapping has errors */206for (size_t i=0;i<threadIDs.size();i++) {207for (size_t j=0;j<threadIDs.size();j++) {208if (i != j && threadIDs[i] == threadIDs[j]) {209threadIDs.clear();210}211}212}213}214215/* re-map threadIDs if mapping is available */216size_t ID = threadID;217if (threadID < threadIDs.size())218ID = threadIDs[threadID];219220/* find correct thread to affinitize to */221cpu_set_t set;222CPU_ZERO(&set);223224if (pthread_getaffinity_np(pthread_self(), sizeof(set), &set) == 0)225{226for (int i=0, j=0; i<CPU_SETSIZE; i++)227{228if (!CPU_ISSET(i,&set)) continue;229230if (j == ID) {231ID = i;232break;233}234j++;235}236}237238return ID;239}240241/*! set affinity of the calling thread */242void setAffinity(ssize_t affinity)243{244cpu_set_t cset;245CPU_ZERO(&cset);246//size_t threadID = mapThreadID(affinity); // this is not working properly in LXC containers when some processors are disabled247size_t threadID = affinity;248CPU_SET(threadID, &cset);249250pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset);251}252}253#endif254255////////////////////////////////////////////////////////////////////////////////256/// Android Platform257////////////////////////////////////////////////////////////////////////////////258259#if defined(__ANDROID__)260261namespace embree262{263/*! set affinity of the calling thread */264void setAffinity(ssize_t affinity)265{266cpu_set_t cset;267CPU_ZERO(&cset);268CPU_SET(affinity, &cset);269270sched_setaffinity(0, sizeof(cset), &cset);271}272}273#endif274275////////////////////////////////////////////////////////////////////////////////276/// FreeBSD Platform277////////////////////////////////////////////////////////////////////////////////278279#if defined(__FreeBSD__)280281#include <pthread_np.h>282283namespace embree284{285/*! set affinity of the calling thread */286void setAffinity(ssize_t affinity)287{288cpuset_t cset;289CPU_ZERO(&cset);290CPU_SET(affinity, &cset);291292pthread_setaffinity_np(pthread_self(), sizeof(cset), &cset);293}294}295#endif296297////////////////////////////////////////////////////////////////////////////////298/// WebAssembly Platform299////////////////////////////////////////////////////////////////////////////////300301#if defined(__EMSCRIPTEN__)302namespace embree303{304/*! set affinity of the calling thread */305void setAffinity(ssize_t affinity)306{307// Setting thread affinity is not supported in WASM.308}309}310#endif311312////////////////////////////////////////////////////////////////////////////////313/// MacOSX Platform314////////////////////////////////////////////////////////////////////////////////315316#if defined(__MACOSX__)317318#include <mach/thread_act.h>319#include <mach/thread_policy.h>320#include <mach/mach_init.h>321322namespace embree323{324/*! set affinity of the calling thread */325void setAffinity(ssize_t affinity)326{327#if !defined(__ARM_NEON) // affinity seems not supported on M1 chip328329thread_affinity_policy ap;330ap.affinity_tag = affinity;331if (thread_policy_set(mach_thread_self(),THREAD_AFFINITY_POLICY,(thread_policy_t)&ap,THREAD_AFFINITY_POLICY_COUNT) != KERN_SUCCESS)332WARNING("setting thread affinity failed"); // on purpose only a warning333334#endif335}336}337#endif338339////////////////////////////////////////////////////////////////////////////////340/// Unix Platform341////////////////////////////////////////////////////////////////////////////////342343#if defined(__UNIX__) || defined(PTHREADS_WIN32)344345#include <pthread.h>346#include <sched.h>347348#if defined(__USE_NUMA__)349#include <numa.h>350#endif351352namespace embree353{354struct ThreadStartupData355{356public:357ThreadStartupData (thread_func f, void* arg, int affinity)358: f(f), arg(arg), affinity(affinity) {}359public:360thread_func f;361void* arg;362ssize_t affinity;363};364365static void* threadStartup(ThreadStartupData* parg)366{367_mm_setcsr(_mm_getcsr() | /*FTZ:*/ (1<<15) | /*DAZ:*/ (1<<6));368369/*! Mac OS X does not support setting affinity at thread creation time */370#if defined(__MACOSX__)371if (parg->affinity >= 0)372setAffinity(parg->affinity);373#endif374375parg->f(parg->arg);376delete parg;377return nullptr;378}379380/*! creates a hardware thread running on specific core */381thread_t createThread(thread_func f, void* arg, size_t stack_size, ssize_t threadID)382{383/* set stack size */384pthread_attr_t attr;385pthread_attr_init(&attr);386if (stack_size > 0) pthread_attr_setstacksize (&attr, stack_size);387388/* create thread */389pthread_t* tid = new pthread_t;390if (pthread_create(tid,&attr,(void*(*)(void*))threadStartup,new ThreadStartupData(f,arg,threadID)) != 0) {391pthread_attr_destroy(&attr);392delete tid;393FATAL("pthread_create failed");394}395pthread_attr_destroy(&attr);396397/* set affinity */398#if defined(__LINUX__) && !defined(__ANDROID__)399if (threadID >= 0) {400cpu_set_t cset;401CPU_ZERO(&cset);402threadID = mapThreadID(threadID);403CPU_SET(threadID, &cset);404pthread_setaffinity_np(*tid, sizeof(cset), &cset);405}406#elif defined(__FreeBSD__)407if (threadID >= 0) {408cpuset_t cset;409CPU_ZERO(&cset);410CPU_SET(threadID, &cset);411pthread_setaffinity_np(*tid, sizeof(cset), &cset);412}413#elif defined(__ANDROID__)414if (threadID >= 0) {415cpu_set_t cset;416CPU_ZERO(&cset);417CPU_SET(threadID, &cset);418sched_setaffinity(pthread_gettid_np(*tid), sizeof(cset), &cset);419}420#endif421422return thread_t(tid);423}424425/*! the thread calling this function gets yielded */426void yield() {427sched_yield();428}429430/*! waits until the given thread has terminated */431void join(thread_t tid) {432if (pthread_join(*(pthread_t*)tid, nullptr) != 0)433FATAL("pthread_join failed");434delete (pthread_t*)tid;435}436437/*! destroy a hardware thread by its handle */438void destroyThread(thread_t tid) {439#if defined(__ANDROID__)440FATAL("Can't destroy threads on Android."); // pthread_cancel not implemented.441#else442pthread_cancel(*(pthread_t*)tid);443delete (pthread_t*)tid;444#endif445}446447/*! creates thread local storage */448tls_t createTls()449{450pthread_key_t* key = new pthread_key_t;451if (pthread_key_create(key,nullptr) != 0) {452delete key;453FATAL("pthread_key_create failed");454}455456return tls_t(key);457}458459/*! return the thread local storage pointer */460void* getTls(tls_t tls)461{462assert(tls);463return pthread_getspecific(*(pthread_key_t*)tls);464}465466/*! set the thread local storage pointer */467void setTls(tls_t tls, void* const ptr)468{469assert(tls);470if (pthread_setspecific(*(pthread_key_t*)tls, ptr) != 0)471FATAL("pthread_setspecific failed");472}473474/*! destroys thread local storage identifier */475void destroyTls(tls_t tls)476{477assert(tls);478if (pthread_key_delete(*(pthread_key_t*)tls) != 0)479FATAL("pthread_key_delete failed");480delete (pthread_key_t*)tls;481}482}483484#endif485486487