Path: blob/main/contrib/llvm-project/openmp/runtime/src/kmp_barrier.h
35258 views
/*1* kmp_barrier.h2*/34//===----------------------------------------------------------------------===//5//6// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.7// See https://llvm.org/LICENSE.txt for license information.8// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception9//10//===----------------------------------------------------------------------===//1112#ifndef KMP_BARRIER_H13#define KMP_BARRIER_H1415#include "kmp.h"16#include "kmp_i18n.h"1718#if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC19#include <xmmintrin.h>20#define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)21#define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)22#elif KMP_HAVE_ALIGNED_ALLOC23#define KMP_ALGIN_UP(val, alignment) \24(((val) + (alignment)-1) / (alignment) * (alignment))25#define KMP_ALIGNED_ALLOCATE(size, alignment) \26aligned_alloc(alignment, KMP_ALGIN_UP(size, alignment))27#define KMP_ALIGNED_FREE(ptr) free(ptr)28#elif KMP_HAVE_POSIX_MEMALIGN29static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {30void *ptr;31int n = posix_memalign(&ptr, alignment, size);32if (n != 0) {33if (ptr)34free(ptr);35return nullptr;36}37return ptr;38}39#define KMP_ALIGNED_FREE(ptr) free(ptr)40#elif KMP_HAVE__ALIGNED_MALLOC41#include <malloc.h>42#define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)43#define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)44#else45#define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)46#define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)47#endif4849// Use four cache lines: MLC tends to prefetch the next or previous cache line50// creating a possible fake conflict between cores, so this is the only way to51// guarantee that no such prefetch can happen.52#ifndef KMP_FOURLINE_ALIGN_CACHE53#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)54#endif5556#define KMP_OPTIMIZE_FOR_REDUCTIONS 05758class distributedBarrier {59struct flags_s {60kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;61};6263struct go_s {64std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;65};6667struct iter_s {68kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;69};7071struct sleep_s {72std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;73};7475void init(size_t nthr);76void resize(size_t nthr);77void computeGo(size_t n);78void computeVarsForN(size_t n);7980public:81enum {82MAX_ITERS = 3,83MAX_GOS = 8,84IDEAL_GOS = 4,85IDEAL_CONTENTION = 16,86};8788flags_s *flags[MAX_ITERS];89go_s *go;90iter_s *iter;91sleep_s *sleep;9293size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier94size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure95// number of go signals each requiring one write per iteration96size_t KMP_ALIGN_CACHE num_gos;97// number of groups of gos98size_t KMP_ALIGN_CACHE num_groups;99// threads per go signal100size_t KMP_ALIGN_CACHE threads_per_go;101bool KMP_ALIGN_CACHE fix_threads_per_go;102// threads per group103size_t KMP_ALIGN_CACHE threads_per_group;104// number of go signals in a group105size_t KMP_ALIGN_CACHE gos_per_group;106void *team_icvs;107108distributedBarrier() = delete;109~distributedBarrier() = delete;110111// Used instead of constructor to create aligned data112static distributedBarrier *allocate(int nThreads) {113distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(114sizeof(distributedBarrier), 4 * CACHE_LINE);115if (!d) {116KMP_FATAL(MemoryAllocFailed);117}118d->num_threads = 0;119d->max_threads = 0;120for (int i = 0; i < MAX_ITERS; ++i)121d->flags[i] = NULL;122d->go = NULL;123d->iter = NULL;124d->sleep = NULL;125d->team_icvs = NULL;126d->fix_threads_per_go = false;127// calculate gos and groups ONCE on base size128d->computeGo(nThreads);129d->init(nThreads);130return d;131}132133static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }134135void update_num_threads(size_t nthr) { init(nthr); }136137bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }138size_t get_num_threads() { return num_threads; }139kmp_uint64 go_release();140void go_reset();141};142143#endif // KMP_BARRIER_H144145146