GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/openmp/runtime/src/kmp_barrier.h
/*
 * kmp_barrier.h
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_BARRIER_H
#define KMP_BARRIER_H

#include "kmp.h"
#include "kmp_i18n.h"

#if KMP_HAVE_XMMINTRIN_H && KMP_HAVE__MM_MALLOC
#include <xmmintrin.h>
#define KMP_ALIGNED_ALLOCATE(size, alignment) _mm_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _mm_free(ptr)
#elif KMP_HAVE_ALIGNED_ALLOC
#define KMP_ALIGN_UP(val, alignment) \
  (((val) + (alignment)-1) / (alignment) * (alignment))
#define KMP_ALIGNED_ALLOCATE(size, alignment) \
  aligned_alloc(alignment, KMP_ALIGN_UP(size, alignment))
#define KMP_ALIGNED_FREE(ptr) free(ptr)
#elif KMP_HAVE_POSIX_MEMALIGN
static inline void *KMP_ALIGNED_ALLOCATE(size_t size, size_t alignment) {
  void *ptr = nullptr; // keep a defined value even if posix_memalign fails
  int n = posix_memalign(&ptr, alignment, size);
  if (n != 0) {
    if (ptr)
      free(ptr);
    return nullptr;
  }
  return ptr;
}
#define KMP_ALIGNED_FREE(ptr) free(ptr)
#elif KMP_HAVE__ALIGNED_MALLOC
#include <malloc.h>
#define KMP_ALIGNED_ALLOCATE(size, alignment) _aligned_malloc(size, alignment)
#define KMP_ALIGNED_FREE(ptr) _aligned_free(ptr)
#else
#define KMP_ALIGNED_ALLOCATE(size, alignment) KMP_INTERNAL_MALLOC(size)
#define KMP_ALIGNED_FREE(ptr) KMP_INTERNAL_FREE(ptr)
#endif
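
// Usage sketch (illustrative only, with assumed values): C11's aligned_alloc
// requires `size` to be an integral multiple of `alignment`, which is why the
// KMP_ALIGN_UP rounding is applied in the KMP_HAVE_ALIGNED_ALLOC branch above.
// For example, with a 64-byte alignment a 100-byte request rounds up to 128:
//   KMP_ALIGN_UP(100, 64) == ((100 + 63) / 64) * 64 == 128
// Whichever branch is selected, memory obtained with KMP_ALIGNED_ALLOCATE must
// be released with the matching KMP_ALIGNED_FREE:
//   void *p = KMP_ALIGNED_ALLOCATE(sizeof(distributedBarrier), 4 * CACHE_LINE);
//   if (p != NULL) {
//     /* ... use p ... */
//     KMP_ALIGNED_FREE(p);
//   }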

// Use four cache lines: the MLC (mid-level cache) tends to prefetch the next
// or previous cache line, creating a possible false conflict between cores, so
// this is the only way to guarantee that no such prefetch can happen.
#ifndef KMP_FOURLINE_ALIGN_CACHE
#define KMP_FOURLINE_ALIGN_CACHE KMP_ALIGN(4 * CACHE_LINE)
#endif
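
// Rough standalone illustration of the alignment pattern used below (a sketch
// only; `kLine` and `go_slot` are hypothetical, and the 64-byte line size is an
// assumption for the example, the runtime uses its own CACHE_LINE constant):
//   constexpr size_t kLine = 64; // assumed cache-line size for this sketch
//   struct alignas(4 * kLine) go_slot { std::atomic<kmp_uint64> go; };
//   static_assert(sizeof(go_slot) == 4 * kLine, "one slot per four lines");
// Aligning (and thereby padding) each element to four cache lines keeps
// neighboring elements out of the lines that an adjacent-line prefetcher may
// pull in, which is the effect the comment above describes.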

#define KMP_OPTIMIZE_FOR_REDUCTIONS 0

class distributedBarrier {
  struct flags_s {
    kmp_uint32 volatile KMP_FOURLINE_ALIGN_CACHE stillNeed;
  };

  struct go_s {
    std::atomic<kmp_uint64> KMP_FOURLINE_ALIGN_CACHE go;
  };

  struct iter_s {
    kmp_uint64 volatile KMP_FOURLINE_ALIGN_CACHE iter;
  };

  struct sleep_s {
    std::atomic<bool> KMP_FOURLINE_ALIGN_CACHE sleep;
  };

  void init(size_t nthr);
  void resize(size_t nthr);
  void computeGo(size_t n);
  void computeVarsForN(size_t n);

public:
  enum {
    MAX_ITERS = 3,
    MAX_GOS = 8,
    IDEAL_GOS = 4,
    IDEAL_CONTENTION = 16,
  };

  flags_s *flags[MAX_ITERS];
  go_s *go;
  iter_s *iter;
  sleep_s *sleep;

  size_t KMP_ALIGN_CACHE num_threads; // number of threads in barrier
  size_t KMP_ALIGN_CACHE max_threads; // size of arrays in data structure
  // number of go signals each requiring one write per iteration
  size_t KMP_ALIGN_CACHE num_gos;
  // number of groups of gos
  size_t KMP_ALIGN_CACHE num_groups;
  // threads per go signal
  size_t KMP_ALIGN_CACHE threads_per_go;
  bool KMP_ALIGN_CACHE fix_threads_per_go;
  // threads per group
  size_t KMP_ALIGN_CACHE threads_per_group;
  // number of go signals in a group
  size_t KMP_ALIGN_CACHE gos_per_group;
  void *team_icvs; // storage for the team's internal control variables (ICVs)

  distributedBarrier() = delete;
  ~distributedBarrier() = delete;

  // Used instead of constructor to create aligned data
  static distributedBarrier *allocate(int nThreads) {
    distributedBarrier *d = (distributedBarrier *)KMP_ALIGNED_ALLOCATE(
        sizeof(distributedBarrier), 4 * CACHE_LINE);
    if (!d) {
      KMP_FATAL(MemoryAllocFailed);
    }
    d->num_threads = 0;
    d->max_threads = 0;
    for (int i = 0; i < MAX_ITERS; ++i)
      d->flags[i] = NULL;
    d->go = NULL;
    d->iter = NULL;
    d->sleep = NULL;
    d->team_icvs = NULL;
    d->fix_threads_per_go = false;
    // calculate gos and groups ONCE on base size
    d->computeGo(nThreads);
    d->init(nThreads);
    return d;
  }

  static void deallocate(distributedBarrier *db) { KMP_ALIGNED_FREE(db); }

  void update_num_threads(size_t nthr) { init(nthr); }

  bool need_resize(size_t new_nthr) { return (new_nthr > max_threads); }
  size_t get_num_threads() { return num_threads; }
  kmp_uint64 go_release();
  void go_reset();
};
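
// Lifecycle sketch of the public interface above (illustrative only; the real
// call sites live in the barrier implementation, and `nthr`/`new_nthr` are
// placeholder values):
//   distributedBarrier *b = distributedBarrier::allocate(nthr);
//   if (b->need_resize(new_nthr) || b->get_num_threads() != new_nthr)
//     b->update_num_threads(new_nthr); // re-runs init() for the new size
//   /* ... threads synchronize via go_release()/go_reset() and the flags ... */
//   distributedBarrier::deallocate(b); // releases the aligned allocation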

#endif // KMP_BARRIER_H