GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/openmp/runtime/src/kmp_lock.cpp
1
/*
2
* kmp_lock.cpp -- lock-related functions
3
*/
4
5
//===----------------------------------------------------------------------===//
6
//
7
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8
// See https://llvm.org/LICENSE.txt for license information.
9
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10
//
11
//===----------------------------------------------------------------------===//
12
13
#include <stddef.h>
14
#include <atomic>
15
16
#include "kmp.h"
17
#include "kmp_i18n.h"
18
#include "kmp_io.h"
19
#include "kmp_itt.h"
20
#include "kmp_lock.h"
21
#include "kmp_wait_release.h"
22
#include "kmp_wrapper_getpid.h"
23
24
#if KMP_USE_FUTEX
25
#include <sys/syscall.h>
26
#include <unistd.h>
27
// We should really include <futex.h>, but that causes compatibility problems on
28
// different Linux* OS distributions that either require that you include (or
29
// break when you try to include) <pci/types.h>. Since all we need is the two
30
// macros below (which are part of the kernel ABI, so can't change) we just
31
// define the constants here and don't include <futex.h>
32
#ifndef FUTEX_WAIT
33
#define FUTEX_WAIT 0
34
#endif
35
#ifndef FUTEX_WAKE
36
#define FUTEX_WAKE 1
37
#endif
38
#endif
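// The two constants above are all that is needed for the classic futex
// wait/wake pattern. The following is a minimal editorial sketch (not part of
// this runtime) of that pattern, assuming Linux and the same __NR_futex
// syscall used by the futex lock code later in this file; the 0/1/2 state
// encoding here (free / held / held-with-waiters) is an assumption of the
// sketch, not the encoding the runtime itself uses.
#if 0
#include <atomic>
#include <sys/syscall.h>
#include <unistd.h>

static void example_futex_lock(std::atomic<int> *word) {
  int c = 0;
  if (word->compare_exchange_strong(c, 1)) // fast path: 0 -> 1, no waiters
    return;
  if (c != 2) // advertise that someone is (about to be) sleeping
    c = word->exchange(2);
  while (c != 0) {
    // Sleep only while the word still reads 2; spurious wakeups just loop.
    syscall(__NR_futex, word, FUTEX_WAIT, 2, NULL, NULL, 0);
    c = word->exchange(2);
  }
}

static void example_futex_unlock(std::atomic<int> *word) {
  // 1 -> 0 means nobody waited; 2 -> 0 means at least one sleeper to wake.
  if (word->exchange(0) == 2)
    syscall(__NR_futex, word, FUTEX_WAKE, 1, NULL, NULL, 0);
}
#endif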
39
40
/* Implement spin locks for internal library use. */
41
/* The algorithm implemented is Lamport's bakery lock [1974]. */
42
43
void __kmp_validate_locks(void) {
44
int i;
45
kmp_uint32 x, y;
46
47
/* Check to make sure unsigned arithmetic wraps properly */
48
x = ~((kmp_uint32)0) - 2;
49
y = x - 2;
50
51
for (i = 0; i < 8; ++i, ++x, ++y) {
52
kmp_uint32 z = (x - y);
53
KMP_ASSERT(z == 2);
54
}
55
56
KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
57
}
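// Worked example of the wrap check above (editorial illustration): start with
// x = 0xFFFFFFFD and y = 0xFFFFFFFB. On the iteration where x wraps,
//   x = 0x00000000, y = 0xFFFFFFFE, and x - y = 0 - 0xFFFFFFFE = 2 (mod 2^32),
// so differences such as next_ticket - now_serving in the ticket lock below
// remain correct across the 2^32 boundary, which is exactly what the loop
// asserts for eight consecutive values.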
58
59
/* ------------------------------------------------------------------------ */
60
/* test and set locks */
61
62
// For the non-nested locks, we can only assume that the first 4 bytes were
63
// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
64
// compiler only allocates a 4 byte pointer on IA-32 architecture. On
65
// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
66
//
67
// gcc reserves >= 8 bytes for nested locks, so we can assume that the
68
// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
69
70
static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
71
return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
72
}
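// Owner encoding note (editorial illustration, not upstream text): lock words
// store the owner as gtid + 1, with the "free" value mapping to owner -1.
// For example, a stripped poll value of 0 decodes to -1 (no owner) and a
// stripped value of 5 decodes to owner gtid 4; KMP_LOCK_STRIP undoes whatever
// extra encoding KMP_LOCK_BUSY applied before this -1 adjustment.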
73
74
static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
75
return lck->lk.depth_locked != -1;
76
}
77
78
__forceinline static int
79
__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
80
KMP_MB();
81
82
#ifdef USE_LOCK_PROFILE
83
kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
84
if ((curr != 0) && (curr != gtid + 1))
85
__kmp_printf("LOCK CONTENTION: %p\n", lck);
86
/* else __kmp_printf( "." );*/
87
#endif /* USE_LOCK_PROFILE */
88
89
kmp_int32 tas_free = KMP_LOCK_FREE(tas);
90
kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
91
92
if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
93
__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
94
KMP_FSYNC_ACQUIRED(lck);
95
return KMP_LOCK_ACQUIRED_FIRST;
96
}
97
98
kmp_uint32 spins;
99
kmp_uint64 time;
100
KMP_FSYNC_PREPARE(lck);
101
KMP_INIT_YIELD(spins);
102
KMP_INIT_BACKOFF(time);
103
kmp_backoff_t backoff = __kmp_spin_backoff_params;
104
do {
105
#if !KMP_HAVE_UMWAIT
106
__kmp_spin_backoff(&backoff);
107
#else
108
if (!__kmp_tpause_enabled)
109
__kmp_spin_backoff(&backoff);
110
#endif
111
KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
112
} while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
113
!__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
114
KMP_FSYNC_ACQUIRED(lck);
115
return KMP_LOCK_ACQUIRED_FIRST;
116
}
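// The template above is a test-and-test-and-set acquire: cheap relaxed loads
// while the word looks busy, an acquire-ordered CAS only when it looks free,
// and tuned backoff/yield in between. A minimal portable restatement with
// std::atomic follows as an editorial sketch; the names and the plain yield
// are placeholders for the runtime's KMP_* machinery, not part of it.
#if 0
#include <atomic>
#include <thread>

static void example_tts_acquire(std::atomic<int> *poll, int busy_value) {
  for (;;) {
    int expected = 0; // 0 plays the role of KMP_LOCK_FREE(tas)
    if (poll->load(std::memory_order_relaxed) == 0 &&
        poll->compare_exchange_weak(expected, busy_value,
                                    std::memory_order_acquire,
                                    std::memory_order_relaxed))
      return;
    std::this_thread::yield(); // stand-in for __kmp_spin_backoff / KMP_YIELD_*
  }
}

static void example_tts_release(std::atomic<int> *poll) {
  poll->store(0, std::memory_order_release);
}
#endif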
117
118
int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
119
int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
120
return retval;
121
}
122
123
static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
124
kmp_int32 gtid) {
125
char const *const func = "omp_set_lock";
126
if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
127
__kmp_is_tas_lock_nestable(lck)) {
128
KMP_FATAL(LockNestableUsedAsSimple, func);
129
}
130
if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
131
KMP_FATAL(LockIsAlreadyOwned, func);
132
}
133
return __kmp_acquire_tas_lock(lck, gtid);
134
}
135
136
int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
137
kmp_int32 tas_free = KMP_LOCK_FREE(tas);
138
kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
139
if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
140
__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
141
KMP_FSYNC_ACQUIRED(lck);
142
return TRUE;
143
}
144
return FALSE;
145
}
146
147
static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
148
kmp_int32 gtid) {
149
char const *const func = "omp_test_lock";
150
if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
151
__kmp_is_tas_lock_nestable(lck)) {
152
KMP_FATAL(LockNestableUsedAsSimple, func);
153
}
154
return __kmp_test_tas_lock(lck, gtid);
155
}
156
157
int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
158
KMP_MB(); /* Flush all pending memory write invalidates. */
159
160
KMP_FSYNC_RELEASING(lck);
161
KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));
162
KMP_MB(); /* Flush all pending memory write invalidates. */
163
164
KMP_YIELD_OVERSUB();
165
return KMP_LOCK_RELEASED;
166
}
167
168
static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,
169
kmp_int32 gtid) {
170
char const *const func = "omp_unset_lock";
171
KMP_MB(); /* in case another processor initialized lock */
172
if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
173
__kmp_is_tas_lock_nestable(lck)) {
174
KMP_FATAL(LockNestableUsedAsSimple, func);
175
}
176
if (__kmp_get_tas_lock_owner(lck) == -1) {
177
KMP_FATAL(LockUnsettingFree, func);
178
}
179
if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&
180
(__kmp_get_tas_lock_owner(lck) != gtid)) {
181
KMP_FATAL(LockUnsettingSetByAnother, func);
182
}
183
return __kmp_release_tas_lock(lck, gtid);
184
}
185
186
void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {
187
lck->lk.poll = KMP_LOCK_FREE(tas);
188
}
189
190
void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }
191
192
static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {
193
char const *const func = "omp_destroy_lock";
194
if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
195
__kmp_is_tas_lock_nestable(lck)) {
196
KMP_FATAL(LockNestableUsedAsSimple, func);
197
}
198
if (__kmp_get_tas_lock_owner(lck) != -1) {
199
KMP_FATAL(LockStillOwned, func);
200
}
201
__kmp_destroy_tas_lock(lck);
202
}
203
204
// nested test and set locks
205
206
int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
207
KMP_DEBUG_ASSERT(gtid >= 0);
208
209
if (__kmp_get_tas_lock_owner(lck) == gtid) {
210
lck->lk.depth_locked += 1;
211
return KMP_LOCK_ACQUIRED_NEXT;
212
} else {
213
__kmp_acquire_tas_lock_timed_template(lck, gtid);
214
lck->lk.depth_locked = 1;
215
return KMP_LOCK_ACQUIRED_FIRST;
216
}
217
}
218
219
static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
220
kmp_int32 gtid) {
221
char const *const func = "omp_set_nest_lock";
222
if (!__kmp_is_tas_lock_nestable(lck)) {
223
KMP_FATAL(LockSimpleUsedAsNestable, func);
224
}
225
return __kmp_acquire_nested_tas_lock(lck, gtid);
226
}
227
228
int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
229
int retval;
230
231
KMP_DEBUG_ASSERT(gtid >= 0);
232
233
if (__kmp_get_tas_lock_owner(lck) == gtid) {
234
retval = ++lck->lk.depth_locked;
235
} else if (!__kmp_test_tas_lock(lck, gtid)) {
236
retval = 0;
237
} else {
238
KMP_MB();
239
retval = lck->lk.depth_locked = 1;
240
}
241
return retval;
242
}
243
244
static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
245
kmp_int32 gtid) {
246
char const *const func = "omp_test_nest_lock";
247
if (!__kmp_is_tas_lock_nestable(lck)) {
248
KMP_FATAL(LockSimpleUsedAsNestable, func);
249
}
250
return __kmp_test_nested_tas_lock(lck, gtid);
251
}
252
253
int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
254
KMP_DEBUG_ASSERT(gtid >= 0);
255
256
KMP_MB();
257
if (--(lck->lk.depth_locked) == 0) {
258
__kmp_release_tas_lock(lck, gtid);
259
return KMP_LOCK_RELEASED;
260
}
261
return KMP_LOCK_STILL_HELD;
262
}
263
264
static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,
265
kmp_int32 gtid) {
266
char const *const func = "omp_unset_nest_lock";
267
KMP_MB(); /* in case another processor initialized lock */
268
if (!__kmp_is_tas_lock_nestable(lck)) {
269
KMP_FATAL(LockSimpleUsedAsNestable, func);
270
}
271
if (__kmp_get_tas_lock_owner(lck) == -1) {
272
KMP_FATAL(LockUnsettingFree, func);
273
}
274
if (__kmp_get_tas_lock_owner(lck) != gtid) {
275
KMP_FATAL(LockUnsettingSetByAnother, func);
276
}
277
return __kmp_release_nested_tas_lock(lck, gtid);
278
}
279
280
void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {
281
__kmp_init_tas_lock(lck);
282
lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
283
}
284
285
void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {
286
__kmp_destroy_tas_lock(lck);
287
lck->lk.depth_locked = 0;
288
}
289
290
static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
291
char const *const func = "omp_destroy_nest_lock";
292
if (!__kmp_is_tas_lock_nestable(lck)) {
293
KMP_FATAL(LockSimpleUsedAsNestable, func);
294
}
295
if (__kmp_get_tas_lock_owner(lck) != -1) {
296
KMP_FATAL(LockStillOwned, func);
297
}
298
__kmp_destroy_nested_tas_lock(lck);
299
}
300
301
#if KMP_USE_FUTEX
302
303
/* ------------------------------------------------------------------------ */
304
/* futex locks */
305
306
// futex locks are really just test and set locks, with a different method
307
// of handling contention. They take the same amount of space as test and
308
// set locks, and are allocated the same way (i.e. use the area allocated by
309
// the compiler for non-nested locks / allocate nested locks on the heap).
310
311
static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {
312
return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;
313
}
314
315
static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {
316
return lck->lk.depth_locked != -1;
317
}
318
319
__forceinline static int
320
__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {
321
kmp_int32 gtid_code = (gtid + 1) << 1;
322
323
KMP_MB();
324
325
#ifdef USE_LOCK_PROFILE
326
kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));
327
if ((curr != 0) && (curr != gtid_code))
328
__kmp_printf("LOCK CONTENTION: %p\n", lck);
329
/* else __kmp_printf( "." );*/
330
#endif /* USE_LOCK_PROFILE */
331
332
KMP_FSYNC_PREPARE(lck);
333
KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",
334
lck, lck->lk.poll, gtid));
335
336
kmp_int32 poll_val;
337
338
while ((poll_val = KMP_COMPARE_AND_STORE_RET32(
339
&(lck->lk.poll), KMP_LOCK_FREE(futex),
340
KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {
341
342
kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;
343
KA_TRACE(
344
1000,
345
("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",
346
lck, gtid, poll_val, cond));
347
348
// NOTE: if you try to use the following condition for this branch
349
//
350
// if ( poll_val & 1 == 0 )
351
//
352
// Then the 12.0 compiler has a bug where the following block will
353
// always be skipped, regardless of the value of the LSB of poll_val.
354
if (!cond) {
355
// Try to set the lsb in the poll to indicate to the owner
356
// thread that they need to wake this thread up.
357
if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,
358
poll_val | KMP_LOCK_BUSY(1, futex))) {
359
KA_TRACE(
360
1000,
361
("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",
362
lck, lck->lk.poll, gtid));
363
continue;
364
}
365
poll_val |= KMP_LOCK_BUSY(1, futex);
366
367
KA_TRACE(1000,
368
("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,
369
lck->lk.poll, gtid));
370
}
371
372
KA_TRACE(
373
1000,
374
("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",
375
lck, gtid, poll_val));
376
377
long rc;
378
if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,
379
NULL, 0)) != 0) {
380
KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "
381
"failed (rc=%ld errno=%d)\n",
382
lck, gtid, poll_val, rc, errno));
383
continue;
384
}
385
386
KA_TRACE(1000,
387
("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",
388
lck, gtid, poll_val));
389
// This thread has now done a successful futex wait call and was entered on
390
// the OS futex queue. We must now perform a futex wake call when releasing
391
// the lock, as we have no idea how many other threads are in the queue.
392
gtid_code |= 1;
393
}
394
395
KMP_FSYNC_ACQUIRED(lck);
396
KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
397
lck->lk.poll, gtid));
398
return KMP_LOCK_ACQUIRED_FIRST;
399
}
400
401
int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
402
int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);
403
return retval;
404
}
405
406
static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,
407
kmp_int32 gtid) {
408
char const *const func = "omp_set_lock";
409
if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
410
__kmp_is_futex_lock_nestable(lck)) {
411
KMP_FATAL(LockNestableUsedAsSimple, func);
412
}
413
if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {
414
KMP_FATAL(LockIsAlreadyOwned, func);
415
}
416
return __kmp_acquire_futex_lock(lck, gtid);
417
}
418
419
int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
420
if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),
421
KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {
422
KMP_FSYNC_ACQUIRED(lck);
423
return TRUE;
424
}
425
return FALSE;
426
}
427
428
static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,
429
kmp_int32 gtid) {
430
char const *const func = "omp_test_lock";
431
if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
432
__kmp_is_futex_lock_nestable(lck)) {
433
KMP_FATAL(LockNestableUsedAsSimple, func);
434
}
435
return __kmp_test_futex_lock(lck, gtid);
436
}
437
438
int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
439
KMP_MB(); /* Flush all pending memory write invalidates. */
440
441
KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",
442
lck, lck->lk.poll, gtid));
443
444
KMP_FSYNC_RELEASING(lck);
445
446
kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));
447
448
KA_TRACE(1000,
449
("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",
450
lck, gtid, poll_val));
451
452
if (KMP_LOCK_STRIP(poll_val) & 1) {
453
KA_TRACE(1000,
454
("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",
455
lck, gtid));
456
syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),
457
NULL, NULL, 0);
458
}
459
460
KMP_MB(); /* Flush all pending memory write invalidates. */
461
462
KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,
463
lck->lk.poll, gtid));
464
465
KMP_YIELD_OVERSUB();
466
return KMP_LOCK_RELEASED;
467
}
468
469
static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,
470
kmp_int32 gtid) {
471
char const *const func = "omp_unset_lock";
472
KMP_MB(); /* in case another processor initialized lock */
473
if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
474
__kmp_is_futex_lock_nestable(lck)) {
475
KMP_FATAL(LockNestableUsedAsSimple, func);
476
}
477
if (__kmp_get_futex_lock_owner(lck) == -1) {
478
KMP_FATAL(LockUnsettingFree, func);
479
}
480
if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&
481
(__kmp_get_futex_lock_owner(lck) != gtid)) {
482
KMP_FATAL(LockUnsettingSetByAnother, func);
483
}
484
return __kmp_release_futex_lock(lck, gtid);
485
}
486
487
void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {
488
TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));
489
}
490
491
void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }
492
493
static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {
494
char const *const func = "omp_destroy_lock";
495
if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&
496
__kmp_is_futex_lock_nestable(lck)) {
497
KMP_FATAL(LockNestableUsedAsSimple, func);
498
}
499
if (__kmp_get_futex_lock_owner(lck) != -1) {
500
KMP_FATAL(LockStillOwned, func);
501
}
502
__kmp_destroy_futex_lock(lck);
503
}
504
505
// nested futex locks
506
507
int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
508
KMP_DEBUG_ASSERT(gtid >= 0);
509
510
if (__kmp_get_futex_lock_owner(lck) == gtid) {
511
lck->lk.depth_locked += 1;
512
return KMP_LOCK_ACQUIRED_NEXT;
513
} else {
514
__kmp_acquire_futex_lock_timed_template(lck, gtid);
515
lck->lk.depth_locked = 1;
516
return KMP_LOCK_ACQUIRED_FIRST;
517
}
518
}
519
520
static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
521
kmp_int32 gtid) {
522
char const *const func = "omp_set_nest_lock";
523
if (!__kmp_is_futex_lock_nestable(lck)) {
524
KMP_FATAL(LockSimpleUsedAsNestable, func);
525
}
526
return __kmp_acquire_nested_futex_lock(lck, gtid);
527
}
528
529
int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
530
int retval;
531
532
KMP_DEBUG_ASSERT(gtid >= 0);
533
534
if (__kmp_get_futex_lock_owner(lck) == gtid) {
535
retval = ++lck->lk.depth_locked;
536
} else if (!__kmp_test_futex_lock(lck, gtid)) {
537
retval = 0;
538
} else {
539
KMP_MB();
540
retval = lck->lk.depth_locked = 1;
541
}
542
return retval;
543
}
544
545
static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
546
kmp_int32 gtid) {
547
char const *const func = "omp_test_nest_lock";
548
if (!__kmp_is_futex_lock_nestable(lck)) {
549
KMP_FATAL(LockSimpleUsedAsNestable, func);
550
}
551
return __kmp_test_nested_futex_lock(lck, gtid);
552
}
553
554
int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {
555
KMP_DEBUG_ASSERT(gtid >= 0);
556
557
KMP_MB();
558
if (--(lck->lk.depth_locked) == 0) {
559
__kmp_release_futex_lock(lck, gtid);
560
return KMP_LOCK_RELEASED;
561
}
562
return KMP_LOCK_STILL_HELD;
563
}
564
565
static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,
566
kmp_int32 gtid) {
567
char const *const func = "omp_unset_nest_lock";
568
KMP_MB(); /* in case another processor initialized lock */
569
if (!__kmp_is_futex_lock_nestable(lck)) {
570
KMP_FATAL(LockSimpleUsedAsNestable, func);
571
}
572
if (__kmp_get_futex_lock_owner(lck) == -1) {
573
KMP_FATAL(LockUnsettingFree, func);
574
}
575
if (__kmp_get_futex_lock_owner(lck) != gtid) {
576
KMP_FATAL(LockUnsettingSetByAnother, func);
577
}
578
return __kmp_release_nested_futex_lock(lck, gtid);
579
}
580
581
void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {
582
__kmp_init_futex_lock(lck);
583
lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
584
}
585
586
void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {
587
__kmp_destroy_futex_lock(lck);
588
lck->lk.depth_locked = 0;
589
}
590
591
static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
592
char const *const func = "omp_destroy_nest_lock";
593
if (!__kmp_is_futex_lock_nestable(lck)) {
594
KMP_FATAL(LockSimpleUsedAsNestable, func);
595
}
596
if (__kmp_get_futex_lock_owner(lck) != -1) {
597
KMP_FATAL(LockStillOwned, func);
598
}
599
__kmp_destroy_nested_futex_lock(lck);
600
}
601
602
#endif // KMP_USE_FUTEX
603
604
/* ------------------------------------------------------------------------ */
605
/* ticket (bakery) locks */
606
607
static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {
608
return std::atomic_load_explicit(&lck->lk.owner_id,
609
std::memory_order_relaxed) -
610
1;
611
}
612
613
static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {
614
return std::atomic_load_explicit(&lck->lk.depth_locked,
615
std::memory_order_relaxed) != -1;
616
}
617
618
static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {
619
return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,
620
std::memory_order_acquire) == my_ticket;
621
}
622
623
__forceinline static int
624
__kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,
625
kmp_int32 gtid) {
626
kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(
627
&lck->lk.next_ticket, 1U, std::memory_order_relaxed);
628
629
#ifdef USE_LOCK_PROFILE
630
if (std::atomic_load_explicit(&lck->lk.now_serving,
631
std::memory_order_relaxed) != my_ticket)
632
__kmp_printf("LOCK CONTENTION: %p\n", lck);
633
/* else __kmp_printf( "." );*/
634
#endif /* USE_LOCK_PROFILE */
635
636
if (std::atomic_load_explicit(&lck->lk.now_serving,
637
std::memory_order_acquire) == my_ticket) {
638
return KMP_LOCK_ACQUIRED_FIRST;
639
}
640
KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);
641
return KMP_LOCK_ACQUIRED_FIRST;
642
}
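// The acquire above is the ticket ("bakery") discipline: an atomic fetch_add
// hands out tickets in FIFO order and each thread waits until now_serving
// reaches its ticket. A self-contained editorial sketch of the same idea with
// std::atomic (hypothetical type, not the runtime's kmp_ticket_lock_t):
#if 0
#include <atomic>

struct example_ticket_lock {
  std::atomic<unsigned> next_ticket{0};
  std::atomic<unsigned> now_serving{0};

  void acquire() {
    unsigned my_ticket = next_ticket.fetch_add(1, std::memory_order_relaxed);
    while (now_serving.load(std::memory_order_acquire) != my_ticket) {
      // spin; a real implementation would pause or yield here
    }
  }
  void release() {
    // Only the holder advances now_serving, publishing the critical section.
    now_serving.fetch_add(1, std::memory_order_release);
  }
};
#endif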
643
644
int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
645
int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);
646
return retval;
647
}
648
649
static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
650
kmp_int32 gtid) {
651
char const *const func = "omp_set_lock";
652
653
if (!std::atomic_load_explicit(&lck->lk.initialized,
654
std::memory_order_relaxed)) {
655
KMP_FATAL(LockIsUninitialized, func);
656
}
657
if (lck->lk.self != lck) {
658
KMP_FATAL(LockIsUninitialized, func);
659
}
660
if (__kmp_is_ticket_lock_nestable(lck)) {
661
KMP_FATAL(LockNestableUsedAsSimple, func);
662
}
663
if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {
664
KMP_FATAL(LockIsAlreadyOwned, func);
665
}
666
667
__kmp_acquire_ticket_lock(lck, gtid);
668
669
std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
670
std::memory_order_relaxed);
671
return KMP_LOCK_ACQUIRED_FIRST;
672
}
673
674
int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
675
kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,
676
std::memory_order_relaxed);
677
678
if (std::atomic_load_explicit(&lck->lk.now_serving,
679
std::memory_order_relaxed) == my_ticket) {
680
kmp_uint32 next_ticket = my_ticket + 1;
681
if (std::atomic_compare_exchange_strong_explicit(
682
&lck->lk.next_ticket, &my_ticket, next_ticket,
683
std::memory_order_acquire, std::memory_order_acquire)) {
684
return TRUE;
685
}
686
}
687
return FALSE;
688
}
689
690
static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
691
kmp_int32 gtid) {
692
char const *const func = "omp_test_lock";
693
694
if (!std::atomic_load_explicit(&lck->lk.initialized,
695
std::memory_order_relaxed)) {
696
KMP_FATAL(LockIsUninitialized, func);
697
}
698
if (lck->lk.self != lck) {
699
KMP_FATAL(LockIsUninitialized, func);
700
}
701
if (__kmp_is_ticket_lock_nestable(lck)) {
702
KMP_FATAL(LockNestableUsedAsSimple, func);
703
}
704
705
int retval = __kmp_test_ticket_lock(lck, gtid);
706
707
if (retval) {
708
std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
709
std::memory_order_relaxed);
710
}
711
return retval;
712
}
713
714
int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
715
kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,
716
std::memory_order_relaxed) -
717
std::atomic_load_explicit(&lck->lk.now_serving,
718
std::memory_order_relaxed);
719
720
std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,
721
std::memory_order_release);
722
723
KMP_YIELD(distance >
724
(kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc));
725
return KMP_LOCK_RELEASED;
726
}
727
728
static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
729
kmp_int32 gtid) {
730
char const *const func = "omp_unset_lock";
731
732
if (!std::atomic_load_explicit(&lck->lk.initialized,
733
std::memory_order_relaxed)) {
734
KMP_FATAL(LockIsUninitialized, func);
735
}
736
if (lck->lk.self != lck) {
737
KMP_FATAL(LockIsUninitialized, func);
738
}
739
if (__kmp_is_ticket_lock_nestable(lck)) {
740
KMP_FATAL(LockNestableUsedAsSimple, func);
741
}
742
if (__kmp_get_ticket_lock_owner(lck) == -1) {
743
KMP_FATAL(LockUnsettingFree, func);
744
}
745
if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&
746
(__kmp_get_ticket_lock_owner(lck) != gtid)) {
747
KMP_FATAL(LockUnsettingSetByAnother, func);
748
}
749
std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
750
return __kmp_release_ticket_lock(lck, gtid);
751
}
752
753
void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {
754
lck->lk.location = NULL;
755
lck->lk.self = lck;
756
std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
757
std::memory_order_relaxed);
758
std::atomic_store_explicit(&lck->lk.now_serving, 0U,
759
std::memory_order_relaxed);
760
std::atomic_store_explicit(
761
&lck->lk.owner_id, 0,
762
std::memory_order_relaxed); // no thread owns the lock.
763
std::atomic_store_explicit(
764
&lck->lk.depth_locked, -1,
765
std::memory_order_relaxed); // -1 => not a nested lock.
766
std::atomic_store_explicit(&lck->lk.initialized, true,
767
std::memory_order_release);
768
}
769
770
void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {
771
std::atomic_store_explicit(&lck->lk.initialized, false,
772
std::memory_order_release);
773
lck->lk.self = NULL;
774
lck->lk.location = NULL;
775
std::atomic_store_explicit(&lck->lk.next_ticket, 0U,
776
std::memory_order_relaxed);
777
std::atomic_store_explicit(&lck->lk.now_serving, 0U,
778
std::memory_order_relaxed);
779
std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
780
std::atomic_store_explicit(&lck->lk.depth_locked, -1,
781
std::memory_order_relaxed);
782
}
783
784
static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
785
char const *const func = "omp_destroy_lock";
786
787
if (!std::atomic_load_explicit(&lck->lk.initialized,
788
std::memory_order_relaxed)) {
789
KMP_FATAL(LockIsUninitialized, func);
790
}
791
if (lck->lk.self != lck) {
792
KMP_FATAL(LockIsUninitialized, func);
793
}
794
if (__kmp_is_ticket_lock_nestable(lck)) {
795
KMP_FATAL(LockNestableUsedAsSimple, func);
796
}
797
if (__kmp_get_ticket_lock_owner(lck) != -1) {
798
KMP_FATAL(LockStillOwned, func);
799
}
800
__kmp_destroy_ticket_lock(lck);
801
}
802
803
// nested ticket locks
804
805
int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
806
KMP_DEBUG_ASSERT(gtid >= 0);
807
808
if (__kmp_get_ticket_lock_owner(lck) == gtid) {
809
std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
810
std::memory_order_relaxed);
811
return KMP_LOCK_ACQUIRED_NEXT;
812
} else {
813
__kmp_acquire_ticket_lock_timed_template(lck, gtid);
814
std::atomic_store_explicit(&lck->lk.depth_locked, 1,
815
std::memory_order_relaxed);
816
std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
817
std::memory_order_relaxed);
818
return KMP_LOCK_ACQUIRED_FIRST;
819
}
820
}
821
822
static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
823
kmp_int32 gtid) {
824
char const *const func = "omp_set_nest_lock";
825
826
if (!std::atomic_load_explicit(&lck->lk.initialized,
827
std::memory_order_relaxed)) {
828
KMP_FATAL(LockIsUninitialized, func);
829
}
830
if (lck->lk.self != lck) {
831
KMP_FATAL(LockIsUninitialized, func);
832
}
833
if (!__kmp_is_ticket_lock_nestable(lck)) {
834
KMP_FATAL(LockSimpleUsedAsNestable, func);
835
}
836
return __kmp_acquire_nested_ticket_lock(lck, gtid);
837
}
838
839
int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
840
int retval;
841
842
KMP_DEBUG_ASSERT(gtid >= 0);
843
844
if (__kmp_get_ticket_lock_owner(lck) == gtid) {
845
retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,
846
std::memory_order_relaxed) +
847
1;
848
} else if (!__kmp_test_ticket_lock(lck, gtid)) {
849
retval = 0;
850
} else {
851
std::atomic_store_explicit(&lck->lk.depth_locked, 1,
852
std::memory_order_relaxed);
853
std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,
854
std::memory_order_relaxed);
855
retval = 1;
856
}
857
return retval;
858
}
859
860
static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
861
kmp_int32 gtid) {
862
char const *const func = "omp_test_nest_lock";
863
864
if (!std::atomic_load_explicit(&lck->lk.initialized,
865
std::memory_order_relaxed)) {
866
KMP_FATAL(LockIsUninitialized, func);
867
}
868
if (lck->lk.self != lck) {
869
KMP_FATAL(LockIsUninitialized, func);
870
}
871
if (!__kmp_is_ticket_lock_nestable(lck)) {
872
KMP_FATAL(LockSimpleUsedAsNestable, func);
873
}
874
return __kmp_test_nested_ticket_lock(lck, gtid);
875
}
876
877
int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {
878
KMP_DEBUG_ASSERT(gtid >= 0);
879
880
if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,
881
std::memory_order_relaxed) -
882
1) == 0) {
883
std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);
884
__kmp_release_ticket_lock(lck, gtid);
885
return KMP_LOCK_RELEASED;
886
}
887
return KMP_LOCK_STILL_HELD;
888
}
889
890
static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,
891
kmp_int32 gtid) {
892
char const *const func = "omp_unset_nest_lock";
893
894
if (!std::atomic_load_explicit(&lck->lk.initialized,
895
std::memory_order_relaxed)) {
896
KMP_FATAL(LockIsUninitialized, func);
897
}
898
if (lck->lk.self != lck) {
899
KMP_FATAL(LockIsUninitialized, func);
900
}
901
if (!__kmp_is_ticket_lock_nestable(lck)) {
902
KMP_FATAL(LockSimpleUsedAsNestable, func);
903
}
904
if (__kmp_get_ticket_lock_owner(lck) == -1) {
905
KMP_FATAL(LockUnsettingFree, func);
906
}
907
if (__kmp_get_ticket_lock_owner(lck) != gtid) {
908
KMP_FATAL(LockUnsettingSetByAnother, func);
909
}
910
return __kmp_release_nested_ticket_lock(lck, gtid);
911
}
912
913
void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {
914
__kmp_init_ticket_lock(lck);
915
std::atomic_store_explicit(&lck->lk.depth_locked, 0,
916
std::memory_order_relaxed);
917
// >= 0 for nestable locks, -1 for simple locks
918
}
919
920
void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {
921
__kmp_destroy_ticket_lock(lck);
922
std::atomic_store_explicit(&lck->lk.depth_locked, 0,
923
std::memory_order_relaxed);
924
}
925
926
static void
927
__kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
928
char const *const func = "omp_destroy_nest_lock";
929
930
if (!std::atomic_load_explicit(&lck->lk.initialized,
931
std::memory_order_relaxed)) {
932
KMP_FATAL(LockIsUninitialized, func);
933
}
934
if (lck->lk.self != lck) {
935
KMP_FATAL(LockIsUninitialized, func);
936
}
937
if (!__kmp_is_ticket_lock_nestable(lck)) {
938
KMP_FATAL(LockSimpleUsedAsNestable, func);
939
}
940
if (__kmp_get_ticket_lock_owner(lck) != -1) {
941
KMP_FATAL(LockStillOwned, func);
942
}
943
__kmp_destroy_nested_ticket_lock(lck);
944
}
945
946
// access functions to fields which don't exist for all lock kinds.
947
948
static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {
949
return lck->lk.location;
950
}
951
952
static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,
953
const ident_t *loc) {
954
lck->lk.location = loc;
955
}
956
957
static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {
958
return lck->lk.flags;
959
}
960
961
static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,
962
kmp_lock_flags_t flags) {
963
lck->lk.flags = flags;
964
}
965
966
/* ------------------------------------------------------------------------ */
967
/* queuing locks */
968
969
/* First the states
970
(head,tail) = 0, 0 means lock is unheld, nobody on queue
971
UINT_MAX or -1, 0 means lock is held, nobody on queue
972
h, h means lock held or about to transition,
973
1 element on queue
974
h, t h <> t, means lock is held or about to
975
transition, >1 elements on queue
976
977
Now the transitions
978
Acquire(0,0) = -1 ,0
979
Release(0,0) = Error
980
Acquire(-1,0) = h ,h h > 0
981
Release(-1,0) = 0 ,0
982
Acquire(h,h) = h ,t h > 0, t > 0, h <> t
983
Release(h,h) = -1 ,0 h > 0
984
Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'
985
Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t
986
987
And pictorially
988
989
+-----+
990
| 0, 0|------- release -------> Error
991
+-----+
992
| ^
993
acquire| |release
994
| |
995
| |
996
v |
997
+-----+
998
|-1, 0|
999
+-----+
1000
| ^
1001
acquire| |release
1002
| |
1003
| |
1004
v |
1005
+-----+
1006
| h, h|
1007
+-----+
1008
| ^
1009
acquire| |release
1010
| |
1011
| |
1012
v |
1013
+-----+
1014
| h, t|----- acquire, release loopback ---+
1015
+-----+ |
1016
^ |
1017
| |
1018
+------------------------------------+
1019
*/
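/* Worked example of the transitions above (editorial illustration, with B and
   C standing for gtid+1 of the respective threads):
   thread A acquires an idle lock:       (0,0)  -> (-1,0)
   thread B arrives and queues up:       (-1,0) -> (B,B)
   thread C appends itself to the tail:  (B,B)  -> (B,C)
   A releases; B is dequeued and holds:  (B,C)  -> (C,C)
   B releases; C is dequeued and holds:  (C,C)  -> (-1,0)
   C releases with nobody waiting:       (-1,0) -> (0,0) */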
1020
1021
#ifdef DEBUG_QUEUING_LOCKS
1022
1023
/* Stuff for circular trace buffer */
1024
#define TRACE_BUF_ELE 1024
1025
static char traces[TRACE_BUF_ELE][128] = {0};
1026
static int tc = 0;
1027
#define TRACE_LOCK(X, Y) \
1028
KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);
1029
#define TRACE_LOCK_T(X, Y, Z) \
1030
KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);
1031
#define TRACE_LOCK_HT(X, Y, Z, Q) \
1032
KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, \
1033
Z, Q);
1034
1035
static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,
1036
kmp_queuing_lock_t *lck, kmp_int32 head_id,
1037
kmp_int32 tail_id) {
1038
kmp_int32 t, i;
1039
1040
__kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n");
1041
1042
i = tc % TRACE_BUF_ELE;
1043
__kmp_printf_no_lock("%s\n", traces[i]);
1044
i = (i + 1) % TRACE_BUF_ELE;
1045
while (i != (tc % TRACE_BUF_ELE)) {
1046
__kmp_printf_no_lock("%s", traces[i]);
1047
i = (i + 1) % TRACE_BUF_ELE;
1048
}
1049
__kmp_printf_no_lock("\n");
1050
1051
__kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "
1052
"next_wait:%d, head_id:%d, tail_id:%d\n",
1053
gtid + 1, this_thr->th.th_spin_here,
1054
this_thr->th.th_next_waiting, head_id, tail_id);
1055
1056
__kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);
1057
1058
if (lck->lk.head_id >= 1) {
1059
t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;
1060
while (t > 0) {
1061
__kmp_printf_no_lock("-> %d ", t);
1062
t = __kmp_threads[t - 1]->th.th_next_waiting;
1063
}
1064
}
1065
__kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id);
1066
__kmp_printf_no_lock("\n\n");
1067
}
1068
1069
#endif /* DEBUG_QUEUING_LOCKS */
1070
1071
static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {
1072
return TCR_4(lck->lk.owner_id) - 1;
1073
}
1074
1075
static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {
1076
return lck->lk.depth_locked != -1;
1077
}
1078
1079
/* Acquire a lock using the queuing lock implementation */
1080
template <bool takeTime>
1081
/* [TLW] The unused template above is left behind because of what BEB believes
1082
is a potential compiler problem with __forceinline. */
1083
__forceinline static int
1084
__kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
1085
kmp_int32 gtid) {
1086
kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1087
volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1088
volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1089
volatile kmp_uint32 *spin_here_p;
1090
1091
#if OMPT_SUPPORT
1092
ompt_state_t prev_state = ompt_state_undefined;
1093
#endif
1094
1095
KA_TRACE(1000,
1096
("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1097
1098
KMP_FSYNC_PREPARE(lck);
1099
KMP_DEBUG_ASSERT(this_thr != NULL);
1100
spin_here_p = &this_thr->th.th_spin_here;
1101
1102
#ifdef DEBUG_QUEUING_LOCKS
1103
TRACE_LOCK(gtid + 1, "acq ent");
1104
if (*spin_here_p)
1105
__kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1106
if (this_thr->th.th_next_waiting != 0)
1107
__kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1108
#endif
1109
KMP_DEBUG_ASSERT(!*spin_here_p);
1110
KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1111
1112
/* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to
1113
head_id_p that may follow, not just in execution order, but also in
1114
visibility order. This way, when a releasing thread observes the changes to
1115
the queue by this thread, it can rightly assume that spin_here_p has
1116
already been set to TRUE, so that when it sets spin_here_p to FALSE, it is
1117
not premature. If the releasing thread sets spin_here_p to FALSE before
1118
this thread sets it to TRUE, this thread will hang. */
1119
*spin_here_p = TRUE; /* before enqueuing to prevent race */
1120
1121
while (1) {
1122
kmp_int32 enqueued;
1123
kmp_int32 head;
1124
kmp_int32 tail;
1125
1126
head = *head_id_p;
1127
1128
switch (head) {
1129
1130
case -1: {
1131
#ifdef DEBUG_QUEUING_LOCKS
1132
tail = *tail_id_p;
1133
TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1134
#endif
1135
tail = 0; /* to make sure next link asynchronously read is not set
1136
accidentally; this assignment prevents us from entering the
1137
if ( tail > 0 ) condition in the enqueued case below, which is not
1138
necessary for this state transition */
1139
1140
/* try (-1,0)->(tid,tid) */
1141
enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,
1142
KMP_PACK_64(-1, 0),
1143
KMP_PACK_64(gtid + 1, gtid + 1));
1144
#ifdef DEBUG_QUEUING_LOCKS
1145
if (enqueued)
1146
TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");
1147
#endif
1148
} break;
1149
1150
default: {
1151
tail = *tail_id_p;
1152
KMP_DEBUG_ASSERT(tail != gtid + 1);
1153
1154
#ifdef DEBUG_QUEUING_LOCKS
1155
TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1156
#endif
1157
1158
if (tail == 0) {
1159
enqueued = FALSE;
1160
} else {
1161
/* try (h,t) or (h,h)->(h,tid) */
1162
enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);
1163
1164
#ifdef DEBUG_QUEUING_LOCKS
1165
if (enqueued)
1166
TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");
1167
#endif
1168
}
1169
} break;
1170
1171
case 0: /* empty queue */
1172
{
1173
kmp_int32 grabbed_lock;
1174
1175
#ifdef DEBUG_QUEUING_LOCKS
1176
tail = *tail_id_p;
1177
TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);
1178
#endif
1179
/* try (0,0)->(-1,0) */
1180
1181
/* only legal transition out of head = 0 is head = -1 with no change to
1182
* tail */
1183
grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);
1184
1185
if (grabbed_lock) {
1186
1187
*spin_here_p = FALSE;
1188
1189
KA_TRACE(
1190
1000,
1191
("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",
1192
lck, gtid));
1193
#ifdef DEBUG_QUEUING_LOCKS
1194
TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);
1195
#endif
1196
1197
#if OMPT_SUPPORT
1198
if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
1199
/* change the state before clearing wait_id */
1200
this_thr->th.ompt_thread_info.state = prev_state;
1201
this_thr->th.ompt_thread_info.wait_id = 0;
1202
}
1203
#endif
1204
1205
KMP_FSYNC_ACQUIRED(lck);
1206
return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */
1207
}
1208
enqueued = FALSE;
1209
} break;
1210
}
1211
1212
#if OMPT_SUPPORT
1213
if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
1214
/* this thread will spin; set wait_id before entering wait state */
1215
prev_state = this_thr->th.ompt_thread_info.state;
1216
this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
1217
this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
1218
}
1219
#endif
1220
1221
if (enqueued) {
1222
if (tail > 0) {
1223
kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);
1224
KMP_ASSERT(tail_thr != NULL);
1225
tail_thr->th.th_next_waiting = gtid + 1;
1226
/* corresponding wait for this write in release code */
1227
}
1228
KA_TRACE(1000,
1229
("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",
1230
lck, gtid));
1231
1232
KMP_MB();
1233
// ToDo: Use __kmp_wait_sleep or similar when blocktime != inf
1234
KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);
1235
// Synchronize writes to both runtime thread structures
1236
// and writes in user code.
1237
KMP_MB();
1238
1239
#ifdef DEBUG_QUEUING_LOCKS
1240
TRACE_LOCK(gtid + 1, "acq spin");
1241
1242
if (this_thr->th.th_next_waiting != 0)
1243
__kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1244
#endif
1245
KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1246
KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "
1247
"waiting on queue\n",
1248
lck, gtid));
1249
1250
#ifdef DEBUG_QUEUING_LOCKS
1251
TRACE_LOCK(gtid + 1, "acq exit 2");
1252
#endif
1253
1254
#if OMPT_SUPPORT
1255
/* change the state before clearing wait_id */
1256
this_thr->th.ompt_thread_info.state = prev_state;
1257
this_thr->th.ompt_thread_info.wait_id = 0;
1258
#endif
1259
1260
/* got lock, we were dequeued by the thread that released lock */
1261
return KMP_LOCK_ACQUIRED_FIRST;
1262
}
1263
1264
/* Yield if number of threads > number of logical processors */
1265
/* ToDo: Not sure why this should only be in oversubscription case,
1266
maybe should be traditional YIELD_INIT/YIELD_WHEN loop */
1267
KMP_YIELD_OVERSUB();
1268
1269
#ifdef DEBUG_QUEUING_LOCKS
1270
TRACE_LOCK(gtid + 1, "acq retry");
1271
#endif
1272
}
1273
KMP_ASSERT2(0, "should not get here");
1274
return KMP_LOCK_ACQUIRED_FIRST;
1275
}
1276
1277
int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1278
KMP_DEBUG_ASSERT(gtid >= 0);
1279
1280
int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1281
return retval;
1282
}
1283
1284
static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1285
kmp_int32 gtid) {
1286
char const *const func = "omp_set_lock";
1287
if (lck->lk.initialized != lck) {
1288
KMP_FATAL(LockIsUninitialized, func);
1289
}
1290
if (__kmp_is_queuing_lock_nestable(lck)) {
1291
KMP_FATAL(LockNestableUsedAsSimple, func);
1292
}
1293
if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1294
KMP_FATAL(LockIsAlreadyOwned, func);
1295
}
1296
1297
__kmp_acquire_queuing_lock(lck, gtid);
1298
1299
lck->lk.owner_id = gtid + 1;
1300
return KMP_LOCK_ACQUIRED_FIRST;
1301
}
1302
1303
int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1304
volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1305
kmp_int32 head;
1306
#ifdef KMP_DEBUG
1307
kmp_info_t *this_thr;
1308
#endif
1309
1310
KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));
1311
KMP_DEBUG_ASSERT(gtid >= 0);
1312
#ifdef KMP_DEBUG
1313
this_thr = __kmp_thread_from_gtid(gtid);
1314
KMP_DEBUG_ASSERT(this_thr != NULL);
1315
KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1316
#endif
1317
1318
head = *head_id_p;
1319
1320
if (head == 0) { /* nobody on queue, nobody holding */
1321
/* try (0,0)->(-1,0) */
1322
if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {
1323
KA_TRACE(1000,
1324
("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));
1325
KMP_FSYNC_ACQUIRED(lck);
1326
return TRUE;
1327
}
1328
}
1329
1330
KA_TRACE(1000,
1331
("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));
1332
return FALSE;
1333
}
1334
1335
static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1336
kmp_int32 gtid) {
1337
char const *const func = "omp_test_lock";
1338
if (lck->lk.initialized != lck) {
1339
KMP_FATAL(LockIsUninitialized, func);
1340
}
1341
if (__kmp_is_queuing_lock_nestable(lck)) {
1342
KMP_FATAL(LockNestableUsedAsSimple, func);
1343
}
1344
1345
int retval = __kmp_test_queuing_lock(lck, gtid);
1346
1347
if (retval) {
1348
lck->lk.owner_id = gtid + 1;
1349
}
1350
return retval;
1351
}
1352
1353
int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1354
volatile kmp_int32 *head_id_p = &lck->lk.head_id;
1355
volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;
1356
1357
KA_TRACE(1000,
1358
("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));
1359
KMP_DEBUG_ASSERT(gtid >= 0);
1360
#if KMP_DEBUG || DEBUG_QUEUING_LOCKS
1361
kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);
1362
#endif
1363
KMP_DEBUG_ASSERT(this_thr != NULL);
1364
#ifdef DEBUG_QUEUING_LOCKS
1365
TRACE_LOCK(gtid + 1, "rel ent");
1366
1367
if (this_thr->th.th_spin_here)
1368
__kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1369
if (this_thr->th.th_next_waiting != 0)
1370
__kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);
1371
#endif
1372
KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
1373
KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
1374
1375
KMP_FSYNC_RELEASING(lck);
1376
1377
while (1) {
1378
kmp_int32 dequeued;
1379
kmp_int32 head;
1380
kmp_int32 tail;
1381
1382
head = *head_id_p;
1383
1384
#ifdef DEBUG_QUEUING_LOCKS
1385
tail = *tail_id_p;
1386
TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);
1387
if (head == 0)
1388
__kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1389
#endif
1390
KMP_DEBUG_ASSERT(head !=
1391
0); /* holding the lock, head must be -1 or queue head */
1392
1393
if (head == -1) { /* nobody on queue */
1394
/* try (-1,0)->(0,0) */
1395
if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {
1396
KA_TRACE(
1397
1000,
1398
("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",
1399
lck, gtid));
1400
#ifdef DEBUG_QUEUING_LOCKS
1401
TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);
1402
#endif
1403
1404
#if OMPT_SUPPORT
1405
/* nothing to do - no other thread is trying to shift blame */
1406
#endif
1407
return KMP_LOCK_RELEASED;
1408
}
1409
dequeued = FALSE;
1410
} else {
1411
KMP_MB();
1412
tail = *tail_id_p;
1413
if (head == tail) { /* only one thread on the queue */
1414
#ifdef DEBUG_QUEUING_LOCKS
1415
if (head <= 0)
1416
__kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1417
#endif
1418
KMP_DEBUG_ASSERT(head > 0);
1419
1420
/* try (h,h)->(-1,0) */
1421
dequeued = KMP_COMPARE_AND_STORE_REL64(
1422
RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),
1423
KMP_PACK_64(-1, 0));
1424
#ifdef DEBUG_QUEUING_LOCKS
1425
TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");
1426
#endif
1427
1428
} else {
1429
volatile kmp_int32 *waiting_id_p;
1430
kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1431
KMP_DEBUG_ASSERT(head_thr != NULL);
1432
waiting_id_p = &head_thr->th.th_next_waiting;
1433
1434
/* Does this require synchronous reads? */
1435
#ifdef DEBUG_QUEUING_LOCKS
1436
if (head <= 0 || tail <= 0)
1437
__kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1438
#endif
1439
KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1440
1441
/* try (h,t)->(h',t) or (t,t) */
1442
KMP_MB();
1443
/* make sure enqueuing thread has time to update next waiting thread
1444
* field */
1445
*head_id_p =
1446
KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);
1447
#ifdef DEBUG_QUEUING_LOCKS
1448
TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");
1449
#endif
1450
dequeued = TRUE;
1451
}
1452
}
1453
1454
if (dequeued) {
1455
kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);
1456
KMP_DEBUG_ASSERT(head_thr != NULL);
1457
1458
/* Does this require synchronous reads? */
1459
#ifdef DEBUG_QUEUING_LOCKS
1460
if (head <= 0 || tail <= 0)
1461
__kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);
1462
#endif
1463
KMP_DEBUG_ASSERT(head > 0 && tail > 0);
1464
1465
/* For clean code only. The thread is not actually released until the next
1466
statement, which prevents a race with the acquire code. */
1467
head_thr->th.th_next_waiting = 0;
1468
#ifdef DEBUG_QUEUING_LOCKS
1469
TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);
1470
#endif
1471
1472
KMP_MB();
1473
/* reset spin value */
1474
head_thr->th.th_spin_here = FALSE;
1475
1476
KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "
1477
"dequeuing\n",
1478
lck, gtid));
1479
#ifdef DEBUG_QUEUING_LOCKS
1480
TRACE_LOCK(gtid + 1, "rel exit 2");
1481
#endif
1482
return KMP_LOCK_RELEASED;
1483
}
1484
/* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring
1485
threads */
1486
1487
#ifdef DEBUG_QUEUING_LOCKS
1488
TRACE_LOCK(gtid + 1, "rel retry");
1489
#endif
1490
1491
} /* while */
1492
KMP_ASSERT2(0, "should not get here");
1493
return KMP_LOCK_RELEASED;
1494
}
1495
1496
static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1497
kmp_int32 gtid) {
1498
char const *const func = "omp_unset_lock";
1499
KMP_MB(); /* in case another processor initialized lock */
1500
if (lck->lk.initialized != lck) {
1501
KMP_FATAL(LockIsUninitialized, func);
1502
}
1503
if (__kmp_is_queuing_lock_nestable(lck)) {
1504
KMP_FATAL(LockNestableUsedAsSimple, func);
1505
}
1506
if (__kmp_get_queuing_lock_owner(lck) == -1) {
1507
KMP_FATAL(LockUnsettingFree, func);
1508
}
1509
if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1510
KMP_FATAL(LockUnsettingSetByAnother, func);
1511
}
1512
lck->lk.owner_id = 0;
1513
return __kmp_release_queuing_lock(lck, gtid);
1514
}
1515
1516
void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {
1517
lck->lk.location = NULL;
1518
lck->lk.head_id = 0;
1519
lck->lk.tail_id = 0;
1520
lck->lk.next_ticket = 0;
1521
lck->lk.now_serving = 0;
1522
lck->lk.owner_id = 0; // no thread owns the lock.
1523
lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
1524
lck->lk.initialized = lck;
1525
1526
KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));
1527
}
1528
1529
void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {
1530
lck->lk.initialized = NULL;
1531
lck->lk.location = NULL;
1532
lck->lk.head_id = 0;
1533
lck->lk.tail_id = 0;
1534
lck->lk.next_ticket = 0;
1535
lck->lk.now_serving = 0;
1536
lck->lk.owner_id = 0;
1537
lck->lk.depth_locked = -1;
1538
}
1539
1540
static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1541
char const *const func = "omp_destroy_lock";
1542
if (lck->lk.initialized != lck) {
1543
KMP_FATAL(LockIsUninitialized, func);
1544
}
1545
if (__kmp_is_queuing_lock_nestable(lck)) {
1546
KMP_FATAL(LockNestableUsedAsSimple, func);
1547
}
1548
if (__kmp_get_queuing_lock_owner(lck) != -1) {
1549
KMP_FATAL(LockStillOwned, func);
1550
}
1551
__kmp_destroy_queuing_lock(lck);
1552
}
1553
1554
// nested queuing locks
1555
1556
int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1557
KMP_DEBUG_ASSERT(gtid >= 0);
1558
1559
if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1560
lck->lk.depth_locked += 1;
1561
return KMP_LOCK_ACQUIRED_NEXT;
1562
} else {
1563
__kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);
1564
KMP_MB();
1565
lck->lk.depth_locked = 1;
1566
KMP_MB();
1567
lck->lk.owner_id = gtid + 1;
1568
return KMP_LOCK_ACQUIRED_FIRST;
1569
}
1570
}
1571
1572
static int
1573
__kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1574
kmp_int32 gtid) {
1575
char const *const func = "omp_set_nest_lock";
1576
if (lck->lk.initialized != lck) {
1577
KMP_FATAL(LockIsUninitialized, func);
1578
}
1579
if (!__kmp_is_queuing_lock_nestable(lck)) {
1580
KMP_FATAL(LockSimpleUsedAsNestable, func);
1581
}
1582
return __kmp_acquire_nested_queuing_lock(lck, gtid);
1583
}
1584
1585
int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1586
int retval;
1587
1588
KMP_DEBUG_ASSERT(gtid >= 0);
1589
1590
if (__kmp_get_queuing_lock_owner(lck) == gtid) {
1591
retval = ++lck->lk.depth_locked;
1592
} else if (!__kmp_test_queuing_lock(lck, gtid)) {
1593
retval = 0;
1594
} else {
1595
KMP_MB();
1596
retval = lck->lk.depth_locked = 1;
1597
KMP_MB();
1598
lck->lk.owner_id = gtid + 1;
1599
}
1600
return retval;
1601
}
1602
1603
static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1604
kmp_int32 gtid) {
1605
char const *const func = "omp_test_nest_lock";
1606
if (lck->lk.initialized != lck) {
1607
KMP_FATAL(LockIsUninitialized, func);
1608
}
1609
if (!__kmp_is_queuing_lock_nestable(lck)) {
1610
KMP_FATAL(LockSimpleUsedAsNestable, func);
1611
}
1612
return __kmp_test_nested_queuing_lock(lck, gtid);
1613
}
1614
1615
int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {
1616
KMP_DEBUG_ASSERT(gtid >= 0);
1617
1618
KMP_MB();
1619
if (--(lck->lk.depth_locked) == 0) {
1620
KMP_MB();
1621
lck->lk.owner_id = 0;
1622
__kmp_release_queuing_lock(lck, gtid);
1623
return KMP_LOCK_RELEASED;
1624
}
1625
return KMP_LOCK_STILL_HELD;
1626
}
1627
1628
static int
1629
__kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
1630
kmp_int32 gtid) {
1631
char const *const func = "omp_unset_nest_lock";
1632
KMP_MB(); /* in case another processor initialized lock */
1633
if (lck->lk.initialized != lck) {
1634
KMP_FATAL(LockIsUninitialized, func);
1635
}
1636
if (!__kmp_is_queuing_lock_nestable(lck)) {
1637
KMP_FATAL(LockSimpleUsedAsNestable, func);
1638
}
1639
if (__kmp_get_queuing_lock_owner(lck) == -1) {
1640
KMP_FATAL(LockUnsettingFree, func);
1641
}
1642
if (__kmp_get_queuing_lock_owner(lck) != gtid) {
1643
KMP_FATAL(LockUnsettingSetByAnother, func);
1644
}
1645
return __kmp_release_nested_queuing_lock(lck, gtid);
1646
}
1647
1648
void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1649
__kmp_init_queuing_lock(lck);
1650
lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
1651
}
1652
1653
void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {
1654
__kmp_destroy_queuing_lock(lck);
1655
lck->lk.depth_locked = 0;
1656
}
1657
1658
static void
1659
__kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
1660
char const *const func = "omp_destroy_nest_lock";
1661
if (lck->lk.initialized != lck) {
1662
KMP_FATAL(LockIsUninitialized, func);
1663
}
1664
if (!__kmp_is_queuing_lock_nestable(lck)) {
1665
KMP_FATAL(LockSimpleUsedAsNestable, func);
1666
}
1667
if (__kmp_get_queuing_lock_owner(lck) != -1) {
1668
KMP_FATAL(LockStillOwned, func);
1669
}
1670
__kmp_destroy_nested_queuing_lock(lck);
1671
}
1672
1673
// access functions to fields which don't exist for all lock kinds.
1674
1675
static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {
1676
return lck->lk.location;
1677
}
1678
1679
static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,
1680
const ident_t *loc) {
1681
lck->lk.location = loc;
1682
}
1683
1684
static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {
1685
return lck->lk.flags;
1686
}
1687
1688
static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
1689
kmp_lock_flags_t flags) {
1690
lck->lk.flags = flags;
1691
}
1692
1693
#if KMP_USE_ADAPTIVE_LOCKS
1694
1695
/* RTM Adaptive locks */
1696
1697
#if KMP_HAVE_RTM_INTRINSICS
1698
#include <immintrin.h>
1699
#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1700
1701
#else
1702
1703
// Values from the status register after failed speculation.
1704
#define _XBEGIN_STARTED (~0u)
1705
#define _XABORT_EXPLICIT (1 << 0)
1706
#define _XABORT_RETRY (1 << 1)
1707
#define _XABORT_CONFLICT (1 << 2)
1708
#define _XABORT_CAPACITY (1 << 3)
1709
#define _XABORT_DEBUG (1 << 4)
1710
#define _XABORT_NESTED (1 << 5)
1711
#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))
1712
1713
// Aborts for which it's worth trying again immediately
1714
#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
1715
1716
#define STRINGIZE_INTERNAL(arg) #arg
1717
#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)
1718
1719
// Access to RTM instructions
1720
/*A version of XBegin which returns -1 on speculation, and the value of EAX on
1721
an abort. This is the same definition as the compiler intrinsic that will be
1722
supported at some point. */
1723
static __inline int _xbegin() {
1724
int res = -1;
1725
1726
#if KMP_OS_WINDOWS
1727
#if KMP_ARCH_X86_64
1728
_asm {
1729
_emit 0xC7
1730
_emit 0xF8
1731
_emit 2
1732
_emit 0
1733
_emit 0
1734
_emit 0
1735
jmp L2
1736
mov res, eax
1737
L2:
1738
}
1739
#else /* IA32 */
1740
_asm {
1741
_emit 0xC7
1742
_emit 0xF8
1743
_emit 2
1744
_emit 0
1745
_emit 0
1746
_emit 0
1747
jmp L2
1748
mov res, eax
1749
L2:
1750
}
1751
#endif // KMP_ARCH_X86_64
1752
#else
1753
/* Note that %eax must be noted as killed (clobbered), because the XSR is
1754
returned in %eax(%rax) on abort. Other register values are restored, so
1755
don't need to be killed.
1756
1757
We must also mark 'res' as an input and an output, since otherwise
1758
'res=-1' may be dropped as being dead, whereas we do need the assignment on
1759
the successful (i.e., non-abort) path. */
1760
__asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n"
1761
" .long 1f-1b-6\n"
1762
" jmp 2f\n"
1763
"1: movl %%eax,%0\n"
1764
"2:"
1765
: "+r"(res)::"memory", "%eax");
1766
#endif // KMP_OS_WINDOWS
1767
return res;
1768
}
1769
1770
/* Transaction end */
1771
static __inline void _xend() {
1772
#if KMP_OS_WINDOWS
1773
__asm {
1774
_emit 0x0f
1775
_emit 0x01
1776
_emit 0xd5
1777
}
1778
#else
1779
__asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");
1780
#endif
1781
}
1782
1783
/* This is a macro; the argument must be a single-byte constant which can be
1784
evaluated by the inline assembler, since it is emitted as a byte into the
1785
assembly code. */
1786
// clang-format off
1787
#if KMP_OS_WINDOWS
1788
#define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG
1789
#else
1790
#define _xabort(ARG) \
1791
__asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");
1792
#endif
1793
// clang-format on
1794
#endif // KMP_HAVE_RTM_INTRINSICS
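// Illustrative sketch (not part of the runtime) of how the intrinsics above
// are used by the adaptive and RTM lock code later in this file:
//
//   unsigned status = _xbegin();
//   if (status == _XBEGIN_STARTED) {
//     // speculative region: memory accesses are transactional
//     _xend(); // commit
//   } else {
//     // aborted: status holds the _XABORT_* bits, and
//     // (status & SOFT_ABORT_MASK) decides whether an immediate retry
//     // is worthwhile
//   }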
1795
1796
// Statistics are collected for testing purposes
1797
#if KMP_DEBUG_ADAPTIVE_LOCKS
1798
1799
// We accumulate speculative lock statistics when the lock is destroyed. We
1800
// keep locks that haven't been destroyed in the liveLocks list so that we can
1801
// grab their statistics too.
1802
static kmp_adaptive_lock_statistics_t destroyedStats;
1803
1804
// To hold the list of live locks.
1805
static kmp_adaptive_lock_info_t liveLocks;
1806
1807
// A lock so we can safely update the list of locks.
1808
static kmp_bootstrap_lock_t chain_lock =
1809
KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock);
1810
1811
// Initialize the list of stats.
1812
void __kmp_init_speculative_stats() {
1813
kmp_adaptive_lock_info_t *lck = &liveLocks;
1814
1815
memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0,
1816
sizeof(lck->stats));
1817
lck->stats.next = lck;
1818
lck->stats.prev = lck;
1819
1820
KMP_ASSERT(lck->stats.next->stats.prev == lck);
1821
KMP_ASSERT(lck->stats.prev->stats.next == lck);
1822
1823
__kmp_init_bootstrap_lock(&chain_lock);
1824
}
1825
1826
// Insert the lock into the circular list
1827
static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {
1828
__kmp_acquire_bootstrap_lock(&chain_lock);
1829
1830
lck->stats.next = liveLocks.stats.next;
1831
lck->stats.prev = &liveLocks;
1832
1833
liveLocks.stats.next = lck;
1834
lck->stats.next->stats.prev = lck;
1835
1836
KMP_ASSERT(lck->stats.next->stats.prev == lck);
1837
KMP_ASSERT(lck->stats.prev->stats.next == lck);
1838
1839
__kmp_release_bootstrap_lock(&chain_lock);
1840
}
1841
1842
static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {
1843
KMP_ASSERT(lck->stats.next->stats.prev == lck);
1844
KMP_ASSERT(lck->stats.prev->stats.next == lck);
1845
1846
kmp_adaptive_lock_info_t *n = lck->stats.next;
1847
kmp_adaptive_lock_info_t *p = lck->stats.prev;
1848
1849
n->stats.prev = p;
1850
p->stats.next = n;
1851
}
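// Illustrative sketch (not part of the runtime): liveLocks is a sentinel, so
// an empty list has liveLocks.stats.next == liveLocks.stats.prev ==
// &liveLocks. After __kmp_remember_lock(A) and then __kmp_remember_lock(B)
// the circular list reads (following next) liveLocks -> B -> A -> liveLocks,
// and __kmp_forget_lock(B) splices B back out, leaving
// liveLocks -> A -> liveLocks.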
1852
1853
static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1854
memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0,
1855
sizeof(lck->stats));
1856
__kmp_remember_lock(lck);
1857
}
1858
1859
static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,
1860
kmp_adaptive_lock_info_t *lck) {
1861
kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;
1862
1863
t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;
1864
t->successfulSpeculations += s->successfulSpeculations;
1865
t->hardFailedSpeculations += s->hardFailedSpeculations;
1866
t->softFailedSpeculations += s->softFailedSpeculations;
1867
t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;
1868
t->lemmingYields += s->lemmingYields;
1869
}
1870
1871
static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {
1872
__kmp_acquire_bootstrap_lock(&chain_lock);
1873
1874
__kmp_add_stats(&destroyedStats, lck);
1875
__kmp_forget_lock(lck);
1876
1877
__kmp_release_bootstrap_lock(&chain_lock);
1878
}
1879
1880
static float percent(kmp_uint32 count, kmp_uint32 total) {
1881
return (total == 0) ? 0.0 : (100.0 * count) / total;
1882
}
1883
1884
void __kmp_print_speculative_stats() {
1885
kmp_adaptive_lock_statistics_t total = destroyedStats;
1886
kmp_adaptive_lock_info_t *lck;
1887
1888
for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {
1889
__kmp_add_stats(&total, lck);
1890
}
1891
kmp_adaptive_lock_statistics_t *t = &total;
1892
kmp_uint32 totalSections =
1893
t->nonSpeculativeAcquires + t->successfulSpeculations;
1894
kmp_uint32 totalSpeculations = t->successfulSpeculations +
1895
t->hardFailedSpeculations +
1896
t->softFailedSpeculations;
1897
if (totalSections <= 0)
1898
return;
1899
1900
kmp_safe_raii_file_t statsFile;
1901
if (strcmp(__kmp_speculative_statsfile, "-") == 0) {
1902
statsFile.set_stdout();
1903
} else {
1904
size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;
1905
char buffer[buffLen];
1906
KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,
1907
(kmp_int32)getpid());
1908
statsFile.open(buffer, "w");
1909
}
1910
1911
fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");
1912
fprintf(statsFile,
1913
" Lock parameters: \n"
1914
" max_soft_retries : %10d\n"
1915
" max_badness : %10d\n",
1916
__kmp_adaptive_backoff_params.max_soft_retries,
1917
__kmp_adaptive_backoff_params.max_badness);
1918
fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",
1919
t->nonSpeculativeAcquireAttempts);
1920
fprintf(statsFile, " Total critical sections : %10d\n",
1921
totalSections);
1922
fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n",
1923
t->successfulSpeculations,
1924
percent(t->successfulSpeculations, totalSections));
1925
fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",
1926
t->nonSpeculativeAcquires,
1927
percent(t->nonSpeculativeAcquires, totalSections));
1928
fprintf(statsFile, " Lemming yields : %10d\n\n",
1929
t->lemmingYields);
1930
1931
fprintf(statsFile, " Speculative acquire attempts : %10d\n",
1932
totalSpeculations);
1933
fprintf(statsFile, " Successes : %10d (%5.1f%%)\n",
1934
t->successfulSpeculations,
1935
percent(t->successfulSpeculations, totalSpeculations));
1936
fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n",
1937
t->softFailedSpeculations,
1938
percent(t->softFailedSpeculations, totalSpeculations));
1939
fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n",
1940
t->hardFailedSpeculations,
1941
percent(t->hardFailedSpeculations, totalSpeculations));
1942
}
1943
1944
#define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)
1945
#else
1946
#define KMP_INC_STAT(lck, stat)
1947
1948
#endif // KMP_DEBUG_ADAPTIVE_LOCKS
1949
1950
static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {
1951
// It is enough to check that the head_id is zero.
1952
// We don't also need to check the tail.
1953
bool res = lck->lk.head_id == 0;
1954
1955
// We need a fence here, since we must ensure that no memory operations
1956
// from later in this thread float above that read.
1957
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
1958
_mm_mfence();
1959
#else
1960
__sync_synchronize();
1961
#endif
1962
1963
return res;
1964
}
1965
1966
// Functions for manipulating the badness
1967
static __inline void
1968
__kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {
1969
// Reset the badness to zero so we eagerly try to speculate again
1970
lck->lk.adaptive.badness = 0;
1971
KMP_INC_STAT(lck, successfulSpeculations);
1972
}
1973
1974
// Create a bit mask with one more set bit.
1975
static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {
1976
kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1;
1977
if (newBadness > lck->lk.adaptive.max_badness) {
1978
return;
1979
} else {
1980
lck->lk.adaptive.badness = newBadness;
1981
}
1982
}
1983
1984
// Check whether speculation should be attempted.
1985
KMP_ATTRIBUTE_TARGET_RTM
1986
static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,
1987
kmp_int32 gtid) {
1988
kmp_uint32 badness = lck->lk.adaptive.badness;
1989
kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;
1990
int res = (attempts & badness) == 0;
1991
return res;
1992
}
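// Illustrative sketch (not part of the runtime) of how the badness mask
// gates speculation: __kmp_should_speculate() allows it only when
// (acquire_attempts & badness) == 0, so
//   badness == 0x0 -> every attempt may speculate
//   badness == 0x1 -> roughly every 2nd attempt
//   badness == 0x3 -> roughly every 4th attempt
// Each speculative acquire that ultimately fails shifts one more bit into
// the mask (capped at max_badness); a successful speculation resets it to 0.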
1993
1994
// Attempt to acquire only the speculative lock.
1995
// Does not back off to the non-speculative lock.
1996
KMP_ATTRIBUTE_TARGET_RTM
1997
static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,
1998
kmp_int32 gtid) {
1999
int retries = lck->lk.adaptive.max_soft_retries;
2000
2001
// We don't explicitly count the start of speculation; rather, we record the
2002
// results (success, hard fail, soft fail). The sum of all of those is the
2003
// total number of times we started speculation since all speculations must
2004
// end one of those ways.
2005
do {
2006
kmp_uint32 status = _xbegin();
2007
// Switch this in to disable actual speculation but exercise at least some
2008
// of the rest of the code. Useful for debugging...
2009
// kmp_uint32 status = _XABORT_NESTED;
2010
2011
if (status == _XBEGIN_STARTED) {
2012
/* We have successfully started speculation. Check that no-one acquired
2013
the lock for real between when we last looked and now. This also gets
2014
the lock cache line into our read-set, which we need so that we'll
2015
abort if anyone later claims it for real. */
2016
if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2017
// Lock is now visibly acquired, so someone beat us to it. Abort the
2018
// transaction so we'll restart from _xbegin with the failure status.
2019
_xabort(0x01);
2020
KMP_ASSERT2(0, "should not get here");
2021
}
2022
return 1; // Lock has been acquired (speculatively)
2023
} else {
2024
// We have aborted, update the statistics
2025
if (status & SOFT_ABORT_MASK) {
2026
KMP_INC_STAT(lck, softFailedSpeculations);
2027
// and loop round to retry.
2028
} else {
2029
KMP_INC_STAT(lck, hardFailedSpeculations);
2030
// Give up if we had a hard failure.
2031
break;
2032
}
2033
}
2034
} while (retries--); // Loop while we have retries, and didn't fail hard.
2035
2036
// Either we had a hard failure or we didn't succeed softly after
2037
// the full set of attempts, so back off the badness.
2038
__kmp_step_badness(lck);
2039
return 0;
2040
}
2041
2042
// Attempt to acquire the speculative lock, or back off to the non-speculative
2043
// one if the speculative lock cannot be acquired.
2044
// We can succeed speculatively, non-speculatively, or fail.
2045
static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {
2046
// First try to acquire the lock speculatively
2047
if (__kmp_should_speculate(lck, gtid) &&
2048
__kmp_test_adaptive_lock_only(lck, gtid))
2049
return 1;
2050
2051
// Speculative acquisition failed, so try to acquire it non-speculatively.
2052
// Count the non-speculative acquire attempt
2053
lck->lk.adaptive.acquire_attempts++;
2054
2055
// Use base, non-speculative lock.
2056
if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {
2057
KMP_INC_STAT(lck, nonSpeculativeAcquires);
2058
return 1; // Lock is acquired (non-speculatively)
2059
} else {
2060
return 0; // Failed to acquire the lock, it's already visibly locked.
2061
}
2062
}
2063
2064
static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2065
kmp_int32 gtid) {
2066
char const *const func = "omp_test_lock";
2067
if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2068
KMP_FATAL(LockIsUninitialized, func);
2069
}
2070
2071
int retval = __kmp_test_adaptive_lock(lck, gtid);
2072
2073
if (retval) {
2074
lck->lk.qlk.owner_id = gtid + 1;
2075
}
2076
return retval;
2077
}
2078
2079
// Block until we can acquire a speculative, adaptive lock. We check whether we
2080
// should be trying to speculate. If we should be, we check the real lock to see
2081
// if it is free, and, if not, pause without attempting to acquire it until it
2082
// is. Then we try the speculative acquire. This means that although we suffer
2083
// from lemmings a little (because we can't acquire the lock speculatively
2084
// until the queue of threads waiting has cleared), we don't get into a state
2085
// where we can never acquire the lock speculatively (because we force the queue
2086
// to clear by preventing new arrivals from entering the queue). This does mean
2087
// that when we're trying to break lemmings, the lock is no longer fair. However
2088
// OpenMP makes no guarantee that its locks are fair, so this isn't a real
2089
// problem.
2090
static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
2091
kmp_int32 gtid) {
2092
if (__kmp_should_speculate(lck, gtid)) {
2093
if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2094
if (__kmp_test_adaptive_lock_only(lck, gtid))
2095
return;
2096
// We tried speculation and failed, so give up.
2097
} else {
2098
// We can't try speculation until the lock is free, so we pause here
2099
// (without suspending on the queuing lock) to allow it to drain, then
2100
// try again. All other threads will also see the same result for
2101
// shouldSpeculate, so will be doing the same if they try to claim the
2102
// lock from now on.
2103
while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
2104
KMP_INC_STAT(lck, lemmingYields);
2105
KMP_YIELD(TRUE);
2106
}
2107
2108
if (__kmp_test_adaptive_lock_only(lck, gtid))
2109
return;
2110
}
2111
}
2112
2113
// Speculative acquisition failed, so acquire it non-speculatively.
2114
// Count the non-speculative acquire attempt
2115
lck->lk.adaptive.acquire_attempts++;
2116
2117
__kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
2118
// We have acquired the base lock, so count that.
2119
KMP_INC_STAT(lck, nonSpeculativeAcquires);
2120
}
2121
2122
static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2123
kmp_int32 gtid) {
2124
char const *const func = "omp_set_lock";
2125
if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2126
KMP_FATAL(LockIsUninitialized, func);
2127
}
2128
if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
2129
KMP_FATAL(LockIsAlreadyOwned, func);
2130
}
2131
2132
__kmp_acquire_adaptive_lock(lck, gtid);
2133
2134
lck->lk.qlk.owner_id = gtid + 1;
2135
}
2136
2137
KMP_ATTRIBUTE_TARGET_RTM
2138
static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
2139
kmp_int32 gtid) {
2140
if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
2141
lck))) { // If the lock doesn't look claimed we must be speculating.
2142
// (Or the user's code is buggy and they're releasing without locking;
2143
// if we had XTEST we'd be able to check that case...)
2144
_xend(); // Exit speculation
2145
__kmp_update_badness_after_success(lck);
2146
} else { // Since the lock *is* visibly locked we're not speculating,
2147
// so should use the underlying lock's release scheme.
2148
__kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
2149
}
2150
return KMP_LOCK_RELEASED;
2151
}
2152
2153
static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
2154
kmp_int32 gtid) {
2155
char const *const func = "omp_unset_lock";
2156
KMP_MB(); /* in case another processor initialized lock */
2157
if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2158
KMP_FATAL(LockIsUninitialized, func);
2159
}
2160
if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {
2161
KMP_FATAL(LockUnsettingFree, func);
2162
}
2163
if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {
2164
KMP_FATAL(LockUnsettingSetByAnother, func);
2165
}
2166
lck->lk.qlk.owner_id = 0;
2167
__kmp_release_adaptive_lock(lck, gtid);
2168
return KMP_LOCK_RELEASED;
2169
}
2170
2171
static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {
2172
__kmp_init_queuing_lock(GET_QLK_PTR(lck));
2173
lck->lk.adaptive.badness = 0;
2174
lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;
2175
lck->lk.adaptive.max_soft_retries =
2176
__kmp_adaptive_backoff_params.max_soft_retries;
2177
lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;
2178
#if KMP_DEBUG_ADAPTIVE_LOCKS
2179
__kmp_zero_speculative_stats(&lck->lk.adaptive);
2180
#endif
2181
KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));
2182
}
2183
2184
static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {
2185
#if KMP_DEBUG_ADAPTIVE_LOCKS
2186
__kmp_accumulate_speculative_stats(&lck->lk.adaptive);
2187
#endif
2188
__kmp_destroy_queuing_lock(GET_QLK_PTR(lck));
2189
// Nothing needed for the speculative part.
2190
}
2191
2192
static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
2193
char const *const func = "omp_destroy_lock";
2194
if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
2195
KMP_FATAL(LockIsUninitialized, func);
2196
}
2197
if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {
2198
KMP_FATAL(LockStillOwned, func);
2199
}
2200
__kmp_destroy_adaptive_lock(lck);
2201
}
2202
2203
#endif // KMP_USE_ADAPTIVE_LOCKS
2204
2205
/* ------------------------------------------------------------------------ */
2206
/* DRDPA ticket locks */
2207
/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */
2208
2209
static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {
2210
return lck->lk.owner_id - 1;
2211
}
2212
2213
static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {
2214
return lck->lk.depth_locked != -1;
2215
}
2216
2217
__forceinline static int
2218
__kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2219
kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);
2220
kmp_uint64 mask = lck->lk.mask; // atomic load
2221
std::atomic<kmp_uint64> *polls = lck->lk.polls;
2222
2223
#ifdef USE_LOCK_PROFILE
2224
if (polls[ticket & mask] != ticket)
2225
__kmp_printf("LOCK CONTENTION: %p\n", lck);
2226
/* else __kmp_printf( "." );*/
2227
#endif /* USE_LOCK_PROFILE */
2228
2229
// Now spin-wait, but reload the polls pointer and mask, in case the
2230
// polling area has been reconfigured. Unless it is reconfigured, the
2231
// reloads stay in L1 cache and are cheap.
2232
//
2233
// Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!
2234
// The current implementation of KMP_WAIT doesn't allow for mask
2235
// and poll to be re-read every spin iteration.
2236
kmp_uint32 spins;
2237
kmp_uint64 time;
2238
KMP_FSYNC_PREPARE(lck);
2239
KMP_INIT_YIELD(spins);
2240
KMP_INIT_BACKOFF(time);
2241
while (polls[ticket & mask] < ticket) { // atomic load
2242
KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
2243
// Re-read the mask and the poll pointer from the lock structure.
2244
//
2245
// Make certain that "mask" is read before "polls" !!!
2246
//
2247
// If another thread reconfigures the polling area and updates its
2248
// values, and we get the new value of mask and the old polls pointer, we
2249
// could access memory beyond the end of the old polling area.
2250
mask = lck->lk.mask; // atomic load
2251
polls = lck->lk.polls; // atomic load
2252
}
2253
2254
// Critical section starts here
2255
KMP_FSYNC_ACQUIRED(lck);
2256
KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",
2257
ticket, lck));
2258
lck->lk.now_serving = ticket; // non-volatile store
2259
2260
// Deallocate a garbage polling area if we know that we are the last
2261
// thread that could possibly access it.
2262
//
2263
// The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup
2264
// ticket.
2265
if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {
2266
__kmp_free(lck->lk.old_polls);
2267
lck->lk.old_polls = NULL;
2268
lck->lk.cleanup_ticket = 0;
2269
}
2270
2271
// Check to see if we should reconfigure the polling area.
2272
// If there is still a garbage polling area to be deallocated from a
2273
// previous reconfiguration, let a later thread reconfigure it.
2274
if (lck->lk.old_polls == NULL) {
2275
bool reconfigure = false;
2276
std::atomic<kmp_uint64> *old_polls = polls;
2277
kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);
2278
2279
if (TCR_4(__kmp_nth) >
2280
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {
2281
// We are in oversubscription mode. Contract the polling area
2282
// down to a single location, if that hasn't been done already.
2283
if (num_polls > 1) {
2284
reconfigure = true;
2285
num_polls = TCR_4(lck->lk.num_polls);
2286
mask = 0;
2287
num_polls = 1;
2288
polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2289
sizeof(*polls));
2290
polls[0] = ticket;
2291
}
2292
} else {
2293
// We are in under/fully subscribed mode. Check the number of
2294
// threads waiting on the lock. The size of the polling area
2295
// should be at least the number of threads waiting.
2296
kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;
2297
if (num_waiting > num_polls) {
2298
kmp_uint32 old_num_polls = num_polls;
2299
reconfigure = true;
2300
do {
2301
mask = (mask << 1) | 1;
2302
num_polls *= 2;
2303
} while (num_polls <= num_waiting);
2304
2305
// Allocate the new polling area, and copy the relevant portion
2306
// of the old polling area to the new area. __kmp_allocate()
2307
// zeroes the memory it allocates, and most of the old area is
2308
// just zero padding, so we only copy the release counters.
2309
polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *
2310
sizeof(*polls));
2311
kmp_uint32 i;
2312
for (i = 0; i < old_num_polls; i++) {
2313
polls[i].store(old_polls[i]);
2314
}
2315
}
2316
}
2317
2318
if (reconfigure) {
2319
// Now write the updated fields back to the lock structure.
2320
//
2321
// Make certain that "polls" is written before "mask" !!!
2322
//
2323
// If another thread picks up the new value of mask and the old polls
2324
// pointer, it could access memory beyond the end of the old polling
2325
// area.
2326
//
2327
// On x86, we need memory fences.
2328
KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "
2329
"lock %p to %d polls\n",
2330
ticket, lck, num_polls));
2331
2332
lck->lk.old_polls = old_polls;
2333
lck->lk.polls = polls; // atomic store
2334
2335
KMP_MB();
2336
2337
lck->lk.num_polls = num_polls;
2338
lck->lk.mask = mask; // atomic store
2339
2340
KMP_MB();
2341
2342
// Only after the new polling area and mask have been flushed
2343
// to main memory can we update the cleanup ticket field.
2344
//
2345
// volatile load / non-volatile store
2346
lck->lk.cleanup_ticket = lck->lk.next_ticket;
2347
}
2348
}
2349
return KMP_LOCK_ACQUIRED_FIRST;
2350
}
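// Illustrative sketch (not part of the runtime) of the DRDPA polling scheme:
// every acquirer spins on polls[ticket & mask], so with num_polls == 4
// (mask == 0x3) tickets 0, 4, 8, ... share slot 0, tickets 1, 5, 9, ... share
// slot 1, and so on. A release stores the next ticket into exactly one slot,
// waking at most the one waiter polling that slot. When more threads are
// waiting than there are slots, the code above doubles num_polls (widening
// mask) until num_polls > num_waiting; under oversubscription it contracts
// the area back to a single slot.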
2351
2352
int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2353
int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2354
return retval;
2355
}
2356
2357
static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2358
kmp_int32 gtid) {
2359
char const *const func = "omp_set_lock";
2360
if (lck->lk.initialized != lck) {
2361
KMP_FATAL(LockIsUninitialized, func);
2362
}
2363
if (__kmp_is_drdpa_lock_nestable(lck)) {
2364
KMP_FATAL(LockNestableUsedAsSimple, func);
2365
}
2366
if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {
2367
KMP_FATAL(LockIsAlreadyOwned, func);
2368
}
2369
2370
__kmp_acquire_drdpa_lock(lck, gtid);
2371
2372
lck->lk.owner_id = gtid + 1;
2373
return KMP_LOCK_ACQUIRED_FIRST;
2374
}
2375
2376
int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2377
// First get a ticket, then read the polls pointer and the mask.
2378
// The polls pointer must be read before the mask!!! (See above)
2379
kmp_uint64 ticket = lck->lk.next_ticket; // atomic load
2380
std::atomic<kmp_uint64> *polls = lck->lk.polls;
2381
kmp_uint64 mask = lck->lk.mask; // atomic load
2382
if (polls[ticket & mask] == ticket) {
2383
kmp_uint64 next_ticket = ticket + 1;
2384
if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,
2385
next_ticket)) {
2386
KMP_FSYNC_ACQUIRED(lck);
2387
KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",
2388
ticket, lck));
2389
lck->lk.now_serving = ticket; // non-volatile store
2390
2391
// Since no threads are waiting, there is no possibility that we would
2392
// want to reconfigure the polling area. We might have the cleanup ticket
2393
// value (which says that it is now safe to deallocate old_polls), but
2394
// we'll let a later thread which calls __kmp_acquire_lock do that - this
2395
// routine isn't supposed to block, and we would risk blocks if we called
2396
// __kmp_free() to do the deallocation.
2397
return TRUE;
2398
}
2399
}
2400
return FALSE;
2401
}
2402
2403
static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2404
kmp_int32 gtid) {
2405
char const *const func = "omp_test_lock";
2406
if (lck->lk.initialized != lck) {
2407
KMP_FATAL(LockIsUninitialized, func);
2408
}
2409
if (__kmp_is_drdpa_lock_nestable(lck)) {
2410
KMP_FATAL(LockNestableUsedAsSimple, func);
2411
}
2412
2413
int retval = __kmp_test_drdpa_lock(lck, gtid);
2414
2415
if (retval) {
2416
lck->lk.owner_id = gtid + 1;
2417
}
2418
return retval;
2419
}
2420
2421
int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2422
// Read the ticket value from the lock data struct, then the polls pointer and
2423
// the mask. The polls pointer must be read before the mask!!! (See above)
2424
kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load
2425
std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load
2426
kmp_uint64 mask = lck->lk.mask; // atomic load
2427
KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",
2428
ticket - 1, lck));
2429
KMP_FSYNC_RELEASING(lck);
2430
polls[ticket & mask] = ticket; // atomic store
2431
return KMP_LOCK_RELEASED;
2432
}
2433
2434
static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2435
kmp_int32 gtid) {
2436
char const *const func = "omp_unset_lock";
2437
KMP_MB(); /* in case another processor initialized lock */
2438
if (lck->lk.initialized != lck) {
2439
KMP_FATAL(LockIsUninitialized, func);
2440
}
2441
if (__kmp_is_drdpa_lock_nestable(lck)) {
2442
KMP_FATAL(LockNestableUsedAsSimple, func);
2443
}
2444
if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2445
KMP_FATAL(LockUnsettingFree, func);
2446
}
2447
if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&
2448
(__kmp_get_drdpa_lock_owner(lck) != gtid)) {
2449
KMP_FATAL(LockUnsettingSetByAnother, func);
2450
}
2451
lck->lk.owner_id = 0;
2452
return __kmp_release_drdpa_lock(lck, gtid);
2453
}
2454
2455
void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {
2456
lck->lk.location = NULL;
2457
lck->lk.mask = 0;
2458
lck->lk.num_polls = 1;
2459
lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(
2460
lck->lk.num_polls * sizeof(*(lck->lk.polls)));
2461
lck->lk.cleanup_ticket = 0;
2462
lck->lk.old_polls = NULL;
2463
lck->lk.next_ticket = 0;
2464
lck->lk.now_serving = 0;
2465
lck->lk.owner_id = 0; // no thread owns the lock.
2466
lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.
2467
lck->lk.initialized = lck;
2468
2469
KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));
2470
}
2471
2472
void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {
2473
lck->lk.initialized = NULL;
2474
lck->lk.location = NULL;
2475
if (lck->lk.polls.load() != NULL) {
2476
__kmp_free(lck->lk.polls.load());
2477
lck->lk.polls = NULL;
2478
}
2479
if (lck->lk.old_polls != NULL) {
2480
__kmp_free(lck->lk.old_polls);
2481
lck->lk.old_polls = NULL;
2482
}
2483
lck->lk.mask = 0;
2484
lck->lk.num_polls = 0;
2485
lck->lk.cleanup_ticket = 0;
2486
lck->lk.next_ticket = 0;
2487
lck->lk.now_serving = 0;
2488
lck->lk.owner_id = 0;
2489
lck->lk.depth_locked = -1;
2490
}
2491
2492
static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2493
char const *const func = "omp_destroy_lock";
2494
if (lck->lk.initialized != lck) {
2495
KMP_FATAL(LockIsUninitialized, func);
2496
}
2497
if (__kmp_is_drdpa_lock_nestable(lck)) {
2498
KMP_FATAL(LockNestableUsedAsSimple, func);
2499
}
2500
if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2501
KMP_FATAL(LockStillOwned, func);
2502
}
2503
__kmp_destroy_drdpa_lock(lck);
2504
}
2505
2506
// nested drdpa ticket locks
2507
2508
int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2509
KMP_DEBUG_ASSERT(gtid >= 0);
2510
2511
if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2512
lck->lk.depth_locked += 1;
2513
return KMP_LOCK_ACQUIRED_NEXT;
2514
} else {
2515
__kmp_acquire_drdpa_lock_timed_template(lck, gtid);
2516
KMP_MB();
2517
lck->lk.depth_locked = 1;
2518
KMP_MB();
2519
lck->lk.owner_id = gtid + 1;
2520
return KMP_LOCK_ACQUIRED_FIRST;
2521
}
2522
}
2523
2524
static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2525
kmp_int32 gtid) {
2526
char const *const func = "omp_set_nest_lock";
2527
if (lck->lk.initialized != lck) {
2528
KMP_FATAL(LockIsUninitialized, func);
2529
}
2530
if (!__kmp_is_drdpa_lock_nestable(lck)) {
2531
KMP_FATAL(LockSimpleUsedAsNestable, func);
2532
}
2533
__kmp_acquire_nested_drdpa_lock(lck, gtid);
2534
}
2535
2536
int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2537
int retval;
2538
2539
KMP_DEBUG_ASSERT(gtid >= 0);
2540
2541
if (__kmp_get_drdpa_lock_owner(lck) == gtid) {
2542
retval = ++lck->lk.depth_locked;
2543
} else if (!__kmp_test_drdpa_lock(lck, gtid)) {
2544
retval = 0;
2545
} else {
2546
KMP_MB();
2547
retval = lck->lk.depth_locked = 1;
2548
KMP_MB();
2549
lck->lk.owner_id = gtid + 1;
2550
}
2551
return retval;
2552
}
2553
2554
static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2555
kmp_int32 gtid) {
2556
char const *const func = "omp_test_nest_lock";
2557
if (lck->lk.initialized != lck) {
2558
KMP_FATAL(LockIsUninitialized, func);
2559
}
2560
if (!__kmp_is_drdpa_lock_nestable(lck)) {
2561
KMP_FATAL(LockSimpleUsedAsNestable, func);
2562
}
2563
return __kmp_test_nested_drdpa_lock(lck, gtid);
2564
}
2565
2566
int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {
2567
KMP_DEBUG_ASSERT(gtid >= 0);
2568
2569
KMP_MB();
2570
if (--(lck->lk.depth_locked) == 0) {
2571
KMP_MB();
2572
lck->lk.owner_id = 0;
2573
__kmp_release_drdpa_lock(lck, gtid);
2574
return KMP_LOCK_RELEASED;
2575
}
2576
return KMP_LOCK_STILL_HELD;
2577
}
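// Illustrative sketch (not part of the runtime) of the nesting protocol used
// by all nestable lock kinds in this file: depth_locked counts recursive
// acquisitions by the owning gtid.
//   acquire -> depth 1  (KMP_LOCK_ACQUIRED_FIRST, real lock taken)
//   acquire -> depth 2  (KMP_LOCK_ACQUIRED_NEXT, no real acquire)
//   release -> depth 1  (KMP_LOCK_STILL_HELD)
//   release -> depth 0  (underlying lock released, KMP_LOCK_RELEASED)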
2578
2579
static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,
2580
kmp_int32 gtid) {
2581
char const *const func = "omp_unset_nest_lock";
2582
KMP_MB(); /* in case another processor initialized lock */
2583
if (lck->lk.initialized != lck) {
2584
KMP_FATAL(LockIsUninitialized, func);
2585
}
2586
if (!__kmp_is_drdpa_lock_nestable(lck)) {
2587
KMP_FATAL(LockSimpleUsedAsNestable, func);
2588
}
2589
if (__kmp_get_drdpa_lock_owner(lck) == -1) {
2590
KMP_FATAL(LockUnsettingFree, func);
2591
}
2592
if (__kmp_get_drdpa_lock_owner(lck) != gtid) {
2593
KMP_FATAL(LockUnsettingSetByAnother, func);
2594
}
2595
return __kmp_release_nested_drdpa_lock(lck, gtid);
2596
}
2597
2598
void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2599
__kmp_init_drdpa_lock(lck);
2600
lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks
2601
}
2602
2603
void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {
2604
__kmp_destroy_drdpa_lock(lck);
2605
lck->lk.depth_locked = 0;
2606
}
2607
2608
static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
2609
char const *const func = "omp_destroy_nest_lock";
2610
if (lck->lk.initialized != lck) {
2611
KMP_FATAL(LockIsUninitialized, func);
2612
}
2613
if (!__kmp_is_drdpa_lock_nestable(lck)) {
2614
KMP_FATAL(LockSimpleUsedAsNestable, func);
2615
}
2616
if (__kmp_get_drdpa_lock_owner(lck) != -1) {
2617
KMP_FATAL(LockStillOwned, func);
2618
}
2619
__kmp_destroy_nested_drdpa_lock(lck);
2620
}
2621
2622
// access functions to fields which don't exist for all lock kinds.
2623
2624
static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) {
2625
return lck->lk.location;
2626
}
2627
2628
static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck,
2629
const ident_t *loc) {
2630
lck->lk.location = loc;
2631
}
2632
2633
static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) {
2634
return lck->lk.flags;
2635
}
2636
2637
static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck,
2638
kmp_lock_flags_t flags) {
2639
lck->lk.flags = flags;
2640
}
2641
2642
// Time stamp counter
2643
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
2644
#define __kmp_tsc() __kmp_hardware_timestamp()
2645
// Runtime's default backoff parameters
2646
kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100};
2647
#else
2648
// Use nanoseconds for other platforms
2649
extern kmp_uint64 __kmp_now_nsec();
2650
kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100};
2651
#define __kmp_tsc() __kmp_now_nsec()
2652
#endif
2653
2654
// A useful predicate for dealing with timestamps that may wrap.
2655
// Is a before b? Since the timestamps may wrap, this is asking whether it's
2656
// shorter to go clockwise from a to b around the clock-face, or anti-clockwise.
2657
// Times where going clockwise is less distance than going anti-clockwise
2658
// are in the future, others are in the past. E.g. with a = MAX-1 and
// b = MAX+1 (= 0), the unsigned comparison a > b is true even though a has
// not reached b, whereas the signed difference (kmp_int64)b - (kmp_int64)a
// is positive and captures the actual ordering.
2661
static inline bool before(kmp_uint64 a, kmp_uint64 b) {
2662
return ((kmp_int64)b - (kmp_int64)a) > 0;
2663
}
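// Illustrative sketch (not part of the runtime): before() stays correct
// across a counter wrap because it compares the signed difference rather
// than the raw unsigned values, e.g.
//   before(10, 20)            -> true  (difference +10)
//   before(UINT64_MAX - 1, 1) -> true  (difference +3, despite a > b)
//   before(1, UINT64_MAX - 1) -> false (difference -3)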
2664
2665
// Truncated binary exponential backoff function
2666
void __kmp_spin_backoff(kmp_backoff_t *boff) {
2667
// We could flatten this loop, but making it a nested loop gives better results
2668
kmp_uint32 i;
2669
for (i = boff->step; i > 0; i--) {
2670
kmp_uint64 goal = __kmp_tsc() + boff->min_tick;
2671
#if KMP_HAVE_UMWAIT
2672
if (__kmp_umwait_enabled) {
2673
__kmp_tpause(0, boff->min_tick);
2674
} else {
2675
#endif
2676
do {
2677
KMP_CPU_PAUSE();
2678
} while (before(__kmp_tsc(), goal));
2679
#if KMP_HAVE_UMWAIT
2680
}
2681
#endif
2682
}
2683
boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);
2684
}
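// Illustrative sketch (not part of the runtime), assuming the initializers
// above are {step, max_backoff, min_tick}: the update
//   step = (step << 1 | 1) & (max_backoff - 1)
// makes step grow 1, 3, 7, 15, ... and saturate at max_backoff - 1
// (4095 with the x86 defaults). Each call to __kmp_spin_backoff() pauses for
// roughly 'step' waits of min_tick each (or a tpause per step when UMWAIT
// is enabled) before enlarging the step.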
2685
2686
#if KMP_USE_DYNAMIC_LOCK
2687
2688
// Direct lock initializer: it simply writes a tag to the low 8 bits of the
2689
// lock word.
2690
static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,
2691
kmp_dyna_lockseq_t seq) {
2692
TCW_4(((kmp_base_tas_lock_t *)lck)->poll, KMP_GET_D_TAG(seq));
2693
KA_TRACE(
2694
20,
2695
("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));
2696
}
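// Illustrative note (an assumption, based on the in-file comments below and
// on KMP_GET_D_TAG in kmp_lock.h, which is not shown here): direct locks
// keep an odd tag in the low bits of the lock word, while indirect locks
// store an even value (their table index shifted left by one, see
// __kmp_allocate_indirect_lock). The low bit therefore tells the two cases
// apart, which is why the direct jump tables below only fill in odd entries
// and reserve entry 0 for the indirect-lock entry points.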
2697
2698
#if KMP_USE_TSX
2699
2700
// HLE lock functions - imported from the testbed runtime.
2701
#define HLE_ACQUIRE ".byte 0xf2;"
2702
#define HLE_RELEASE ".byte 0xf3;"
2703
2704
static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {
2705
__asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");
2706
return v;
2707
}
2708
2709
static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }
2710
2711
static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {
2712
TCW_4(*lck, 0);
2713
}
2714
2715
static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2716
// Use gtid for KMP_LOCK_BUSY if necessary
2717
if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {
2718
int delay = 1;
2719
do {
2720
while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {
2721
for (int i = delay; i != 0; --i)
2722
KMP_CPU_PAUSE();
2723
delay = ((delay << 1) | 1) & 7;
2724
}
2725
} while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));
2726
}
2727
}
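// Illustrative sketch (not part of the runtime): the pause loop above grows
// its delay as delay = ((delay << 1) | 1) & 7, i.e. 1, 3, 7, 7, ... PAUSE
// instructions between re-checks of the lock word -- a small, bounded
// exponential backoff while waiting for the HLE lock to appear free.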
2728
2729
static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2730
kmp_int32 gtid) {
2731
__kmp_acquire_hle_lock(lck, gtid); // TODO: add checks
2732
}
2733
2734
static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2735
__asm__ volatile(HLE_RELEASE "movl %1,%0"
2736
: "=m"(*lck)
2737
: "r"(KMP_LOCK_FREE(hle))
2738
: "memory");
2739
return KMP_LOCK_RELEASED;
2740
}
2741
2742
static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2743
kmp_int32 gtid) {
2744
return __kmp_release_hle_lock(lck, gtid); // TODO: add checks
2745
}
2746
2747
static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {
2748
return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);
2749
}
2750
2751
static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,
2752
kmp_int32 gtid) {
2753
return __kmp_test_hle_lock(lck, gtid); // TODO: add checks
2754
}
2755
2756
static void __kmp_init_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2757
__kmp_init_queuing_lock(lck);
2758
}
2759
2760
static void __kmp_destroy_rtm_queuing_lock(kmp_queuing_lock_t *lck) {
2761
__kmp_destroy_queuing_lock(lck);
2762
}
2763
2764
static void
2765
__kmp_destroy_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
2766
__kmp_destroy_queuing_lock_with_checks(lck);
2767
}
2768
2769
KMP_ATTRIBUTE_TARGET_RTM
2770
static void __kmp_acquire_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2771
kmp_int32 gtid) {
2772
unsigned retries = 3, status;
2773
do {
2774
status = _xbegin();
2775
if (status == _XBEGIN_STARTED) {
2776
if (__kmp_is_unlocked_queuing_lock(lck))
2777
return;
2778
_xabort(0xff);
2779
}
2780
if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2781
// Wait until lock becomes free
2782
while (!__kmp_is_unlocked_queuing_lock(lck)) {
2783
KMP_YIELD(TRUE);
2784
}
2785
} else if (!(status & _XABORT_RETRY))
2786
break;
2787
} while (retries--);
2788
2789
// Fall-back non-speculative lock (xchg)
2790
__kmp_acquire_queuing_lock(lck, gtid);
2791
}
2792
2793
static void __kmp_acquire_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2794
kmp_int32 gtid) {
2795
__kmp_acquire_rtm_queuing_lock(lck, gtid);
2796
}
2797
2798
KMP_ATTRIBUTE_TARGET_RTM
2799
static int __kmp_release_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2800
kmp_int32 gtid) {
2801
if (__kmp_is_unlocked_queuing_lock(lck)) {
2802
// Releasing from speculation
2803
_xend();
2804
} else {
2805
// Releasing from a real lock
2806
__kmp_release_queuing_lock(lck, gtid);
2807
}
2808
return KMP_LOCK_RELEASED;
2809
}
2810
2811
static int __kmp_release_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2812
kmp_int32 gtid) {
2813
return __kmp_release_rtm_queuing_lock(lck, gtid);
2814
}
2815
2816
KMP_ATTRIBUTE_TARGET_RTM
2817
static int __kmp_test_rtm_queuing_lock(kmp_queuing_lock_t *lck,
2818
kmp_int32 gtid) {
2819
unsigned retries = 3, status;
2820
do {
2821
status = _xbegin();
2822
if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {
2823
return 1;
2824
}
2825
if (!(status & _XABORT_RETRY))
2826
break;
2827
} while (retries--);
2828
2829
return __kmp_test_queuing_lock(lck, gtid);
2830
}
2831
2832
static int __kmp_test_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,
2833
kmp_int32 gtid) {
2834
return __kmp_test_rtm_queuing_lock(lck, gtid);
2835
}
2836
2837
// Reuse kmp_tas_lock_t for the TSX lock, which uses RTM with a fall-back spin lock.
2838
typedef kmp_tas_lock_t kmp_rtm_spin_lock_t;
2839
2840
static void __kmp_destroy_rtm_spin_lock(kmp_rtm_spin_lock_t *lck) {
2841
KMP_ATOMIC_ST_REL(&lck->lk.poll, 0);
2842
}
2843
2844
static void __kmp_destroy_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck) {
2845
__kmp_destroy_rtm_spin_lock(lck);
2846
}
2847
2848
KMP_ATTRIBUTE_TARGET_RTM
2849
static int __kmp_acquire_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2850
kmp_int32 gtid) {
2851
unsigned retries = 3, status;
2852
kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2853
kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2854
do {
2855
status = _xbegin();
2856
if (status == _XBEGIN_STARTED) {
2857
if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free)
2858
return KMP_LOCK_ACQUIRED_FIRST;
2859
_xabort(0xff);
2860
}
2861
if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {
2862
// Wait until lock becomes free
2863
while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free) {
2864
KMP_YIELD(TRUE);
2865
}
2866
} else if (!(status & _XABORT_RETRY))
2867
break;
2868
} while (retries--);
2869
2870
// Fall-back spin lock
2871
KMP_FSYNC_PREPARE(lck);
2872
kmp_backoff_t backoff = __kmp_spin_backoff_params;
2873
while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free ||
2874
!__kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2875
__kmp_spin_backoff(&backoff);
2876
}
2877
KMP_FSYNC_ACQUIRED(lck);
2878
return KMP_LOCK_ACQUIRED_FIRST;
2879
}
2880
2881
static int __kmp_acquire_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2882
kmp_int32 gtid) {
2883
return __kmp_acquire_rtm_spin_lock(lck, gtid);
2884
}
2885
2886
KMP_ATTRIBUTE_TARGET_RTM
2887
static int __kmp_release_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,
2888
kmp_int32 gtid) {
2889
if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == KMP_LOCK_FREE(rtm_spin)) {
2890
// Releasing from speculation
2891
_xend();
2892
} else {
2893
// Releasing from a real lock
2894
KMP_FSYNC_RELEASING(lck);
2895
KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(rtm_spin));
2896
}
2897
return KMP_LOCK_RELEASED;
2898
}
2899
2900
static int __kmp_release_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2901
kmp_int32 gtid) {
2902
return __kmp_release_rtm_spin_lock(lck, gtid);
2903
}
2904
2905
KMP_ATTRIBUTE_TARGET_RTM
2906
static int __kmp_test_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, kmp_int32 gtid) {
2907
unsigned retries = 3, status;
2908
kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);
2909
kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);
2910
do {
2911
status = _xbegin();
2912
if (status == _XBEGIN_STARTED &&
2913
KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free) {
2914
return TRUE;
2915
}
2916
if (!(status & _XABORT_RETRY))
2917
break;
2918
} while (retries--);
2919
2920
if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free &&
2921
__kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {
2922
KMP_FSYNC_ACQUIRED(lck);
2923
return TRUE;
2924
}
2925
return FALSE;
2926
}
2927
2928
static int __kmp_test_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,
2929
kmp_int32 gtid) {
2930
return __kmp_test_rtm_spin_lock(lck, gtid);
2931
}
2932
2933
#endif // KMP_USE_TSX
2934
2935
// Entry functions for indirect locks (first element of direct lock jump tables)
2936
static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,
2937
kmp_dyna_lockseq_t tag);
2938
static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);
2939
static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2940
static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2941
static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);
2942
static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2943
kmp_int32);
2944
static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2945
kmp_int32);
2946
static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
2947
kmp_int32);
2948
2949
// Lock function definitions for the union parameter type
2950
#define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)
2951
2952
#define expand1(lk, op) \
2953
static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) { \
2954
__kmp_##op##_##lk##_##lock(&lock->lk); \
2955
}
2956
#define expand2(lk, op) \
2957
static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock, \
2958
kmp_int32 gtid) { \
2959
return __kmp_##op##_##lk##_##lock(&lock->lk, gtid); \
2960
}
2961
#define expand3(lk, op) \
2962
static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock, \
2963
kmp_lock_flags_t flags) { \
2964
__kmp_set_##lk##_lock_flags(&lock->lk, flags); \
2965
}
2966
#define expand4(lk, op) \
2967
static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock, \
2968
const ident_t *loc) { \
2969
__kmp_set_##lk##_lock_location(&lock->lk, loc); \
2970
}
2971
2972
KMP_FOREACH_LOCK_KIND(expand1, init)
2973
KMP_FOREACH_LOCK_KIND(expand1, init_nested)
2974
KMP_FOREACH_LOCK_KIND(expand1, destroy)
2975
KMP_FOREACH_LOCK_KIND(expand1, destroy_nested)
2976
KMP_FOREACH_LOCK_KIND(expand2, acquire)
2977
KMP_FOREACH_LOCK_KIND(expand2, acquire_nested)
2978
KMP_FOREACH_LOCK_KIND(expand2, release)
2979
KMP_FOREACH_LOCK_KIND(expand2, release_nested)
2980
KMP_FOREACH_LOCK_KIND(expand2, test)
2981
KMP_FOREACH_LOCK_KIND(expand2, test_nested)
2982
KMP_FOREACH_LOCK_KIND(expand3, )
2983
KMP_FOREACH_LOCK_KIND(expand4, )
2984
2985
#undef expand1
2986
#undef expand2
2987
#undef expand3
2988
#undef expand4
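// Illustrative sketch (not part of the runtime) of what the expand* macros
// above generate; e.g. expand2(queuing, acquire) should produce roughly
//
//   static int __kmp_acquire_queuing_lock(kmp_user_lock_p lock,
//                                         kmp_int32 gtid) {
//     return __kmp_acquire_queuing_lock(&lock->queuing, gtid);
//   }
//
// i.e. an overload taking the union type that forwards to the typed
// implementation (the 'lk' macro parameter also names the union member).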
2989
2990
// Jump tables for the indirect lock functions
2991
// Only fill in the odd entries; that avoids the need to shift out the low bit.
2992
2993
// init functions
2994
#define expand(l, op) 0, __kmp_init_direct_lock,
2995
void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {
2996
__kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};
2997
#undef expand
2998
2999
// destroy functions
3000
#define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,
3001
static void (*direct_destroy[])(kmp_dyna_lock_t *) = {
3002
__kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
3003
#undef expand
3004
#define expand(l, op) \
3005
0, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,
3006
static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = {
3007
__kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};
3008
#undef expand
3009
3010
// set/acquire functions
3011
#define expand(l, op) \
3012
0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3013
static int (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {
3014
__kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};
3015
#undef expand
3016
#define expand(l, op) \
3017
0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3018
static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3019
__kmp_set_indirect_lock_with_checks, 0,
3020
KMP_FOREACH_D_LOCK(expand, acquire)};
3021
#undef expand
3022
3023
// unset/release and test functions
3024
#define expand(l, op) \
3025
0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,
3026
static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {
3027
__kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};
3028
static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {
3029
__kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};
3030
#undef expand
3031
#define expand(l, op) \
3032
0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,
3033
static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3034
__kmp_unset_indirect_lock_with_checks, 0,
3035
KMP_FOREACH_D_LOCK(expand, release)};
3036
static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {
3037
__kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};
3038
#undef expand
3039
3040
// Exposes only one set of jump tables (*lock or *lock_with_checks).
3041
void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;
3042
int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;
3043
int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;
3044
int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;
3045
3046
// Jump tables for the indirect lock functions
3047
#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3048
void (*__kmp_indirect_init[])(kmp_user_lock_p) = {
3049
KMP_FOREACH_I_LOCK(expand, init)};
3050
#undef expand
3051
3052
#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,
3053
static void (*indirect_destroy[])(kmp_user_lock_p) = {
3054
KMP_FOREACH_I_LOCK(expand, destroy)};
3055
#undef expand
3056
#define expand(l, op) \
3057
(void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,
3058
static void (*indirect_destroy_check[])(kmp_user_lock_p) = {
3059
KMP_FOREACH_I_LOCK(expand, destroy)};
3060
#undef expand
3061
3062
// set/acquire functions
3063
#define expand(l, op) \
3064
(int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3065
static int (*indirect_set[])(kmp_user_lock_p,
3066
kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};
3067
#undef expand
3068
#define expand(l, op) \
3069
(int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3070
static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {
3071
KMP_FOREACH_I_LOCK(expand, acquire)};
3072
#undef expand
3073
3074
// unset/release and test functions
3075
#define expand(l, op) \
3076
(int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,
3077
static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {
3078
KMP_FOREACH_I_LOCK(expand, release)};
3079
static int (*indirect_test[])(kmp_user_lock_p,
3080
kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};
3081
#undef expand
3082
#define expand(l, op) \
3083
(int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,
3084
static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {
3085
KMP_FOREACH_I_LOCK(expand, release)};
3086
static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {
3087
KMP_FOREACH_I_LOCK(expand, test)};
3088
#undef expand
3089
3090
// Exposes only one set of jump tables (*lock or *lock_with_checks).
3091
void (**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;
3092
int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;
3093
int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;
3094
int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;
3095
3096
// Lock index table.
3097
kmp_indirect_lock_table_t __kmp_i_lock_table;
3098
3099
// Size of indirect locks.
3100
static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};
3101
3102
// Jump tables for lock accessor/modifier.
3103
void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3104
const ident_t *) = {0};
3105
void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,
3106
kmp_lock_flags_t) = {0};
3107
const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(
3108
kmp_user_lock_p) = {0};
3109
kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(
3110
kmp_user_lock_p) = {0};
3111
3112
// Use different lock pools for different lock types.
3113
static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};
3114
3115
// User lock allocator for dynamically dispatched indirect locks. Every entry of
3116
// the indirect lock table holds the address and type of the allocated indirect
3117
// lock (kmp_indirect_lock_t), and the size of the table doubles when it is
3118
// full. A destroyed indirect lock object is returned to the reusable pool of
3119
// locks, unique to each lock type.
3120
kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,
3121
kmp_int32 gtid,
3122
kmp_indirect_locktag_t tag) {
3123
kmp_indirect_lock_t *lck;
3124
kmp_lock_index_t idx, table_idx;
3125
3126
__kmp_acquire_lock(&__kmp_global_lock, gtid);
3127
3128
if (__kmp_indirect_lock_pool[tag] != NULL) {
3129
// Reuse the allocated and destroyed lock object
3130
lck = __kmp_indirect_lock_pool[tag];
3131
if (OMP_LOCK_T_SIZE < sizeof(void *))
3132
idx = lck->lock->pool.index;
3133
__kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;
3134
KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",
3135
lck));
3136
} else {
3137
kmp_uint32 row, col;
3138
kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;
3139
idx = 0;
3140
// Find location in list of lock tables to put new lock
3141
while (1) {
3142
table_idx = lock_table->next; // index within this table
3143
idx += lock_table->next; // global index within list of tables
3144
if (table_idx < lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK) {
3145
row = table_idx / KMP_I_LOCK_CHUNK;
3146
col = table_idx % KMP_I_LOCK_CHUNK;
3147
// Allocate a new row of locks if necessary
3148
if (!lock_table->table[row]) {
3149
lock_table->table[row] = (kmp_indirect_lock_t *)__kmp_allocate(
3150
sizeof(kmp_indirect_lock_t) * KMP_I_LOCK_CHUNK);
3151
}
3152
break;
3153
}
3154
// Allocate a new lock table if necessary with double the capacity
3155
if (!lock_table->next_table) {
3156
kmp_indirect_lock_table_t *next_table =
3157
(kmp_indirect_lock_table_t *)__kmp_allocate(
3158
sizeof(kmp_indirect_lock_table_t));
3159
next_table->table = (kmp_indirect_lock_t **)__kmp_allocate(
3160
sizeof(kmp_indirect_lock_t *) * 2 * lock_table->nrow_ptrs);
3161
next_table->nrow_ptrs = 2 * lock_table->nrow_ptrs;
3162
next_table->next = 0;
3163
next_table->next_table = nullptr;
3164
lock_table->next_table = next_table;
3165
}
3166
lock_table = lock_table->next_table;
3167
KMP_ASSERT(lock_table);
3168
}
3169
lock_table->next++;
3170
3171
lck = &lock_table->table[row][col];
3172
// Allocate a new base lock object
3173
lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);
3174
KA_TRACE(20,
3175
("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck));
3176
}
3177
3178
__kmp_release_lock(&__kmp_global_lock, gtid);
3179
3180
lck->type = tag;
3181
3182
if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3183
*(kmp_lock_index_t *)&(((kmp_base_tas_lock_t *)user_lock)->poll) =
3184
idx << 1; // indirect lock word must be even
3185
} else {
3186
*((kmp_indirect_lock_t **)user_lock) = lck;
3187
}
3188
3189
return lck;
3190
}
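// Illustrative sketch (not part of the runtime) of how the indirect-lock
// table grows: the first table has KMP_I_LOCK_TABLE_INIT_NROW_PTRS row
// pointers of KMP_I_LOCK_CHUNK locks each, with rows allocated lazily as
// 'next' advances. Once a table is full, a successor with twice as many row
// pointers is chained through next_table, and the global index 'idx'
// accumulates the per-table 'next' counters walked along the way.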
3191
3192
// User lock lookup for dynamically dispatched locks.
3193
static __forceinline kmp_indirect_lock_t *
3194
__kmp_lookup_indirect_lock(void **user_lock, const char *func) {
3195
if (__kmp_env_consistency_check) {
3196
kmp_indirect_lock_t *lck = NULL;
3197
if (user_lock == NULL) {
3198
KMP_FATAL(LockIsUninitialized, func);
3199
}
3200
if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3201
kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);
3202
lck = __kmp_get_i_lock(idx);
3203
} else {
3204
lck = *((kmp_indirect_lock_t **)user_lock);
3205
}
3206
if (lck == NULL) {
3207
KMP_FATAL(LockIsUninitialized, func);
3208
}
3209
return lck;
3210
} else {
3211
if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3212
return __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(user_lock));
3213
} else {
3214
return *((kmp_indirect_lock_t **)user_lock);
3215
}
3216
}
3217
}
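// Illustrative sketch (not part of the runtime) of the two storage schemes
// handled above: when omp_lock_t cannot hold a pointer
// (OMP_LOCK_T_SIZE < sizeof(void *)) the user's lock word holds the even
// value idx << 1, which is decoded back into an indirect-table lookup;
// otherwise the lock word stores the kmp_indirect_lock_t pointer itself and
// is simply dereferenced.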
3218
3219
static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,
3220
kmp_dyna_lockseq_t seq) {
3221
#if KMP_USE_ADAPTIVE_LOCKS
3222
if (seq == lockseq_adaptive && !__kmp_cpuinfo.flags.rtm) {
3223
KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");
3224
seq = lockseq_queuing;
3225
}
3226
#endif
3227
#if KMP_USE_TSX
3228
if (seq == lockseq_rtm_queuing && !__kmp_cpuinfo.flags.rtm) {
3229
seq = lockseq_queuing;
3230
}
3231
#endif
3232
kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);
3233
kmp_indirect_lock_t *l =
3234
__kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);
3235
KMP_I_LOCK_FUNC(l, init)(l->lock);
3236
KA_TRACE(
3237
20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",
3238
seq));
3239
}
3240
3241
static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {
3242
kmp_uint32 gtid = __kmp_entry_gtid();
3243
kmp_indirect_lock_t *l =
3244
__kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");
3245
KMP_I_LOCK_FUNC(l, destroy)(l->lock);
3246
kmp_indirect_locktag_t tag = l->type;
3247
3248
__kmp_acquire_lock(&__kmp_global_lock, gtid);
3249
3250
// Use the base lock's space to keep the pool chain.
3251
l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];
3252
if (OMP_LOCK_T_SIZE < sizeof(void *)) {
3253
l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);
3254
}
3255
__kmp_indirect_lock_pool[tag] = l;
3256
3257
__kmp_release_lock(&__kmp_global_lock, gtid);
3258
}
3259
3260
static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3261
kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3262
return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3263
}
3264
3265
static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3266
kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3267
return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3268
}
3269
3270
static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {
3271
kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);
3272
return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3273
}
3274
3275
static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3276
kmp_int32 gtid) {
3277
kmp_indirect_lock_t *l =
3278
__kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");
3279
return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);
3280
}
3281
3282
static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3283
kmp_int32 gtid) {
3284
kmp_indirect_lock_t *l =
3285
__kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");
3286
return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);
3287
}
3288
3289
static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,
3290
kmp_int32 gtid) {
3291
kmp_indirect_lock_t *l =
3292
__kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");
3293
return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);
3294
}

kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;

// This is used only in kmp_error.cpp when consistency checking is on.
kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) {
  switch (seq) {
  case lockseq_tas:
  case lockseq_nested_tas:
    return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);
#if KMP_USE_FUTEX
  case lockseq_futex:
  case lockseq_nested_futex:
    return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);
#endif
  case lockseq_ticket:
  case lockseq_nested_ticket:
    return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);
  case lockseq_queuing:
  case lockseq_nested_queuing:
#if KMP_USE_ADAPTIVE_LOCKS
  case lockseq_adaptive:
#endif
    return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);
  case lockseq_drdpa:
  case lockseq_nested_drdpa:
    return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);
  default:
    return 0;
  }
}

// Initializes data for dynamic user locks.
void __kmp_init_dynamic_user_locks() {
  // Initialize jump table for the lock functions
  if (__kmp_env_consistency_check) {
    __kmp_direct_set = direct_set_check;
    __kmp_direct_unset = direct_unset_check;
    __kmp_direct_test = direct_test_check;
    __kmp_direct_destroy = direct_destroy_check;
    __kmp_indirect_set = indirect_set_check;
    __kmp_indirect_unset = indirect_unset_check;
    __kmp_indirect_test = indirect_test_check;
    __kmp_indirect_destroy = indirect_destroy_check;
  } else {
    __kmp_direct_set = direct_set;
    __kmp_direct_unset = direct_unset;
    __kmp_direct_test = direct_test;
    __kmp_direct_destroy = direct_destroy;
    __kmp_indirect_set = indirect_set;
    __kmp_indirect_unset = indirect_unset;
    __kmp_indirect_test = indirect_test;
    __kmp_indirect_destroy = indirect_destroy;
  }
  // If the user locks have already been initialized, then return. Allow the
  // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate
  // new lock tables if they have already been allocated.
  if (__kmp_init_user_locks)
    return;

  // Initialize lock index table
  __kmp_i_lock_table.nrow_ptrs = KMP_I_LOCK_TABLE_INIT_NROW_PTRS;
  __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(
      sizeof(kmp_indirect_lock_t *) * KMP_I_LOCK_TABLE_INIT_NROW_PTRS);
  *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(
      KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));
  __kmp_i_lock_table.next = 0;
  __kmp_i_lock_table.next_table = nullptr;

  // Indirect lock size
  __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);
  __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);
#if KMP_USE_ADAPTIVE_LOCKS
  __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);
#endif
  __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);
#if KMP_USE_TSX
  __kmp_indirect_lock_size[locktag_rtm_queuing] = sizeof(kmp_queuing_lock_t);
#endif
  __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);
#if KMP_USE_FUTEX
  __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);
#endif
  __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);
  __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);
  __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);

  // Initialize lock accessor/modifier
#define fill_jumps(table, expand, sep) \
  { \
    table[locktag##sep##ticket] = expand(ticket); \
    table[locktag##sep##queuing] = expand(queuing); \
    table[locktag##sep##drdpa] = expand(drdpa); \
  }

#if KMP_USE_ADAPTIVE_LOCKS
#define fill_table(table, expand) \
  { \
    fill_jumps(table, expand, _); \
    table[locktag_adaptive] = expand(queuing); \
    fill_jumps(table, expand, _nested_); \
  }
#else
#define fill_table(table, expand) \
  { \
    fill_jumps(table, expand, _); \
    fill_jumps(table, expand, _nested_); \
  }
#endif // KMP_USE_ADAPTIVE_LOCKS

#define expand(l) \
  (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location
  fill_table(__kmp_indirect_set_location, expand);
#undef expand
#define expand(l) \
  (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags
  fill_table(__kmp_indirect_set_flags, expand);
#undef expand
#define expand(l) \
  (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location
  fill_table(__kmp_indirect_get_location, expand);
#undef expand
#define expand(l) \
  (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags
  fill_table(__kmp_indirect_get_flags, expand);
#undef expand

  __kmp_init_user_locks = TRUE;
}
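
// Expansion sketch: with the first expand() definition above,
// fill_table(__kmp_indirect_set_location, expand) unfolds (for one entry) to
// roughly
//
//   __kmp_indirect_set_location[locktag_ticket] =
//       (void (*)(kmp_user_lock_p, const ident_t *))
//           __kmp_set_ticket_lock_location;
//
// and likewise for the queuing and drdpa tags, the adaptive alias of queuing,
// and the corresponding nested tags.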

// Clean up the lock table.
void __kmp_cleanup_indirect_user_locks() {
  int k;

  // Clean up locks in the pools first (they were already destroyed before going
  // into the pools).
  for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {
    kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];
    while (l != NULL) {
      kmp_indirect_lock_t *ll = l;
      l = (kmp_indirect_lock_t *)l->lock->pool.next;
      KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",
                    ll));
      __kmp_free(ll->lock);
      ll->lock = NULL;
    }
    __kmp_indirect_lock_pool[k] = NULL;
  }
  // Clean up the remaining undestroyed locks.
  kmp_indirect_lock_table_t *ptr = &__kmp_i_lock_table;
  while (ptr) {
    for (kmp_uint32 row = 0; row < ptr->nrow_ptrs; ++row) {
      if (!ptr->table[row])
        continue;
      for (kmp_uint32 col = 0; col < KMP_I_LOCK_CHUNK; ++col) {
        kmp_indirect_lock_t *l = &ptr->table[row][col];
        if (l->lock) {
          // Locks not destroyed explicitly need to be destroyed here.
          KMP_I_LOCK_FUNC(l, destroy)(l->lock);
          KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p "
                        "from table\n",
                        l));
          __kmp_free(l->lock);
        }
      }
      __kmp_free(ptr->table[row]);
    }
    kmp_indirect_lock_table_t *next_table = ptr->next_table;
    if (ptr != &__kmp_i_lock_table)
      __kmp_free(ptr);
    ptr = next_table;
  }

  __kmp_init_user_locks = FALSE;
}
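
// Table geometry (sketch): each row of an indirect lock table holds
// KMP_I_LOCK_CHUNK kmp_indirect_lock_t entries and rows may be left
// unallocated, so the cleanup above walks row by row, skips NULL rows, and
// finally follows next_table to reach any overflow tables chained after the
// statically named __kmp_i_lock_table (which itself is never freed).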

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

#else // KMP_USE_DYNAMIC_LOCK

static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_tas_lock(lck);
}

static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {
  __kmp_init_nested_tas_lock(lck);
}

#if KMP_USE_FUTEX
static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_futex_lock(lck);
}

static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
  __kmp_init_nested_futex_lock(lck);
}
#endif

static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
  return lck == lck->lk.self;
}

static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_ticket_lock(lck);
}

static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
  __kmp_init_nested_ticket_lock(lck);
}

static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {
  return lck == lck->lk.initialized;
}

static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_init_queuing_lock(lck);
}

static void
__kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {
  __kmp_init_nested_queuing_lock(lck);
}

#if KMP_USE_ADAPTIVE_LOCKS
static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {
  __kmp_init_adaptive_lock(lck);
}
#endif

static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {
  return lck == lck->lk.initialized;
}

static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  __kmp_init_drdpa_lock(lck);
}

static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {
  __kmp_init_nested_drdpa_lock(lck);
}

/* user locks
 * They are implemented as a table of function pointers which are set to the
 * lock functions of the appropriate kind, once that has been determined. */

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;

size_t __kmp_base_user_lock_size = 0;
size_t __kmp_user_lock_size = 0;

kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;

int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
                                         kmp_int32 gtid) = NULL;
int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;
void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;

int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                kmp_int32 gtid) = NULL;
int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;
void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;

int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
                                      const ident_t *loc) = NULL;
kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
                                   kmp_lock_flags_t flags) = NULL;
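
// A minimal dispatch sketch (illustrative, not a definition from this file):
// once __kmp_set_user_lock_vptrs() below has bound these pointers, callers
// elsewhere in the runtime acquire a user lock of the configured kind with
// roughly
//
//   if (__kmp_acquire_user_lock_with_checks_ != NULL)
//     (*__kmp_acquire_user_lock_with_checks_)(lck, gtid);
//
// i.e. the lock kind is chosen once at startup, not per call site.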

void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
  switch (user_lock_kind) {
  case lk_default:
  default:
    KMP_ASSERT(0);

  case lk_tas: {
    __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
    __kmp_user_lock_size = sizeof(kmp_tas_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
    } else {
      KMP_BIND_USER_LOCK(tas);
      KMP_BIND_NESTED_USER_LOCK(tas);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#if KMP_USE_FUTEX

  case lk_futex: {
    __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
    __kmp_user_lock_size = sizeof(kmp_futex_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
    } else {
      KMP_BIND_USER_LOCK(futex);
      KMP_BIND_NESTED_USER_LOCK(futex);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#endif // KMP_USE_FUTEX

  case lk_ticket: {
    __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
    __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
    } else {
      KMP_BIND_USER_LOCK(ticket);
      KMP_BIND_NESTED_USER_LOCK(ticket);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_ticket_lock_flags);
  } break;

  case lk_queuing: {
    __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
    __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
    } else {
      KMP_BIND_USER_LOCK(queuing);
      KMP_BIND_NESTED_USER_LOCK(queuing);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);
  } break;

#if KMP_USE_ADAPTIVE_LOCKS
  case lk_adaptive: {
    __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
    __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
    } else {
      KMP_BIND_USER_LOCK(adaptive);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);

  } break;
#endif // KMP_USE_ADAPTIVE_LOCKS

  case lk_drdpa: {
    __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
    __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
    } else {
      KMP_BIND_USER_LOCK(drdpa);
      KMP_BIND_NESTED_USER_LOCK(drdpa);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_drdpa_lock_flags);
  } break;
  }
}
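
// Usage sketch (an assumption about callers, not code from this file): the
// startup code that parses the lock-kind setting is expected to do something
// like
//
//   __kmp_user_lock_kind = lk_ticket;
//   __kmp_set_user_lock_vptrs(lk_ticket);
//
// after which every __kmp_*_user_lock_* pointer above dispatches to the
// ticket-lock implementation, with or without consistency checks depending on
// __kmp_env_consistency_check at binding time.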

// ----------------------------------------------------------------------------
// User lock table & lock allocation

kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
kmp_block_of_locks *__kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
  // Assume that kmp_global_lock is held upon entry/exit.
  kmp_lock_index_t index;
  if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
    kmp_lock_index_t size;
    kmp_user_lock_p *table;
    // Reallocate lock table.
    if (__kmp_user_lock_table.allocated == 0) {
      size = 1024;
    } else {
      size = __kmp_user_lock_table.allocated * 2;
    }
    table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
    KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
               sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
    table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
    // We cannot free the previous table now, since it may be in use by other
    // threads. So save the pointer to the previous table in the first
    // element of the new table. All the tables will be organized into a list,
    // and can be freed when the library shuts down.
    __kmp_user_lock_table.table = table;
    __kmp_user_lock_table.allocated = size;
  }
  KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
                   __kmp_user_lock_table.allocated);
  index = __kmp_user_lock_table.used;
  __kmp_user_lock_table.table[index] = lck;
  ++__kmp_user_lock_table.used;
  return index;
}
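
// Growth sketch (illustrative numbers): starting from an empty table, the
// first insertion allocates room for 1024 entries, and each subsequent
// overflow doubles it (2048, 4096, ...). Entry [0] of each new table stores
// the pointer to the table it replaced, so the chain
//   current -> previous -> ... -> first
// can be walked and freed in __kmp_cleanup_user_locks(); entry [0] never
// holds a lock, which is why indices start at 1 (used is initialized to 1).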

static kmp_user_lock_p __kmp_lock_block_allocate() {
  // Assume that kmp_global_lock is held upon entry/exit.
  static int last_index = 0;
  if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
    // Restart the index.
    last_index = 0;
    // Need to allocate a new block.
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
    char *buffer =
        (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
    // Set up the new block.
    kmp_block_of_locks *new_block =
        (kmp_block_of_locks *)(&buffer[space_for_locks]);
    new_block->next_block = __kmp_lock_blocks;
    new_block->locks = (void *)buffer;
    // Publish the new block.
    KMP_MB();
    __kmp_lock_blocks = new_block;
  }
  kmp_user_lock_p ret = (kmp_user_lock_p)(&(
      ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
  last_index++;
  return ret;
}
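
// Memory layout of one block (sketch): a single __kmp_allocate() call returns
// a buffer that holds the lock storage first and the bookkeeping header last,
//
//   buffer: [ lock 0 | lock 1 | ... | lock N-1 | kmp_block_of_locks header ]
//             ^ new_block->locks                 ^ new_block
//                                                  (buffer + space_for_locks)
//
// where N stands for __kmp_num_locks_in_block. Freeing new_block->locks in
// __kmp_cleanup_user_locks() therefore releases the header as well.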

// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
                                         kmp_lock_flags_t flags) {
  kmp_user_lock_p lck;
  kmp_lock_index_t index;
  KMP_DEBUG_ASSERT(user_lock);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  if (__kmp_lock_pool == NULL) {
    // Lock pool is empty. Allocate new memory.

    if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
      lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
    } else {
      lck = __kmp_lock_block_allocate();
    }

    // Insert lock in the table so that it can be freed in __kmp_cleanup,
    // and the debugger has info on all allocated locks.
    index = __kmp_lock_table_insert(lck);
  } else {
    // Pick up lock from pool.
    lck = __kmp_lock_pool;
    index = __kmp_lock_pool->pool.index;
    __kmp_lock_pool = __kmp_lock_pool->pool.next;
  }

  // We could potentially differentiate between nested and regular locks
  // here, and do the lock table lookup for regular locks only.
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    *((kmp_lock_index_t *)user_lock) = index;
  } else {
    *((kmp_user_lock_p *)user_lock) = lck;
  }

  // Mark the lock if it is a critical section lock.
  __kmp_set_user_lock_flags(lck, flags);

  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line upper

  return lck;
}
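
// Typical lifecycle (a sketch; `ul` stands for a user's omp_lock_t variable
// and gtid for a valid global thread id; the init call shown goes through
// whichever kind-specific initializer __kmp_set_user_lock_vptrs() bound):
//
//   kmp_user_lock_p lck = __kmp_user_lock_allocate((void **)&ul, gtid, 0);
//   (*__kmp_init_user_lock_with_checks_)(lck);
//   ...
//   __kmp_user_lock_free((void **)&ul, gtid, lck); // back onto __kmp_lock_pool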

// Put the lock's memory to the pool for reuse.
void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
                          kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(user_lock != NULL);
  KMP_DEBUG_ASSERT(lck != NULL);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  lck->pool.next = __kmp_lock_pool;
  __kmp_lock_pool = lck;
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
    lck->pool.index = index;
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
}

kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
  kmp_user_lock_p lck = NULL;

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    if (__kmp_env_consistency_check) {
      if (!(0 < index && index < __kmp_user_lock_table.used)) {
        KMP_FATAL(LockIsUninitialized, func);
      }
    }
    KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    lck = __kmp_user_lock_table.table[index];
  } else {
    lck = *((kmp_user_lock_p *)user_lock);
  }

  if (__kmp_env_consistency_check) {
    if (lck == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  return lck;
}

void __kmp_cleanup_user_locks(void) {
  // Reset lock pool. Don't worry about locks in the pool; we will free them
  // when iterating through the lock table (it includes all the locks, dead or
  // alive).
  __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck) \
  ((__kmp_get_user_lock_flags_ != NULL) && \
   ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))

  // Loop through lock table, free all locks.
  // Do not free item [0]; it is reserved for the lock tables list.
  //
  // FIXME - we are iterating through a list of (pointers to) objects of type
  // union kmp_user_lock, but we have no way of knowing whether the base type is
  // currently "pool" or whatever the global user lock type is.
  //
  // We are relying on the fact that for all of the user lock types
  // (except "tas"), the first field in the lock struct is the "initialized"
  // field, which is set to the address of the lock object itself when
  // the lock is initialized. When the union is of type "pool", the
  // first field is a pointer to the next object in the free list, which
  // will not be the same address as the object itself.
  //
  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail
  // for "pool" objects on the free list. This must happen as the "location"
  // field of real user locks overlaps the "index" field of "pool" objects.
  //
  // It would be better to run through the free list, and remove all "pool"
  // objects from the lock table before executing this loop. However,
  // "pool" objects do not always have their index field set (only on
  // lin_32e), and I don't want to search the lock table for the address
  // of every "pool" object on the free list.
  while (__kmp_user_lock_table.used > 1) {
    const ident *loc;

    // Reduce __kmp_user_lock_table.used before freeing the lock,
    // so that the state of the locks stays consistent.
    kmp_user_lock_p lck =
        __kmp_user_lock_table.table[--__kmp_user_lock_table.used];

    if ((__kmp_is_user_lock_initialized_ != NULL) &&
        (*__kmp_is_user_lock_initialized_)(lck)) {
      // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
      // it is NOT a critical section (user is not responsible for destroying
      // criticals) AND we know the source location to report.
      if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
          ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
          (loc->psource != NULL)) {
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
        KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
        __kmp_str_loc_free(&str_loc);
      }

#ifdef KMP_DEBUG
      if (IS_CRITICAL(lck)) {
        KA_TRACE(
            20,
            ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
             lck, *(void **)lck));
      } else {
        KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
                      *(void **)lck));
      }
#endif // KMP_DEBUG

      // Cleanup internal lock dynamic resources (for drdpa locks particularly).
      __kmp_destroy_user_lock(lck);
    }

    // Free the lock if block allocation of locks is not used.
    if (__kmp_lock_blocks == NULL) {
      __kmp_free(lck);
    }
  }

#undef IS_CRITICAL

  // Delete the lock table(s).
  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
  __kmp_user_lock_table.table = NULL;
  __kmp_user_lock_table.allocated = 0;

  while (table_ptr != NULL) {
    // In the first element we saved the pointer to the previous
    // (smaller) lock table.
    kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
    __kmp_free(table_ptr);
    table_ptr = next;
  }

  // Free buffers allocated for blocks of locks.
  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
  __kmp_lock_blocks = NULL;

  while (block_ptr != NULL) {
    kmp_block_of_locks_t *next = block_ptr->next_block;
    __kmp_free(block_ptr->locks);
    // *block_ptr itself was allocated at the end of the locks vector.
    block_ptr = next;
  }

  TCW_4(__kmp_init_user_locks, FALSE);
}
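
// Note on the initialized check above (a sketch of the invariant, with one
// caveat): tas and futex kinds leave __kmp_is_user_lock_initialized_ set to
// NULL in __kmp_set_user_lock_vptrs(), so for those kinds the destroy step is
// skipped entirely and only the memory is released; for the other kinds the
// "lock stores its own address in its first field" test is what distinguishes
// a live lock (destroy, maybe warn) from a recycled pool entry whose first
// field is a free-list pointer (skip destroy, just free).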

#endif // KMP_USE_DYNAMIC_LOCK