GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/openmp/runtime/src/kmp_csupport.cpp
1
/*
2
* kmp_csupport.cpp -- kfront linkage support for OpenMP.
3
*/
4
5
//===----------------------------------------------------------------------===//
6
//
7
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8
// See https://llvm.org/LICENSE.txt for license information.
9
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10
//
11
//===----------------------------------------------------------------------===//
12
13
#define __KMP_IMP
14
#include "omp.h" /* extern "C" declarations of user-visible routines */
15
#include "kmp.h"
16
#include "kmp_error.h"
17
#include "kmp_i18n.h"
18
#include "kmp_itt.h"
19
#include "kmp_lock.h"
20
#include "kmp_stats.h"
21
#include "kmp_utils.h"
22
#include "ompt-specific.h"
23
24
#define MAX_MESSAGE 512
25
26
// flags will be used in future, e.g. to implement openmp_strict library
27
// restrictions
28
29
/*!
30
* @ingroup STARTUP_SHUTDOWN
31
* @param loc in source location information
32
* @param flags in for future use (currently ignored)
33
*
34
* Initialize the runtime library. This call is optional; if it is not made then
35
* it will be implicitly called by attempts to use other library functions.
36
*/
37
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
38
// By default __kmpc_begin() is no-op.
39
char *env;
40
if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
41
__kmp_str_match_true(env)) {
42
__kmp_middle_initialize();
43
__kmp_assign_root_init_mask();
44
KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
45
} else if (__kmp_ignore_mppbeg() == FALSE) {
46
// By default __kmp_ignore_mppbeg() returns TRUE.
47
__kmp_internal_begin();
48
KC_TRACE(10, ("__kmpc_begin: called\n"));
49
}
50
}
51
52
/*!
53
* @ingroup STARTUP_SHUTDOWN
54
* @param loc source location information
55
*
56
* Shut down the runtime library. This is also optional, and even if called will
57
* not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
58
* zero.
59
*/
60
void __kmpc_end(ident_t *loc) {
61
// By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
62
// call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
63
// environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
64
// returns FALSE and __kmpc_end() will unregister this root (it can cause
65
// library shut down).
66
if (__kmp_ignore_mppend() == FALSE) {
67
KC_TRACE(10, ("__kmpc_end: called\n"));
68
KA_TRACE(30, ("__kmpc_end\n"));
69
70
__kmp_internal_end_thread(-1);
71
}
72
#if KMP_OS_WINDOWS && OMPT_SUPPORT
73
// Normal exit process on Windows does not allow worker threads of the final
74
// parallel region to finish reporting their events, so shutting down the
75
// library here fixes the issue at least for the cases where __kmpc_end() is
76
// placed properly.
77
if (ompt_enabled.enabled)
78
__kmp_internal_end_library(__kmp_gtid_get_specific());
79
#endif
80
}
81
82
/*!
83
@ingroup THREAD_STATES
84
@param loc Source location information.
85
@return The global thread index of the active thread.
86
87
This function can be called in any context.
88
89
If the runtime has only been entered at the outermost level from a
90
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
91
that which would be returned by omp_get_thread_num() in the outermost
92
active parallel construct. (Or zero if there is no active parallel
93
construct, since the primary thread is necessarily thread zero).
94
95
If multiple non-OpenMP threads all enter an OpenMP construct then this
96
will be a unique thread identifier among all the threads created by
97
the OpenMP runtime (but the value cannot be defined in terms of
98
OpenMP thread ids returned by omp_get_thread_num()).
99
*/
100
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
101
kmp_int32 gtid = __kmp_entry_gtid();
102
103
KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
104
105
return gtid;
106
}
107
108
/*!
109
@ingroup THREAD_STATES
110
@param loc Source location information.
111
@return The number of threads under control of the OpenMP<sup>*</sup> runtime
112
113
This function can be called in any context.
114
It returns the total number of threads under the control of the OpenMP runtime.
115
That is not a number that can be determined by any OpenMP standard calls, since
116
the library may be called from more than one non-OpenMP thread, and this
117
reflects the total over all such calls. Similarly, since the runtime maintains
118
underlying threads even when they are not active (because the cost of creating
119
and destroying OS threads is high), this call counts all such threads even if
120
they are not waiting for work.
121
*/
122
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
123
KC_TRACE(10,
124
("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
125
126
return TCR_4(__kmp_all_nth);
127
}
128
129
/*!
130
@ingroup THREAD_STATES
131
@param loc Source location information.
132
@return The thread number of the calling thread in the innermost active parallel
133
construct.
134
*/
135
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
136
KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
137
return __kmp_tid_from_gtid(__kmp_entry_gtid());
138
}
139
140
/*!
141
@ingroup THREAD_STATES
142
@param loc Source location information.
143
@return The number of threads in the innermost active parallel construct.
144
*/
145
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
146
KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
147
148
return __kmp_entry_thread()->th.th_team->t.t_nproc;
149
}
150
151
/*!
152
* @ingroup DEPRECATED
153
* @param loc location description
154
*
155
* This function need not be called. It always returns TRUE.
156
*/
157
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
158
#ifndef KMP_DEBUG
159
160
return TRUE;
161
162
#else
163
164
const char *semi2;
165
const char *semi3;
166
int line_no;
167
168
if (__kmp_par_range == 0) {
169
return TRUE;
170
}
171
semi2 = loc->psource;
172
if (semi2 == NULL) {
173
return TRUE;
174
}
175
semi2 = strchr(semi2, ';');
176
if (semi2 == NULL) {
177
return TRUE;
178
}
179
semi2 = strchr(semi2 + 1, ';');
180
if (semi2 == NULL) {
181
return TRUE;
182
}
183
if (__kmp_par_range_filename[0]) {
184
const char *name = semi2 - 1;
185
while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
186
name--;
187
}
188
if ((*name == '/') || (*name == ';')) {
189
name++;
190
}
191
if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
192
return __kmp_par_range < 0;
193
}
194
}
195
semi3 = strchr(semi2 + 1, ';');
196
if (__kmp_par_range_routine[0]) {
197
if ((semi3 != NULL) && (semi3 > semi2) &&
198
(strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
199
return __kmp_par_range < 0;
200
}
201
}
202
if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
203
if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
204
return __kmp_par_range > 0;
205
}
206
return __kmp_par_range < 0;
207
}
208
return TRUE;
209
210
#endif /* KMP_DEBUG */
211
}
212
213
/*!
214
@ingroup THREAD_STATES
215
@param loc Source location information.
216
@return 1 if this thread is executing inside an active parallel region, zero if
217
not.
218
*/
219
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
220
return __kmp_entry_thread()->th.th_root->r.r_active;
221
}
222
223
/*!
224
@ingroup PARALLEL
225
@param loc source location information
226
@param global_tid global thread number
227
@param num_threads number of threads requested for this parallel construct
228
229
Set the number of threads to be used by the next fork spawned by this thread.
230
This call is only required if the parallel construct has a `num_threads` clause.
231
*/
232
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
233
kmp_int32 num_threads) {
234
KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
235
global_tid, num_threads));
236
__kmp_assert_valid_gtid(global_tid);
237
__kmp_push_num_threads(loc, global_tid, num_threads);
238
}
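/* Illustrative sketch (not part of the original source): for a parallel
   construct with a num_threads clause, e.g.

     #pragma omp parallel num_threads(4)
     { work(); }

   a compiler would typically emit a call to __kmpc_push_num_threads()
   immediately before the matching __kmpc_fork_call(), so the request only
   affects the next fork performed by this thread; the exact call sequence
   is compiler-dependent. */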
239
240
void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32 global_tid,
241
kmp_int32 num_threads, int severity,
242
const char *message) {
243
__kmp_push_num_threads(loc, global_tid, num_threads);
244
__kmp_set_strict_num_threads(loc, global_tid, severity, message);
245
}
246
247
/*!
248
@ingroup PARALLEL
249
@param loc source location information
250
@param global_tid global thread number
251
@param list_length number of entries in the num_threads_list array
252
@param num_threads_list array of numbers of threads requested for this parallel
253
construct and subsequent nested parallel constructs
254
255
Set the number of threads to be used by the next fork spawned by this thread,
256
and some nested forks as well.
257
This call is only required if the parallel construct has a `num_threads` clause
258
that has a list of integers as the argument.
259
*/
260
void __kmpc_push_num_threads_list(ident_t *loc, kmp_int32 global_tid,
261
kmp_uint32 list_length,
262
kmp_int32 *num_threads_list) {
263
KA_TRACE(20, ("__kmpc_push_num_threads_list: enter T#%d num_threads_list=",
264
global_tid));
265
KA_TRACE(20, ("%d", num_threads_list[0]));
266
#ifdef KMP_DEBUG
267
for (kmp_uint32 i = 1; i < list_length; ++i)
268
KA_TRACE(20, (", %d", num_threads_list[i]));
269
#endif
270
KA_TRACE(20, ("\n"));
271
272
__kmp_assert_valid_gtid(global_tid);
273
__kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
274
}
275
276
void __kmpc_push_num_threads_list_strict(ident_t *loc, kmp_int32 global_tid,
277
kmp_uint32 list_length,
278
kmp_int32 *num_threads_list,
279
int severity, const char *message) {
280
__kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
281
__kmp_set_strict_num_threads(loc, global_tid, severity, message);
282
}
283
284
void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
285
KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
286
/* the num_threads are automatically popped */
287
}
288
289
void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
290
kmp_int32 proc_bind) {
291
KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
292
proc_bind));
293
__kmp_assert_valid_gtid(global_tid);
294
__kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
295
}
296
297
/*!
298
@ingroup PARALLEL
299
@param loc source location information
300
@param argc total number of arguments in the ellipsis
301
@param microtask pointer to callback routine consisting of outlined parallel
302
construct
303
@param ... pointers to shared variables that aren't global
304
305
Do the actual fork and call the microtask in the relevant number of threads.
306
*/
307
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
308
int gtid = __kmp_entry_gtid();
309
310
#if (KMP_STATS_ENABLED)
311
// If we were in a serial region, then stop the serial timer, record
312
// the event, and start parallel region timer
313
stats_state_e previous_state = KMP_GET_THREAD_STATE();
314
if (previous_state == stats_state_e::SERIAL_REGION) {
315
KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
316
} else {
317
KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
318
}
319
int inParallel = __kmpc_in_parallel(loc);
320
if (inParallel) {
321
KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
322
} else {
323
KMP_COUNT_BLOCK(OMP_PARALLEL);
324
}
325
#endif
326
327
// maybe saving thr_state is enough here
328
{
329
va_list ap;
330
va_start(ap, microtask);
331
332
#if OMPT_SUPPORT
333
ompt_frame_t *ompt_frame;
334
if (ompt_enabled.enabled) {
335
kmp_info_t *master_th = __kmp_threads[gtid];
336
ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
337
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
338
}
339
OMPT_STORE_RETURN_ADDRESS(gtid);
340
#endif
341
342
#if INCLUDE_SSC_MARKS
343
SSC_MARK_FORKING();
344
#endif
345
__kmp_fork_call(loc, gtid, fork_context_intel, argc,
346
VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
347
VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
348
kmp_va_addr_of(ap));
349
#if INCLUDE_SSC_MARKS
350
SSC_MARK_JOINING();
351
#endif
352
__kmp_join_call(loc, gtid
353
#if OMPT_SUPPORT
354
,
355
fork_context_intel
356
#endif
357
);
358
359
va_end(ap);
360
361
#if OMPT_SUPPORT
362
if (ompt_enabled.enabled) {
363
ompt_frame->enter_frame = ompt_data_none;
364
}
365
#endif
366
}
367
368
#if KMP_STATS_ENABLED
369
if (previous_state == stats_state_e::SERIAL_REGION) {
370
KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
371
KMP_SET_THREAD_STATE(previous_state);
372
} else {
373
KMP_POP_PARTITIONED_TIMER();
374
}
375
#endif // KMP_STATS_ENABLED
376
}
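/* Illustrative sketch (not part of the original source, names are
   hypothetical) of how a compiler might lower a parallel region onto
   __kmpc_fork_call(). The outlined body receives the global and bound
   thread ids by pointer, followed by the shared variables passed through
   the ellipsis:

     // user code:
     //   int x = 0;
     //   #pragma omp parallel shared(x)
     //   { use(&x); }

     static void outlined_body(kmp_int32 *gtid, kmp_int32 *btid, int *x) {
       use(x);
     }
     // compiler-generated call, one trailing argument per shared variable:
     __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_body, &x);

   The runtime forks the team, runs the microtask once on every team member,
   and joins in __kmp_join_call() before returning to the caller. */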
377
378
/*!
379
@ingroup PARALLEL
380
@param loc source location information
381
@param microtask pointer to callback routine consisting of outlined parallel
382
construct
383
@param cond condition for running in parallel
384
@param args struct of pointers to shared variables that aren't global
385
386
Perform a fork only if the condition is true.
387
*/
388
void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
389
kmp_int32 cond, void *args) {
390
int gtid = __kmp_entry_gtid();
391
if (cond) {
392
if (args)
393
__kmpc_fork_call(loc, argc, microtask, args);
394
else
395
__kmpc_fork_call(loc, argc, microtask);
396
} else {
397
__kmpc_serialized_parallel(loc, gtid);
398
399
#if OMPT_SUPPORT
400
void *exit_frame_ptr;
401
#endif
402
403
if (args)
404
__kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
405
/*npr=*/0,
406
/*argc=*/1, &args
407
#if OMPT_SUPPORT
408
,
409
&exit_frame_ptr
410
#endif
411
);
412
else
413
__kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
414
/*npr=*/0,
415
/*argc=*/0,
416
/*args=*/nullptr
417
#if OMPT_SUPPORT
418
,
419
&exit_frame_ptr
420
#endif
421
);
422
423
__kmpc_end_serialized_parallel(loc, gtid);
424
}
425
}
426
427
/*!
428
@ingroup PARALLEL
429
@param loc source location information
430
@param global_tid global thread number
431
@param num_teams number of teams requested for the teams construct
432
@param num_threads number of threads per team requested for the teams construct
433
434
Set the number of teams to be used by the teams construct.
435
This call is only required if the teams construct has a `num_teams` clause
436
or a `thread_limit` clause (or both).
437
*/
438
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
439
kmp_int32 num_teams, kmp_int32 num_threads) {
440
KA_TRACE(20,
441
("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
442
global_tid, num_teams, num_threads));
443
__kmp_assert_valid_gtid(global_tid);
444
__kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
445
}
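/* Illustrative sketch (not part of the original source): for

     #pragma omp teams num_teams(4) thread_limit(8)

   a compiler would typically emit __kmpc_push_num_teams(&loc, gtid, 4, 8)
   right before the corresponding __kmpc_fork_teams() call; if neither clause
   is present the push is skipped and __kmpc_fork_teams() picks defaults. */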
446
447
/*!
448
@ingroup PARALLEL
449
@param loc source location information
450
@param global_tid global thread number
451
@param thread_limit limit on number of threads which can be created within the
452
current task
453
454
Set the thread_limit for the current task
455
This call is there to support `thread_limit` clause on the `target` construct
456
*/
457
void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
458
kmp_int32 thread_limit) {
459
__kmp_assert_valid_gtid(global_tid);
460
kmp_info_t *thread = __kmp_threads[global_tid];
461
if (thread_limit > 0)
462
thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
463
}
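/* Illustrative sketch (not part of the original source): for a target region
   such as

     #pragma omp target thread_limit(16)
     { ... }

   the compiler may emit __kmpc_set_thread_limit(&loc, gtid, 16) so the
   current task's thread_limit ICV caps any parallel regions created inside
   the target task; non-positive values are ignored, as the guard above
   shows. */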
464
465
/*!
466
@ingroup PARALLEL
467
@param loc source location information
468
@param global_tid global thread number
469
@param num_teams_lb lower bound on number of teams requested for the teams
470
construct
471
@param num_teams_ub upper bound on number of teams requested for the teams
472
construct
473
@param num_threads number of threads per team requested for the teams construct
474
475
Set the number of teams to be used by the teams construct. The number of initial
476
teams created will be greater than or equal to the lower bound and less than or
477
equal to the upper bound.
478
This call is only required if the teams construct has a `num_teams` clause
479
or a `thread_limit` clause (or both).
480
*/
481
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
482
kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
483
kmp_int32 num_threads) {
484
KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
485
" num_teams_ub=%d num_threads=%d\n",
486
global_tid, num_teams_lb, num_teams_ub, num_threads));
487
__kmp_assert_valid_gtid(global_tid);
488
__kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
489
num_threads);
490
}
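/* Illustrative sketch (not part of the original source): OpenMP 5.1 allows a
   range in the num_teams clause, e.g.

     #pragma omp teams num_teams(4 : 8) thread_limit(2)

   which a compiler can lower to
   __kmpc_push_num_teams_51(&loc, gtid, 4, 8, 2)   (lower bound, upper bound,
   threads per team) ahead of __kmpc_fork_teams(); the runtime then creates
   between 4 and 8 teams. */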
491
492
/*!
493
@ingroup PARALLEL
494
@param loc source location information
495
@param argc total number of arguments in the ellipsis
496
@param microtask pointer to callback routine consisting of outlined teams
497
construct
498
@param ... pointers to shared variables that aren't global
499
500
Do the actual fork and call the microtask in the relevant number of threads.
501
*/
502
void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
503
...) {
504
int gtid = __kmp_entry_gtid();
505
kmp_info_t *this_thr = __kmp_threads[gtid];
506
va_list ap;
507
va_start(ap, microtask);
508
509
#if KMP_STATS_ENABLED
510
KMP_COUNT_BLOCK(OMP_TEAMS);
511
stats_state_e previous_state = KMP_GET_THREAD_STATE();
512
if (previous_state == stats_state_e::SERIAL_REGION) {
513
KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
514
} else {
515
KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
516
}
517
#endif
518
519
// remember teams entry point and nesting level
520
this_thr->th.th_teams_microtask = microtask;
521
this_thr->th.th_teams_level =
522
this_thr->th.th_team->t.t_level; // AC: can be >0 on host
523
524
#if OMPT_SUPPORT
525
kmp_team_t *parent_team = this_thr->th.th_team;
526
int tid = __kmp_tid_from_gtid(gtid);
527
if (ompt_enabled.enabled) {
528
parent_team->t.t_implicit_task_taskdata[tid]
529
.ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
530
}
531
OMPT_STORE_RETURN_ADDRESS(gtid);
532
#endif
533
534
// check if __kmpc_push_num_teams called, set default number of teams
535
// otherwise
536
if (this_thr->th.th_teams_size.nteams == 0) {
537
__kmp_push_num_teams(loc, gtid, 0, 0);
538
}
539
KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
540
KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
541
KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
542
543
__kmp_fork_call(
544
loc, gtid, fork_context_intel, argc,
545
VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task
546
VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
547
__kmp_join_call(loc, gtid
548
#if OMPT_SUPPORT
549
,
550
fork_context_intel
551
#endif
552
);
553
554
// Pop current CG root off list
555
KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
556
kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
557
this_thr->th.th_cg_roots = tmp->up;
558
KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
559
" to node %p. cg_nthreads was %d\n",
560
this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
561
KMP_DEBUG_ASSERT(tmp->cg_nthreads);
562
int i = tmp->cg_nthreads--;
563
if (i == 1) { // check if we are the last thread in CG (not always the case)
564
__kmp_free(tmp);
565
}
566
// Restore current task's thread_limit from CG root
567
KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
568
this_thr->th.th_current_task->td_icvs.thread_limit =
569
this_thr->th.th_cg_roots->cg_thread_limit;
570
571
this_thr->th.th_teams_microtask = NULL;
572
this_thr->th.th_teams_level = 0;
573
*(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
574
va_end(ap);
575
#if KMP_STATS_ENABLED
576
if (previous_state == stats_state_e::SERIAL_REGION) {
577
KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
578
KMP_SET_THREAD_STATE(previous_state);
579
} else {
580
KMP_POP_PARTITIONED_TIMER();
581
}
582
#endif // KMP_STATS_ENABLED
583
}
584
585
// I don't think this function should ever have been exported.
586
// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
587
// openmp code ever called it, but it's been exported from the RTL for so
588
// long that I'm afraid to remove the definition.
589
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
590
591
/*!
592
@ingroup PARALLEL
593
@param loc source location information
594
@param global_tid global thread number
595
596
Enter a serialized parallel construct. This interface is used to handle a
597
conditional parallel region, like this,
598
@code
599
#pragma omp parallel if (condition)
600
@endcode
601
when the condition is false.
602
*/
603
void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
604
// The implementation is now in kmp_runtime.cpp so that it can share static
605
// functions with kmp_fork_call since the tasks to be done are similar in
606
// each case.
607
__kmp_assert_valid_gtid(global_tid);
608
#if OMPT_SUPPORT
609
OMPT_STORE_RETURN_ADDRESS(global_tid);
610
#endif
611
__kmp_serialized_parallel(loc, global_tid);
612
}
613
614
/*!
615
@ingroup PARALLEL
616
@param loc source location information
617
@param global_tid global thread number
618
619
Leave a serialized parallel construct.
620
*/
621
void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
622
kmp_internal_control_t *top;
623
kmp_info_t *this_thr;
624
kmp_team_t *serial_team;
625
626
KC_TRACE(10,
627
("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
628
629
/* skip all this code for autopar serialized loops since it results in
630
unacceptable overhead */
631
if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
632
return;
633
634
// Not autopar code
635
__kmp_assert_valid_gtid(global_tid);
636
if (!TCR_4(__kmp_init_parallel))
637
__kmp_parallel_initialize();
638
639
__kmp_resume_if_soft_paused();
640
641
this_thr = __kmp_threads[global_tid];
642
serial_team = this_thr->th.th_serial_team;
643
644
kmp_task_team_t *task_team = this_thr->th.th_task_team;
645
// we need to wait for the proxy tasks before finishing the thread
646
if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
647
task_team->tt.tt_hidden_helper_task_encountered))
648
__kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
649
650
KMP_MB();
651
KMP_DEBUG_ASSERT(serial_team);
652
KMP_ASSERT(serial_team->t.t_serialized);
653
KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
654
KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
655
KMP_DEBUG_ASSERT(serial_team->t.t_threads);
656
KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
657
658
#if OMPT_SUPPORT
659
if (ompt_enabled.enabled &&
660
this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
661
OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
662
if (ompt_enabled.ompt_callback_implicit_task) {
663
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
664
ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
665
OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
666
}
667
668
// reset clear the task id only after unlinking the task
669
ompt_data_t *parent_task_data;
670
__ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
671
672
if (ompt_enabled.ompt_callback_parallel_end) {
673
ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
674
&(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
675
ompt_parallel_invoker_program | ompt_parallel_team,
676
OMPT_LOAD_RETURN_ADDRESS(global_tid));
677
}
678
__ompt_lw_taskteam_unlink(this_thr);
679
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
680
}
681
#endif
682
683
/* If necessary, pop the internal control stack values and replace the team
684
* values */
685
top = serial_team->t.t_control_stack_top;
686
if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
687
copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
688
serial_team->t.t_control_stack_top = top->next;
689
__kmp_free(top);
690
}
691
692
/* pop dispatch buffers stack */
693
KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
694
{
695
dispatch_private_info_t *disp_buffer =
696
serial_team->t.t_dispatch->th_disp_buffer;
697
serial_team->t.t_dispatch->th_disp_buffer =
698
serial_team->t.t_dispatch->th_disp_buffer->next;
699
__kmp_free(disp_buffer);
700
}
701
702
/* pop the task team stack */
703
if (serial_team->t.t_serialized > 1) {
704
__kmp_pop_task_team_node(this_thr, serial_team);
705
}
706
707
this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
708
709
--serial_team->t.t_serialized;
710
if (serial_team->t.t_serialized == 0) {
711
712
/* return to the parallel section */
713
714
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
715
if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
716
__kmp_clear_x87_fpu_status_word();
717
__kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
718
__kmp_load_mxcsr(&serial_team->t.t_mxcsr);
719
}
720
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
721
722
__kmp_pop_current_task_from_thread(this_thr);
723
#if OMPD_SUPPORT
724
if (ompd_state & OMPD_ENABLE_BP)
725
ompd_bp_parallel_end();
726
#endif
727
728
this_thr->th.th_team = serial_team->t.t_parent;
729
this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
730
731
/* restore values cached in the thread */
732
this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
733
this_thr->th.th_team_master =
734
serial_team->t.t_parent->t.t_threads[0]; /* JPH */
735
this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
736
737
/* TODO the below shouldn't need to be adjusted for serialized teams */
738
this_thr->th.th_dispatch =
739
&this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
740
741
KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
742
this_thr->th.th_current_task->td_flags.executing = 1;
743
744
if (__kmp_tasking_mode != tskm_immediate_exec) {
745
// Restore task state from serial team structure
746
KMP_DEBUG_ASSERT(serial_team->t.t_primary_task_state == 0 ||
747
serial_team->t.t_primary_task_state == 1);
748
this_thr->th.th_task_state =
749
(kmp_uint8)serial_team->t.t_primary_task_state;
750
// Copy the task team from the new child / old parent team to the thread.
751
this_thr->th.th_task_team =
752
this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
753
KA_TRACE(20,
754
("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
755
"team %p\n",
756
global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
757
}
758
#if KMP_AFFINITY_SUPPORTED
759
if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
760
__kmp_reset_root_init_mask(global_tid);
761
}
762
#endif
763
} else {
764
if (__kmp_tasking_mode != tskm_immediate_exec) {
765
KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
766
"depth of serial team %p to %d\n",
767
global_tid, serial_team, serial_team->t.t_serialized));
768
}
769
}
770
771
serial_team->t.t_level--;
772
if (__kmp_env_consistency_check)
773
__kmp_pop_parallel(global_tid, NULL);
774
#if OMPT_SUPPORT
775
if (ompt_enabled.enabled)
776
this_thr->th.ompt_thread_info.state =
777
((this_thr->th.th_team_serialized) ? ompt_state_work_serial
778
: ompt_state_work_parallel);
779
#endif
780
}
781
782
/*!
783
@ingroup SYNCHRONIZATION
784
@param loc source location information.
785
786
Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though
787
depending on the memory ordering convention obeyed by the compiler
788
even that may not be necessary).
789
*/
790
void __kmpc_flush(ident_t *loc) {
791
KC_TRACE(10, ("__kmpc_flush: called\n"));
792
793
/* need explicit __mf() here since use volatile instead in library */
794
KMP_MFENCE(); /* Flush all pending memory write invalidates. */
795
796
#if OMPT_SUPPORT && OMPT_OPTIONAL
797
if (ompt_enabled.ompt_callback_flush) {
798
ompt_callbacks.ompt_callback(ompt_callback_flush)(
799
__ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
800
}
801
#endif
802
}
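/* Illustrative sketch (not part of the original source): each

     #pragma omp flush(data, flag)

   in user code is lowered to a call to __kmpc_flush(); this runtime performs
   a full memory fence regardless of the optional flush list, which is why no
   list argument appears in the entry point. */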
803
804
/* -------------------------------------------------------------------------- */
805
/*!
806
@ingroup SYNCHRONIZATION
807
@param loc source location information
808
@param global_tid thread id.
809
810
Execute a barrier.
811
*/
812
void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
813
KMP_COUNT_BLOCK(OMP_BARRIER);
814
KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
815
__kmp_assert_valid_gtid(global_tid);
816
817
if (!TCR_4(__kmp_init_parallel))
818
__kmp_parallel_initialize();
819
820
__kmp_resume_if_soft_paused();
821
822
if (__kmp_env_consistency_check) {
823
if (loc == 0) {
824
KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
825
}
826
__kmp_check_barrier(global_tid, ct_barrier, loc);
827
}
828
829
#if OMPT_SUPPORT
830
ompt_frame_t *ompt_frame;
831
if (ompt_enabled.enabled) {
832
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
833
if (ompt_frame->enter_frame.ptr == NULL)
834
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
835
}
836
OMPT_STORE_RETURN_ADDRESS(global_tid);
837
#endif
838
__kmp_threads[global_tid]->th.th_ident = loc;
839
// TODO: explicit barrier_wait_id:
840
// this function is called when 'barrier' directive is present or
841
// implicit barrier at the end of a worksharing construct.
842
// 1) better to add a per-thread barrier counter to a thread data structure
843
// 2) set to 0 when a new team is created
844
// 4) no sync is required
845
846
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
847
#if OMPT_SUPPORT && OMPT_OPTIONAL
848
if (ompt_enabled.enabled) {
849
ompt_frame->enter_frame = ompt_data_none;
850
}
851
#endif
852
}
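/* Illustrative sketch (not part of the original source): an explicit

     #pragma omp barrier

   as well as the implicit barrier at the end of a worksharing construct
   (unless nowait is specified) is lowered to __kmpc_barrier(&loc, gtid);
   every thread of the current team blocks here until all of them have
   arrived. */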
853
854
/* The BARRIER for a MASTER section is always explicit */
855
/*!
856
@ingroup WORK_SHARING
857
@param loc source location information.
858
@param global_tid global thread number .
859
@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.
860
*/
861
kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
862
int status = 0;
863
864
KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
865
__kmp_assert_valid_gtid(global_tid);
866
867
if (!TCR_4(__kmp_init_parallel))
868
__kmp_parallel_initialize();
869
870
__kmp_resume_if_soft_paused();
871
872
if (KMP_MASTER_GTID(global_tid)) {
873
KMP_COUNT_BLOCK(OMP_MASTER);
874
KMP_PUSH_PARTITIONED_TIMER(OMP_master);
875
status = 1;
876
}
877
878
#if OMPT_SUPPORT && OMPT_OPTIONAL
879
if (status) {
880
if (ompt_enabled.ompt_callback_masked) {
881
kmp_info_t *this_thr = __kmp_threads[global_tid];
882
kmp_team_t *team = this_thr->th.th_team;
883
884
int tid = __kmp_tid_from_gtid(global_tid);
885
ompt_callbacks.ompt_callback(ompt_callback_masked)(
886
ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
887
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
888
OMPT_GET_RETURN_ADDRESS(0));
889
}
890
}
891
#endif
892
893
if (__kmp_env_consistency_check) {
894
#if KMP_USE_DYNAMIC_LOCK
895
if (status)
896
__kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
897
else
898
__kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
899
#else
900
if (status)
901
__kmp_push_sync(global_tid, ct_master, loc, NULL);
902
else
903
__kmp_check_sync(global_tid, ct_master, loc, NULL);
904
#endif
905
}
906
907
return status;
908
}
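/* Illustrative sketch (not part of the original source) of the usual lowering
   of a master construct:

     if (__kmpc_master(&loc, gtid)) {
       body();                      // executed by the primary thread only
       __kmpc_end_master(&loc, gtid);
     }

   Only the thread for which __kmpc_master() returned 1 may call
   __kmpc_end_master(); there is no implied barrier on entry to or exit from
   the construct. */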
909
910
/*!
911
@ingroup WORK_SHARING
912
@param loc source location information.
913
@param global_tid global thread number .
914
915
Mark the end of a <tt>master</tt> region. This should only be called by the
916
thread that executes the <tt>master</tt> region.
917
*/
918
void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
919
KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
920
__kmp_assert_valid_gtid(global_tid);
921
KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
922
KMP_POP_PARTITIONED_TIMER();
923
924
#if OMPT_SUPPORT && OMPT_OPTIONAL
925
kmp_info_t *this_thr = __kmp_threads[global_tid];
926
kmp_team_t *team = this_thr->th.th_team;
927
if (ompt_enabled.ompt_callback_masked) {
928
int tid = __kmp_tid_from_gtid(global_tid);
929
ompt_callbacks.ompt_callback(ompt_callback_masked)(
930
ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
931
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
932
OMPT_GET_RETURN_ADDRESS(0));
933
}
934
#endif
935
936
if (__kmp_env_consistency_check) {
937
if (KMP_MASTER_GTID(global_tid))
938
__kmp_pop_sync(global_tid, ct_master, loc);
939
}
940
}
941
942
/*!
943
@ingroup WORK_SHARING
944
@param loc source location information.
945
@param global_tid global thread number.
946
@param filter result of evaluating filter clause on thread global_tid, or zero
947
if no filter clause present
948
@return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.
949
*/
950
kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {
951
int status = 0;
952
int tid;
953
KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
954
__kmp_assert_valid_gtid(global_tid);
955
956
if (!TCR_4(__kmp_init_parallel))
957
__kmp_parallel_initialize();
958
959
__kmp_resume_if_soft_paused();
960
961
tid = __kmp_tid_from_gtid(global_tid);
962
if (tid == filter) {
963
KMP_COUNT_BLOCK(OMP_MASKED);
964
KMP_PUSH_PARTITIONED_TIMER(OMP_masked);
965
status = 1;
966
}
967
968
#if OMPT_SUPPORT && OMPT_OPTIONAL
969
if (status) {
970
if (ompt_enabled.ompt_callback_masked) {
971
kmp_info_t *this_thr = __kmp_threads[global_tid];
972
kmp_team_t *team = this_thr->th.th_team;
973
ompt_callbacks.ompt_callback(ompt_callback_masked)(
974
ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
975
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
976
OMPT_GET_RETURN_ADDRESS(0));
977
}
978
}
979
#endif
980
981
if (__kmp_env_consistency_check) {
982
#if KMP_USE_DYNAMIC_LOCK
983
if (status)
984
__kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
985
else
986
__kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
987
#else
988
if (status)
989
__kmp_push_sync(global_tid, ct_masked, loc, NULL);
990
else
991
__kmp_check_sync(global_tid, ct_masked, loc, NULL);
992
#endif
993
}
994
995
return status;
996
}
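/* Illustrative sketch (not part of the original source): for

     #pragma omp masked filter(2)
     { body(); }

   a compiler can emit

     if (__kmpc_masked(&loc, gtid, 2)) {
       body();
       __kmpc_end_masked(&loc, gtid);
     }

   so only the team member whose thread number equals the filter value runs
   the block; when no filter clause is present a value of 0 is passed and the
   construct behaves like master. */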
997
998
/*!
999
@ingroup WORK_SHARING
1000
@param loc source location information.
1001
@param global_tid global thread number .
1002
1003
Mark the end of a <tt>masked</tt> region. This should only be called by the
1004
thread that executes the <tt>masked</tt> region.
1005
*/
1006
void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {
1007
KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
1008
__kmp_assert_valid_gtid(global_tid);
1009
KMP_POP_PARTITIONED_TIMER();
1010
1011
#if OMPT_SUPPORT && OMPT_OPTIONAL
1012
kmp_info_t *this_thr = __kmp_threads[global_tid];
1013
kmp_team_t *team = this_thr->th.th_team;
1014
if (ompt_enabled.ompt_callback_masked) {
1015
int tid = __kmp_tid_from_gtid(global_tid);
1016
ompt_callbacks.ompt_callback(ompt_callback_masked)(
1017
ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
1018
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1019
OMPT_GET_RETURN_ADDRESS(0));
1020
}
1021
#endif
1022
1023
if (__kmp_env_consistency_check) {
1024
__kmp_pop_sync(global_tid, ct_masked, loc);
1025
}
1026
}
1027
1028
/*!
1029
@ingroup WORK_SHARING
1030
@param loc source location information.
1031
@param gtid global thread number.
1032
1033
Start execution of an <tt>ordered</tt> construct.
1034
*/
1035
void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
1036
int cid = 0;
1037
kmp_info_t *th;
1038
KMP_DEBUG_ASSERT(__kmp_init_serial);
1039
1040
KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
1041
__kmp_assert_valid_gtid(gtid);
1042
1043
if (!TCR_4(__kmp_init_parallel))
1044
__kmp_parallel_initialize();
1045
1046
__kmp_resume_if_soft_paused();
1047
1048
#if USE_ITT_BUILD
1049
__kmp_itt_ordered_prep(gtid);
1050
// TODO: ordered_wait_id
1051
#endif /* USE_ITT_BUILD */
1052
1053
th = __kmp_threads[gtid];
1054
1055
#if OMPT_SUPPORT && OMPT_OPTIONAL
1056
kmp_team_t *team;
1057
ompt_wait_id_t lck;
1058
void *codeptr_ra;
1059
OMPT_STORE_RETURN_ADDRESS(gtid);
1060
if (ompt_enabled.enabled) {
1061
team = __kmp_team_from_gtid(gtid);
1062
lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
1063
/* OMPT state update */
1064
th->th.ompt_thread_info.wait_id = lck;
1065
th->th.ompt_thread_info.state = ompt_state_wait_ordered;
1066
1067
/* OMPT event callback */
1068
codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1069
if (ompt_enabled.ompt_callback_mutex_acquire) {
1070
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1071
ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
1072
codeptr_ra);
1073
}
1074
}
1075
#endif
1076
1077
if (th->th.th_dispatch->th_deo_fcn != 0)
1078
(*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
1079
else
1080
__kmp_parallel_deo(&gtid, &cid, loc);
1081
1082
#if OMPT_SUPPORT && OMPT_OPTIONAL
1083
if (ompt_enabled.enabled) {
1084
/* OMPT state update */
1085
th->th.ompt_thread_info.state = ompt_state_work_parallel;
1086
th->th.ompt_thread_info.wait_id = 0;
1087
1088
/* OMPT event callback */
1089
if (ompt_enabled.ompt_callback_mutex_acquired) {
1090
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1091
ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1092
}
1093
}
1094
#endif
1095
1096
#if USE_ITT_BUILD
1097
__kmp_itt_ordered_start(gtid);
1098
#endif /* USE_ITT_BUILD */
1099
}
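/* Illustrative sketch (not part of the original source): inside a loop with
   an ordered clause, e.g.

     #pragma omp for ordered schedule(dynamic)
     for (int i = 0; i < n; ++i) {
       compute(i);
       #pragma omp ordered
       { emit(i); }   // lowered to __kmpc_ordered() ... __kmpc_end_ordered()
     }

   __kmpc_ordered() makes each thread wait until all earlier iterations have
   left their ordered region, so emit(i) executes in iteration order. */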
1100
1101
/*!
1102
@ingroup WORK_SHARING
1103
@param loc source location information.
1104
@param gtid global thread number.
1105
1106
End execution of an <tt>ordered</tt> construct.
1107
*/
1108
void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
1109
int cid = 0;
1110
kmp_info_t *th;
1111
1112
KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
1113
__kmp_assert_valid_gtid(gtid);
1114
1115
#if USE_ITT_BUILD
1116
__kmp_itt_ordered_end(gtid);
1117
// TODO: ordered_wait_id
1118
#endif /* USE_ITT_BUILD */
1119
1120
th = __kmp_threads[gtid];
1121
1122
if (th->th.th_dispatch->th_dxo_fcn != 0)
1123
(*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
1124
else
1125
__kmp_parallel_dxo(&gtid, &cid, loc);
1126
1127
#if OMPT_SUPPORT && OMPT_OPTIONAL
1128
OMPT_STORE_RETURN_ADDRESS(gtid);
1129
if (ompt_enabled.ompt_callback_mutex_released) {
1130
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1131
ompt_mutex_ordered,
1132
(ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
1133
->t.t_ordered.dt.t_value,
1134
OMPT_LOAD_RETURN_ADDRESS(gtid));
1135
}
1136
#endif
1137
}
1138
1139
#if KMP_USE_DYNAMIC_LOCK
1140
1141
static __forceinline void
1142
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
1143
kmp_int32 gtid, kmp_indirect_locktag_t tag) {
1144
// Pointer to the allocated indirect lock is written to crit, while indexing
1145
// is ignored.
1146
void *idx;
1147
kmp_indirect_lock_t **lck;
1148
lck = (kmp_indirect_lock_t **)crit;
1149
kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
1150
KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
1151
KMP_SET_I_LOCK_LOCATION(ilk, loc);
1152
KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
1153
KA_TRACE(20,
1154
("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
1155
#if USE_ITT_BUILD
1156
__kmp_itt_critical_creating(ilk->lock, loc);
1157
#endif
1158
int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
1159
if (status == 0) {
1160
#if USE_ITT_BUILD
1161
__kmp_itt_critical_destroyed(ilk->lock);
1162
#endif
1163
// We don't really need to destroy the unclaimed lock here since it will be
1164
// cleaned up at program exit.
1165
// KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
1166
}
1167
KMP_DEBUG_ASSERT(*lck != NULL);
1168
}
1169
1170
// Fast-path acquire tas lock
1171
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
1172
{ \
1173
kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1174
kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1175
kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1176
if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1177
!__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
1178
kmp_uint32 spins; \
1179
KMP_FSYNC_PREPARE(l); \
1180
KMP_INIT_YIELD(spins); \
1181
kmp_backoff_t backoff = __kmp_spin_backoff_params; \
1182
do { \
1183
if (TCR_4(__kmp_nth) > \
1184
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
1185
KMP_YIELD(TRUE); \
1186
} else { \
1187
KMP_YIELD_SPIN(spins); \
1188
} \
1189
__kmp_spin_backoff(&backoff); \
1190
} while ( \
1191
KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1192
!__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1193
} \
1194
KMP_FSYNC_ACQUIRED(l); \
1195
}
1196
1197
// Fast-path test tas lock
1198
#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1199
{ \
1200
kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1201
kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1202
kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1203
rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1204
__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1205
}
1206
1207
// Fast-path release tas lock
1208
#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1209
{ KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1210
1211
#if KMP_USE_FUTEX
1212
1213
#include <sys/syscall.h>
1214
#include <unistd.h>
1215
#ifndef FUTEX_WAIT
1216
#define FUTEX_WAIT 0
1217
#endif
1218
#ifndef FUTEX_WAKE
1219
#define FUTEX_WAKE 1
1220
#endif
1221
1222
// Fast-path acquire futex lock
1223
#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1224
{ \
1225
kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1226
kmp_int32 gtid_code = (gtid + 1) << 1; \
1227
KMP_MB(); \
1228
KMP_FSYNC_PREPARE(ftx); \
1229
kmp_int32 poll_val; \
1230
while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1231
&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1232
KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1233
kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1234
if (!cond) { \
1235
if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1236
poll_val | \
1237
KMP_LOCK_BUSY(1, futex))) { \
1238
continue; \
1239
} \
1240
poll_val |= KMP_LOCK_BUSY(1, futex); \
1241
} \
1242
kmp_int32 rc; \
1243
if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1244
NULL, NULL, 0)) != 0) { \
1245
continue; \
1246
} \
1247
gtid_code |= 1; \
1248
} \
1249
KMP_FSYNC_ACQUIRED(ftx); \
1250
}
1251
1252
// Fast-path test futex lock
1253
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1254
{ \
1255
kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1256
if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1257
KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1258
KMP_FSYNC_ACQUIRED(ftx); \
1259
rc = TRUE; \
1260
} else { \
1261
rc = FALSE; \
1262
} \
1263
}
1264
1265
// Fast-path release futex lock
1266
#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1267
{ \
1268
kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1269
KMP_MB(); \
1270
KMP_FSYNC_RELEASING(ftx); \
1271
kmp_int32 poll_val = \
1272
KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1273
if (KMP_LOCK_STRIP(poll_val) & 1) { \
1274
syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1275
KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1276
} \
1277
KMP_MB(); \
1278
KMP_YIELD_OVERSUB(); \
1279
}
1280
1281
#endif // KMP_USE_FUTEX
1282
1283
#else // KMP_USE_DYNAMIC_LOCK
1284
1285
static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1286
ident_t const *loc,
1287
kmp_int32 gtid) {
1288
kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1289
1290
// Because of the double-check, the following load doesn't need to be volatile
1291
kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1292
1293
if (lck == NULL) {
1294
void *idx;
1295
1296
// Allocate & initialize the lock.
1297
// Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1298
lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1299
__kmp_init_user_lock_with_checks(lck);
1300
__kmp_set_user_lock_location(lck, loc);
1301
#if USE_ITT_BUILD
1302
__kmp_itt_critical_creating(lck);
1303
// __kmp_itt_critical_creating() should be called *before* the first usage
1304
// of underlying lock. It is the only place where we can guarantee it. There
1305
// are chances the lock will be destroyed with no usage, but it is not a
1306
// problem, because this is not a real event seen by the user but rather setting
1307
// name for object (lock). See more details in kmp_itt.h.
1308
#endif /* USE_ITT_BUILD */
1309
1310
// Use a cmpxchg instruction to slam the start of the critical section with
1311
// the lock pointer. If another thread beat us to it, deallocate the lock,
1312
// and use the lock that the other thread allocated.
1313
int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1314
1315
if (status == 0) {
1316
// Deallocate the lock and reload the value.
1317
#if USE_ITT_BUILD
1318
__kmp_itt_critical_destroyed(lck);
1319
// Let ITT know the lock is destroyed and the same memory location may be reused
1320
// for another purpose.
1321
#endif /* USE_ITT_BUILD */
1322
__kmp_destroy_user_lock_with_checks(lck);
1323
__kmp_user_lock_free(&idx, gtid, lck);
1324
lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1325
KMP_DEBUG_ASSERT(lck != NULL);
1326
}
1327
}
1328
return lck;
1329
}
1330
1331
#endif // KMP_USE_DYNAMIC_LOCK
1332
1333
/*!
1334
@ingroup WORK_SHARING
1335
@param loc source location information.
1336
@param global_tid global thread number.
1337
@param crit identity of the critical section. This could be a pointer to a lock
1338
associated with the critical section, or some other suitably unique value.
1339
1340
Enter code protected by a `critical` construct.
1341
This function blocks until the executing thread can enter the critical section.
1342
*/
1343
void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1344
kmp_critical_name *crit) {
1345
#if KMP_USE_DYNAMIC_LOCK
1346
#if OMPT_SUPPORT && OMPT_OPTIONAL
1347
OMPT_STORE_RETURN_ADDRESS(global_tid);
1348
#endif // OMPT_SUPPORT
1349
__kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1350
#else
1351
KMP_COUNT_BLOCK(OMP_CRITICAL);
1352
#if OMPT_SUPPORT && OMPT_OPTIONAL
1353
ompt_state_t prev_state = ompt_state_undefined;
1354
ompt_thread_info_t ti;
1355
#endif
1356
kmp_user_lock_p lck;
1357
1358
KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1359
__kmp_assert_valid_gtid(global_tid);
1360
1361
// TODO: add THR_OVHD_STATE
1362
1363
KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1364
KMP_CHECK_USER_LOCK_INIT();
1365
1366
if ((__kmp_user_lock_kind == lk_tas) &&
1367
(sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1368
lck = (kmp_user_lock_p)crit;
1369
}
1370
#if KMP_USE_FUTEX
1371
else if ((__kmp_user_lock_kind == lk_futex) &&
1372
(sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1373
lck = (kmp_user_lock_p)crit;
1374
}
1375
#endif
1376
else { // ticket, queuing or drdpa
1377
lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1378
}
1379
1380
if (__kmp_env_consistency_check)
1381
__kmp_push_sync(global_tid, ct_critical, loc, lck);
1382
1383
// since the critical directive binds to all threads, not just the current
1384
// team we have to check this even if we are in a serialized team.
1385
// also, even if we are the uber thread, we still have to conduct the lock,
1386
// as we have to contend with sibling threads.
1387
1388
#if USE_ITT_BUILD
1389
__kmp_itt_critical_acquiring(lck);
1390
#endif /* USE_ITT_BUILD */
1391
#if OMPT_SUPPORT && OMPT_OPTIONAL
1392
OMPT_STORE_RETURN_ADDRESS(gtid);
1393
void *codeptr_ra = NULL;
1394
if (ompt_enabled.enabled) {
1395
ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1396
/* OMPT state update */
1397
prev_state = ti.state;
1398
ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1399
ti.state = ompt_state_wait_critical;
1400
1401
/* OMPT event callback */
1402
codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
1403
if (ompt_enabled.ompt_callback_mutex_acquire) {
1404
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1405
ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1406
(ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1407
}
1408
}
1409
#endif
1410
// Value of 'crit' should be good for using as a critical_id of the critical
1411
// section directive.
1412
__kmp_acquire_user_lock_with_checks(lck, global_tid);
1413
1414
#if USE_ITT_BUILD
1415
__kmp_itt_critical_acquired(lck);
1416
#endif /* USE_ITT_BUILD */
1417
#if OMPT_SUPPORT && OMPT_OPTIONAL
1418
if (ompt_enabled.enabled) {
1419
/* OMPT state update */
1420
ti.state = prev_state;
1421
ti.wait_id = 0;
1422
1423
/* OMPT event callback */
1424
if (ompt_enabled.ompt_callback_mutex_acquired) {
1425
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1426
ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1427
}
1428
}
1429
#endif
1430
KMP_POP_PARTITIONED_TIMER();
1431
1432
KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1433
KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1434
#endif // KMP_USE_DYNAMIC_LOCK
1435
}
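/* Illustrative sketch (not part of the original source, crit_update is a
   hypothetical compiler-emitted name): a named critical section such as

     #pragma omp critical(update)
     { counter++; }

   is typically lowered to

     static kmp_critical_name crit_update;   // zero-initialized lock slot
     __kmpc_critical(&loc, gtid, &crit_update);
     counter++;
     __kmpc_end_critical(&loc, gtid, &crit_update);

   All critical constructs that share the same name use the same lock object
   and therefore exclude one another program-wide. */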
1436
1437
#if KMP_USE_DYNAMIC_LOCK
1438
1439
// Converts the given hint to an internal lock implementation
1440
static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1441
#if KMP_USE_TSX
1442
#define KMP_TSX_LOCK(seq) lockseq_##seq
1443
#else
1444
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1445
#endif
1446
1447
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1448
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
1449
#else
1450
#define KMP_CPUINFO_RTM 0
1451
#endif
1452
1453
// Hints that do not require further logic
1454
if (hint & kmp_lock_hint_hle)
1455
return KMP_TSX_LOCK(hle);
1456
if (hint & kmp_lock_hint_rtm)
1457
return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
1458
if (hint & kmp_lock_hint_adaptive)
1459
return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1460
1461
// Rule out conflicting hints first by returning the default lock
1462
if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1463
return __kmp_user_lock_seq;
1464
if ((hint & omp_lock_hint_speculative) &&
1465
(hint & omp_lock_hint_nonspeculative))
1466
return __kmp_user_lock_seq;
1467
1468
// Do not even consider speculation when it appears to be contended
1469
if (hint & omp_lock_hint_contended)
1470
return lockseq_queuing;
1471
1472
// Uncontended lock without speculation
1473
if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1474
return lockseq_tas;
1475
1476
// Use RTM lock for speculation
1477
if (hint & omp_lock_hint_speculative)
1478
return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;
1479
1480
return __kmp_user_lock_seq;
1481
}
1482
1483
#if OMPT_SUPPORT && OMPT_OPTIONAL
1484
#if KMP_USE_DYNAMIC_LOCK
1485
static kmp_mutex_impl_t
1486
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1487
if (user_lock) {
1488
switch (KMP_EXTRACT_D_TAG(user_lock)) {
1489
case 0:
1490
break;
1491
#if KMP_USE_FUTEX
1492
case locktag_futex:
1493
return kmp_mutex_impl_queuing;
1494
#endif
1495
case locktag_tas:
1496
return kmp_mutex_impl_spin;
1497
#if KMP_USE_TSX
1498
case locktag_hle:
1499
case locktag_rtm_spin:
1500
return kmp_mutex_impl_speculative;
1501
#endif
1502
default:
1503
return kmp_mutex_impl_none;
1504
}
1505
ilock = KMP_LOOKUP_I_LOCK(user_lock);
1506
}
1507
KMP_ASSERT(ilock);
1508
switch (ilock->type) {
1509
#if KMP_USE_TSX
1510
case locktag_adaptive:
1511
case locktag_rtm_queuing:
1512
return kmp_mutex_impl_speculative;
1513
#endif
1514
case locktag_nested_tas:
1515
return kmp_mutex_impl_spin;
1516
#if KMP_USE_FUTEX
1517
case locktag_nested_futex:
1518
#endif
1519
case locktag_ticket:
1520
case locktag_queuing:
1521
case locktag_drdpa:
1522
case locktag_nested_ticket:
1523
case locktag_nested_queuing:
1524
case locktag_nested_drdpa:
1525
return kmp_mutex_impl_queuing;
1526
default:
1527
return kmp_mutex_impl_none;
1528
}
1529
}
1530
#else
1531
// For locks without dynamic binding
1532
static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1533
switch (__kmp_user_lock_kind) {
1534
case lk_tas:
1535
return kmp_mutex_impl_spin;
1536
#if KMP_USE_FUTEX
1537
case lk_futex:
1538
#endif
1539
case lk_ticket:
1540
case lk_queuing:
1541
case lk_drdpa:
1542
return kmp_mutex_impl_queuing;
1543
#if KMP_USE_TSX
1544
case lk_hle:
1545
case lk_rtm_queuing:
1546
case lk_rtm_spin:
1547
case lk_adaptive:
1548
return kmp_mutex_impl_speculative;
1549
#endif
1550
default:
1551
return kmp_mutex_impl_none;
1552
}
1553
}
1554
#endif // KMP_USE_DYNAMIC_LOCK
1555
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
1556
1557
/*!
1558
@ingroup WORK_SHARING
1559
@param loc source location information.
1560
@param global_tid global thread number.
1561
@param crit identity of the critical section. This could be a pointer to a lock
1562
associated with the critical section, or some other suitably unique value.
1563
@param hint the lock hint.
1564
1565
Enter code protected by a `critical` construct with a hint. The hint value is
1566
used to suggest a lock implementation. This function blocks until the executing
1567
thread can enter the critical section unless the hint suggests use of
1568
speculative execution and the hardware supports it.
1569
*/
1570
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1571
kmp_critical_name *crit, uint32_t hint) {
1572
KMP_COUNT_BLOCK(OMP_CRITICAL);
1573
kmp_user_lock_p lck;
1574
#if OMPT_SUPPORT && OMPT_OPTIONAL
1575
ompt_state_t prev_state = ompt_state_undefined;
1576
ompt_thread_info_t ti;
1577
// This is the case, if called from __kmpc_critical:
1578
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1579
if (!codeptr)
1580
codeptr = OMPT_GET_RETURN_ADDRESS(0);
1581
#endif
1582
1583
KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1584
__kmp_assert_valid_gtid(global_tid);
1585
1586
kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1587
// Check if it is initialized.
1588
KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1589
kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);
1590
if (*lk == 0) {
1591
if (KMP_IS_D_LOCK(lockseq)) {
1592
KMP_COMPARE_AND_STORE_ACQ32(
1593
(volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0,
1594
KMP_GET_D_TAG(lockseq));
1595
} else {
1596
__kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));
1597
}
1598
}
1599
// Branch for accessing the actual lock object and set operation. This
1600
// branching is inevitable since this lock initialization does not follow the
1601
// normal dispatch path (lock table is not used).
1602
if (KMP_EXTRACT_D_TAG(lk) != 0) {
1603
lck = (kmp_user_lock_p)lk;
1604
if (__kmp_env_consistency_check) {
1605
__kmp_push_sync(global_tid, ct_critical, loc, lck,
1606
__kmp_map_hint_to_lock(hint));
1607
}
1608
#if USE_ITT_BUILD
1609
__kmp_itt_critical_acquiring(lck);
1610
#endif
1611
#if OMPT_SUPPORT && OMPT_OPTIONAL
1612
if (ompt_enabled.enabled) {
1613
ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1614
/* OMPT state update */
1615
prev_state = ti.state;
1616
ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1617
ti.state = ompt_state_wait_critical;
1618
1619
/* OMPT event callback */
1620
if (ompt_enabled.ompt_callback_mutex_acquire) {
1621
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1622
ompt_mutex_critical, (unsigned int)hint,
1623
__ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1624
codeptr);
1625
}
1626
}
1627
#endif
1628
#if KMP_USE_INLINED_TAS
1629
if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
1630
KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1631
} else
1632
#elif KMP_USE_INLINED_FUTEX
1633
if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
1634
KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1635
} else
1636
#endif
1637
{
1638
KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1639
}
1640
} else {
1641
kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1642
lck = ilk->lock;
1643
if (__kmp_env_consistency_check) {
1644
__kmp_push_sync(global_tid, ct_critical, loc, lck,
1645
__kmp_map_hint_to_lock(hint));
1646
}
1647
#if USE_ITT_BUILD
1648
__kmp_itt_critical_acquiring(lck);
1649
#endif
1650
#if OMPT_SUPPORT && OMPT_OPTIONAL
1651
if (ompt_enabled.enabled) {
1652
ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1653
/* OMPT state update */
1654
prev_state = ti.state;
1655
ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1656
ti.state = ompt_state_wait_critical;
1657
1658
/* OMPT event callback */
1659
if (ompt_enabled.ompt_callback_mutex_acquire) {
1660
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1661
ompt_mutex_critical, (unsigned int)hint,
1662
__ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1663
codeptr);
1664
}
1665
}
1666
#endif
1667
KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1668
}
1669
KMP_POP_PARTITIONED_TIMER();
1670
1671
#if USE_ITT_BUILD
1672
__kmp_itt_critical_acquired(lck);
1673
#endif /* USE_ITT_BUILD */
1674
#if OMPT_SUPPORT && OMPT_OPTIONAL
1675
if (ompt_enabled.enabled) {
1676
/* OMPT state update */
1677
ti.state = prev_state;
1678
ti.wait_id = 0;
1679
1680
/* OMPT event callback */
1681
if (ompt_enabled.ompt_callback_mutex_acquired) {
1682
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1683
ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1684
}
1685
}
1686
#endif
1687
1688
KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1689
KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1690
} // __kmpc_critical_with_hint
1691
1692
#endif // KMP_USE_DYNAMIC_LOCK
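/* Illustrative sketch (not part of the runtime): roughly the calls a compiler
   might emit when lowering "#pragma omp critical (name) hint(...)" with a
   standard omp_sync_hint_* constant. The names loc_struct, gtid and name_crit
   are hypothetical placeholders.

     static kmp_critical_name name_crit = {0};
     __kmpc_critical_with_hint(&loc_struct, gtid, &name_crit,
                               omp_sync_hint_contended);
     // ... body of the critical region ...
     __kmpc_end_critical(&loc_struct, gtid, &name_crit); // defined below

   As the code above shows, the hint is mapped to a lock kind the first time
   the named critical section is entered. */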
1693
1694
/*!
@ingroup WORK_SHARING
@param loc source location information.
@param global_tid global thread number.
@param crit identity of the critical section. This could be a pointer to a lock
associated with the critical section, or some other suitably unique value.

Leave a critical section, releasing any lock that was held during its execution.
*/
1703
void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1704
kmp_critical_name *crit) {
1705
kmp_user_lock_p lck;
1706
1707
KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1708
1709
#if KMP_USE_DYNAMIC_LOCK
1710
int locktag = KMP_EXTRACT_D_TAG(crit);
1711
if (locktag) {
1712
lck = (kmp_user_lock_p)crit;
1713
KMP_ASSERT(lck != NULL);
1714
if (__kmp_env_consistency_check) {
1715
__kmp_pop_sync(global_tid, ct_critical, loc);
1716
}
1717
#if USE_ITT_BUILD
1718
__kmp_itt_critical_releasing(lck);
1719
#endif
1720
#if KMP_USE_INLINED_TAS
1721
if (locktag == locktag_tas && !__kmp_env_consistency_check) {
1722
KMP_RELEASE_TAS_LOCK(lck, global_tid);
1723
} else
1724
#elif KMP_USE_INLINED_FUTEX
1725
if (locktag == locktag_futex && !__kmp_env_consistency_check) {
1726
KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1727
} else
1728
#endif
1729
{
1730
KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1731
}
1732
} else {
1733
kmp_indirect_lock_t *ilk =
1734
(kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1735
KMP_ASSERT(ilk != NULL);
1736
lck = ilk->lock;
1737
if (__kmp_env_consistency_check) {
1738
__kmp_pop_sync(global_tid, ct_critical, loc);
1739
}
1740
#if USE_ITT_BUILD
1741
__kmp_itt_critical_releasing(lck);
1742
#endif
1743
KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1744
}
1745
1746
#else // KMP_USE_DYNAMIC_LOCK
1747
1748
if ((__kmp_user_lock_kind == lk_tas) &&
1749
(sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1750
lck = (kmp_user_lock_p)crit;
1751
}
1752
#if KMP_USE_FUTEX
1753
else if ((__kmp_user_lock_kind == lk_futex) &&
1754
(sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1755
lck = (kmp_user_lock_p)crit;
1756
}
1757
#endif
1758
else { // ticket, queuing or drdpa
1759
lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1760
}
1761
1762
KMP_ASSERT(lck != NULL);
1763
1764
if (__kmp_env_consistency_check)
1765
__kmp_pop_sync(global_tid, ct_critical, loc);
1766
1767
#if USE_ITT_BUILD
1768
__kmp_itt_critical_releasing(lck);
1769
#endif /* USE_ITT_BUILD */
1770
// Value of 'crit' should be good for using as a critical_id of the critical
1771
// section directive.
1772
__kmp_release_user_lock_with_checks(lck, global_tid);
1773
1774
#endif // KMP_USE_DYNAMIC_LOCK
1775
1776
#if OMPT_SUPPORT && OMPT_OPTIONAL
1777
/* OMPT release event triggers after lock is released; place here to trigger
1778
* for all #if branches */
1779
OMPT_STORE_RETURN_ADDRESS(global_tid);
1780
if (ompt_enabled.ompt_callback_mutex_released) {
1781
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1782
ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1783
OMPT_LOAD_RETURN_ADDRESS(0));
1784
}
1785
#endif
1786
1787
KMP_POP_PARTITIONED_TIMER();
1788
KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1789
}
1790
1791
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
*/
1800
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1801
int status;
1802
KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1803
__kmp_assert_valid_gtid(global_tid);
1804
1805
if (!TCR_4(__kmp_init_parallel))
1806
__kmp_parallel_initialize();
1807
1808
__kmp_resume_if_soft_paused();
1809
1810
if (__kmp_env_consistency_check)
1811
__kmp_check_barrier(global_tid, ct_barrier, loc);
1812
1813
#if OMPT_SUPPORT
1814
ompt_frame_t *ompt_frame;
1815
if (ompt_enabled.enabled) {
1816
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1817
if (ompt_frame->enter_frame.ptr == NULL)
1818
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1819
}
1820
OMPT_STORE_RETURN_ADDRESS(global_tid);
1821
#endif
1822
#if USE_ITT_NOTIFY
1823
__kmp_threads[global_tid]->th.th_ident = loc;
1824
#endif
1825
status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1826
#if OMPT_SUPPORT && OMPT_OPTIONAL
1827
if (ompt_enabled.enabled) {
1828
ompt_frame->enter_frame = ompt_data_none;
1829
}
1830
#endif
1831
1832
return (status != 0) ? 0 : 1;
1833
}
1834
1835
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
1844
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1845
KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1846
__kmp_assert_valid_gtid(global_tid);
1847
__kmp_end_split_barrier(bs_plain_barrier, global_tid);
1848
}
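/* Illustrative sketch (not part of the runtime): the usual pairing of the two
   entry points above. loc_struct and gtid are hypothetical placeholders.

     if (__kmpc_barrier_master(&loc_struct, gtid)) {
       // ... master-only code ...
       __kmpc_end_barrier_master(&loc_struct, gtid); // releases the waiters
     }

   Threads that receive 0 wait at the barrier inside __kmpc_barrier_master and,
   once released by the master, skip the block. */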
1849
1850
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function: the master block carries an implicit
nowait, so nothing has to be released at its end; the bookkeeping that
__kmpc_end_master would otherwise do is performed inside this call.
*/
1860
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1861
kmp_int32 ret;
1862
KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1863
__kmp_assert_valid_gtid(global_tid);
1864
1865
if (!TCR_4(__kmp_init_parallel))
1866
__kmp_parallel_initialize();
1867
1868
__kmp_resume_if_soft_paused();
1869
1870
if (__kmp_env_consistency_check) {
1871
if (loc == 0) {
1872
KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1873
}
1874
__kmp_check_barrier(global_tid, ct_barrier, loc);
1875
}
1876
1877
#if OMPT_SUPPORT
1878
ompt_frame_t *ompt_frame;
1879
if (ompt_enabled.enabled) {
1880
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1881
if (ompt_frame->enter_frame.ptr == NULL)
1882
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1883
}
1884
OMPT_STORE_RETURN_ADDRESS(global_tid);
1885
#endif
1886
#if USE_ITT_NOTIFY
1887
__kmp_threads[global_tid]->th.th_ident = loc;
1888
#endif
1889
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1890
#if OMPT_SUPPORT && OMPT_OPTIONAL
1891
if (ompt_enabled.enabled) {
1892
ompt_frame->enter_frame = ompt_data_none;
1893
}
1894
#endif
1895
1896
ret = __kmpc_master(loc, global_tid);
1897
1898
if (__kmp_env_consistency_check) {
1899
/* there's no __kmpc_end_master called; so the (stats) */
1900
/* actions of __kmpc_end_master are done here */
1901
if (ret) {
1902
/* only one thread should do the pop since only */
1903
/* one did the push (see __kmpc_master()) */
1904
__kmp_pop_sync(global_tid, ct_master, loc);
1905
}
1906
}
1907
1908
return (ret);
1909
}
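/* Illustrative sketch (not part of the runtime): with the nowait form both the
   barrier and the master check happen inside the call, so no "end" call is
   emitted. loc_struct and gtid are hypothetical placeholders.

     if (__kmpc_barrier_master_nowait(&loc_struct, gtid)) {
       // ... master-only code; the other threads do not wait for it ...
     }
*/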
1910
1911
/* The BARRIER for a SINGLE process section is always explicit */
1912
/*!
1913
@ingroup WORK_SHARING
1914
@param loc source location information
1915
@param global_tid global thread number
1916
@return One if this thread should execute the single construct, zero otherwise.
1917
1918
Test whether to execute a <tt>single</tt> construct.
1919
There are no implicit barriers in the two "single" calls; rather, the compiler
should introduce an explicit barrier if one is required.
1921
*/
1922
1923
kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1924
__kmp_assert_valid_gtid(global_tid);
1925
kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1926
1927
if (rc) {
1928
// We are going to execute the single statement, so we should count it.
1929
KMP_COUNT_BLOCK(OMP_SINGLE);
1930
KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1931
}
1932
1933
#if OMPT_SUPPORT && OMPT_OPTIONAL
1934
kmp_info_t *this_thr = __kmp_threads[global_tid];
1935
kmp_team_t *team = this_thr->th.th_team;
1936
int tid = __kmp_tid_from_gtid(global_tid);
1937
1938
if (ompt_enabled.enabled) {
1939
if (rc) {
1940
if (ompt_enabled.ompt_callback_work) {
1941
ompt_callbacks.ompt_callback(ompt_callback_work)(
1942
ompt_work_single_executor, ompt_scope_begin,
1943
&(team->t.ompt_team_info.parallel_data),
1944
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1945
1, OMPT_GET_RETURN_ADDRESS(0));
1946
}
1947
} else {
1948
if (ompt_enabled.ompt_callback_work) {
1949
ompt_callbacks.ompt_callback(ompt_callback_work)(
1950
ompt_work_single_other, ompt_scope_begin,
1951
&(team->t.ompt_team_info.parallel_data),
1952
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1953
1, OMPT_GET_RETURN_ADDRESS(0));
1954
ompt_callbacks.ompt_callback(ompt_callback_work)(
1955
ompt_work_single_other, ompt_scope_end,
1956
&(team->t.ompt_team_info.parallel_data),
1957
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1958
1, OMPT_GET_RETURN_ADDRESS(0));
1959
}
1960
}
1961
}
1962
#endif
1963
1964
return rc;
1965
}
1966
1967
/*!
1968
@ingroup WORK_SHARING
1969
@param loc source location information
1970
@param global_tid global thread number
1971
1972
Mark the end of a <tt>single</tt> construct. This function should
1973
only be called by the thread that executed the block of code protected
1974
by the `single` construct.
1975
*/
1976
void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1977
__kmp_assert_valid_gtid(global_tid);
1978
__kmp_exit_single(global_tid);
1979
KMP_POP_PARTITIONED_TIMER();
1980
1981
#if OMPT_SUPPORT && OMPT_OPTIONAL
1982
kmp_info_t *this_thr = __kmp_threads[global_tid];
1983
kmp_team_t *team = this_thr->th.th_team;
1984
int tid = __kmp_tid_from_gtid(global_tid);
1985
1986
if (ompt_enabled.ompt_callback_work) {
1987
ompt_callbacks.ompt_callback(ompt_callback_work)(
1988
ompt_work_single_executor, ompt_scope_end,
1989
&(team->t.ompt_team_info.parallel_data),
1990
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1991
OMPT_GET_RETURN_ADDRESS(0));
1992
}
1993
#endif
1994
}
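/* Illustrative sketch (not part of the runtime): typical lowering of a
   "single" construct. loc_struct and gtid are hypothetical placeholders, and
   the trailing __kmpc_barrier call stands for the explicit barrier the
   compiler is expected to emit when there is no nowait clause.

     if (__kmpc_single(&loc_struct, gtid)) {
       // ... code executed by exactly one thread ...
       __kmpc_end_single(&loc_struct, gtid);
     }
     __kmpc_barrier(&loc_struct, gtid); // omitted for "single ... nowait"
*/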
1995
1996
/*!
1997
@ingroup WORK_SHARING
1998
@param loc Source location
1999
@param global_tid Global thread id
2000
2001
Mark the end of a statically scheduled loop.
2002
*/
2003
void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
2004
KMP_POP_PARTITIONED_TIMER();
2005
KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
2006
2007
#if OMPT_SUPPORT && OMPT_OPTIONAL
2008
if (ompt_enabled.ompt_callback_work) {
2009
ompt_work_t ompt_work_type = ompt_work_loop_static;
2010
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
2011
ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
2012
// Determine workshare type
2013
if (loc != NULL) {
2014
if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
2015
ompt_work_type = ompt_work_loop_static;
2016
} else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
2017
ompt_work_type = ompt_work_sections;
2018
} else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
2019
ompt_work_type = ompt_work_distribute;
2020
} else {
2021
// use default set above.
2022
// a warning about this case is provided in __kmpc_for_static_init
2023
}
2024
KMP_DEBUG_ASSERT(ompt_work_type);
2025
}
2026
ompt_callbacks.ompt_callback(ompt_callback_work)(
2027
ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
2028
&(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
2029
}
2030
#endif
2031
if (__kmp_env_consistency_check)
2032
__kmp_pop_workshare(global_tid, ct_pdo, loc);
2033
}
2034
2035
// User routines which take C-style arguments (call by value)
2036
// different from the Fortran equivalent routines
2037
2038
void ompc_set_num_threads(int arg) {
2039
// !!!!! TODO: check the per-task binding
2040
__kmp_set_num_threads(arg, __kmp_entry_gtid());
2041
}
2042
2043
void ompc_set_dynamic(int flag) {
2044
kmp_info_t *thread;
2045
2046
/* For the thread-private implementation of the internal controls */
2047
thread = __kmp_entry_thread();
2048
2049
__kmp_save_internal_controls(thread);
2050
2051
set__dynamic(thread, flag ? true : false);
2052
}
2053
2054
void ompc_set_nested(int flag) {
2055
kmp_info_t *thread;
2056
2057
/* For the thread-private internal controls implementation */
2058
thread = __kmp_entry_thread();
2059
2060
__kmp_save_internal_controls(thread);
2061
2062
set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
2063
}
2064
2065
void ompc_set_max_active_levels(int max_active_levels) {
2066
/* TO DO */
2067
/* we want per-task implementation of this internal control */
2068
2069
/* For the per-thread internal controls implementation */
2070
__kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
2071
}
2072
2073
void ompc_set_schedule(omp_sched_t kind, int modifier) {
2074
// !!!!! TODO: check the per-task binding
2075
__kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
2076
}
2077
2078
int ompc_get_ancestor_thread_num(int level) {
2079
return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
2080
}
2081
2082
int ompc_get_team_size(int level) {
2083
return __kmp_get_team_size(__kmp_entry_gtid(), level);
2084
}
2085
2086
/* OpenMP 5.0 Affinity Format API */
2087
void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
2088
if (!__kmp_init_serial) {
2089
__kmp_serial_initialize();
2090
}
2091
__kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
2092
format, KMP_STRLEN(format) + 1);
2093
}
2094
2095
size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
2096
size_t format_size;
2097
if (!__kmp_init_serial) {
2098
__kmp_serial_initialize();
2099
}
2100
format_size = KMP_STRLEN(__kmp_affinity_format);
2101
if (buffer && size) {
2102
__kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
2103
format_size + 1);
2104
}
2105
return format_size;
2106
}
2107
2108
void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
2109
int gtid;
2110
if (!TCR_4(__kmp_init_middle)) {
2111
__kmp_middle_initialize();
2112
}
2113
__kmp_assign_root_init_mask();
2114
gtid = __kmp_get_gtid();
2115
#if KMP_AFFINITY_SUPPORTED
2116
if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2117
__kmp_affinity.flags.reset) {
2118
__kmp_reset_root_init_mask(gtid);
2119
}
2120
#endif
2121
__kmp_aux_display_affinity(gtid, format);
2122
}
2123
2124
size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
2125
char const *format) {
2126
int gtid;
2127
size_t num_required;
2128
kmp_str_buf_t capture_buf;
2129
if (!TCR_4(__kmp_init_middle)) {
2130
__kmp_middle_initialize();
2131
}
2132
__kmp_assign_root_init_mask();
2133
gtid = __kmp_get_gtid();
2134
#if KMP_AFFINITY_SUPPORTED
2135
if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
2136
__kmp_affinity.flags.reset) {
2137
__kmp_reset_root_init_mask(gtid);
2138
}
2139
#endif
2140
__kmp_str_buf_init(&capture_buf);
2141
num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
2142
if (buffer && buf_size) {
2143
__kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
2144
capture_buf.used + 1);
2145
}
2146
__kmp_str_buf_free(&capture_buf);
2147
return num_required;
2148
}
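/* Illustrative sketch (not part of the runtime): the capture entry point
   (shown here under its base name) follows a "query the required size, then
   fill the buffer" pattern; num_required is returned whether or not the
   output fit. fmt is a hypothetical format string.

     char probe[1];
     size_t need = ompc_capture_affinity(probe, sizeof(probe), fmt);
     char *buf = (char *)malloc(need + 1); // assumes the count excludes '\0'
     (void)ompc_capture_affinity(buf, need + 1, fmt);
*/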
2149
2150
void kmpc_set_stacksize(int arg) {
2151
// __kmp_aux_set_stacksize initializes the library if needed
2152
__kmp_aux_set_stacksize(arg);
2153
}
2154
2155
void kmpc_set_stacksize_s(size_t arg) {
2156
// __kmp_aux_set_stacksize initializes the library if needed
2157
__kmp_aux_set_stacksize(arg);
2158
}
2159
2160
void kmpc_set_blocktime(int arg) {
2161
int gtid, tid, bt = arg;
2162
kmp_info_t *thread;
2163
2164
gtid = __kmp_entry_gtid();
2165
tid = __kmp_tid_from_gtid(gtid);
2166
thread = __kmp_thread_from_gtid(gtid);
2167
2168
__kmp_aux_convert_blocktime(&bt);
2169
__kmp_aux_set_blocktime(bt, thread, tid);
2170
}
2171
2172
void kmpc_set_library(int arg) {
2173
// __kmp_user_set_library initializes the library if needed
2174
__kmp_user_set_library((enum library_type)arg);
2175
}
2176
2177
void kmpc_set_defaults(char const *str) {
2178
// __kmp_aux_set_defaults initializes the library if needed
2179
__kmp_aux_set_defaults(str, KMP_STRLEN(str));
2180
}
2181
2182
void kmpc_set_disp_num_buffers(int arg) {
2183
// ignore after initialization because some teams have already
2184
// allocated dispatch buffers
2185
if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
2186
arg <= KMP_MAX_DISP_NUM_BUFF) {
2187
__kmp_dispatch_num_buffers = arg;
2188
}
2189
}
2190
2191
int kmpc_set_affinity_mask_proc(int proc, void **mask) {
2192
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2193
return -1;
2194
#else
2195
if (!TCR_4(__kmp_init_middle)) {
2196
__kmp_middle_initialize();
2197
}
2198
__kmp_assign_root_init_mask();
2199
return __kmp_aux_set_affinity_mask_proc(proc, mask);
2200
#endif
2201
}
2202
2203
int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
2204
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2205
return -1;
2206
#else
2207
if (!TCR_4(__kmp_init_middle)) {
2208
__kmp_middle_initialize();
2209
}
2210
__kmp_assign_root_init_mask();
2211
return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2212
#endif
2213
}
2214
2215
int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2216
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2217
return -1;
2218
#else
2219
if (!TCR_4(__kmp_init_middle)) {
2220
__kmp_middle_initialize();
2221
}
2222
__kmp_assign_root_init_mask();
2223
return __kmp_aux_get_affinity_mask_proc(proc, mask);
2224
#endif
2225
}
2226
2227
/* -------------------------------------------------------------------------- */
2228
/*!
2229
@ingroup THREADPRIVATE
2230
@param loc source location information
2231
@param gtid global thread number
2232
@param cpy_size size of the cpy_data buffer
2233
@param cpy_data pointer to data to be copied
2234
@param cpy_func helper function to call for copying data
2235
@param didit flag variable: 1=single thread; 0=not single thread
2236
2237
__kmpc_copyprivate implements the interface for the private data broadcast
2238
needed for the copyprivate clause associated with a single region in an
2239
OpenMP<sup>*</sup> program (both C and Fortran).
2240
All threads participating in the parallel region call this routine.
2241
One of the threads (called the single thread) should have the <tt>didit</tt>
2242
variable set to 1 and all other threads should have that variable set to 0.
2243
All threads pass a pointer to a data buffer (cpy_data) that they have built.
2244
2245
The OpenMP specification forbids the use of nowait on the single region when a
2246
copyprivate clause is present. However, @ref __kmpc_copyprivate implements a
2247
barrier internally to avoid race conditions, so the code generation for the
2248
single region should avoid generating a barrier after the call to @ref
2249
__kmpc_copyprivate.
2250
2251
The <tt>gtid</tt> parameter is the global thread id for the current thread.
2252
The <tt>loc</tt> parameter is a pointer to source location information.
2253
2254
Internal implementation: The single thread will first copy its descriptor
2255
address (cpy_data) to a team-private location, then the other threads will each
2256
call the function pointed to by the parameter cpy_func, which carries out the
2257
copy by copying the data using the cpy_data buffer.
2258
2259
The cpy_func routine used for the copy and the contents of the data area defined
2260
by cpy_data and cpy_size may be built in any fashion that will allow the copy
2261
to be done. For instance, the cpy_data buffer can hold the actual data to be
2262
copied or it may hold a list of pointers to the data. The cpy_func routine must
2263
interpret the cpy_data buffer appropriately.
2264
2265
The interface to cpy_func is as follows:
2266
@code
2267
void cpy_func( void *destination, void *source )
2268
@endcode
2269
where void *destination is the cpy_data pointer for the thread being copied to
2270
and void *source is the cpy_data pointer for the thread being copied from.
2271
*/
2272
void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2273
void *cpy_data, void (*cpy_func)(void *, void *),
2274
kmp_int32 didit) {
2275
void **data_ptr;
2276
KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2277
__kmp_assert_valid_gtid(gtid);
2278
2279
KMP_MB();
2280
2281
data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2282
2283
if (__kmp_env_consistency_check) {
2284
if (loc == 0) {
2285
KMP_WARNING(ConstructIdentInvalid);
2286
}
2287
}
2288
2289
// ToDo: Optimize the following two barriers into some kind of split barrier
2290
2291
if (didit)
2292
*data_ptr = cpy_data;
2293
2294
#if OMPT_SUPPORT
2295
ompt_frame_t *ompt_frame;
2296
if (ompt_enabled.enabled) {
2297
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2298
if (ompt_frame->enter_frame.ptr == NULL)
2299
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2300
}
2301
OMPT_STORE_RETURN_ADDRESS(gtid);
2302
#endif
2303
/* This barrier is not a barrier region boundary */
2304
#if USE_ITT_NOTIFY
2305
__kmp_threads[gtid]->th.th_ident = loc;
2306
#endif
2307
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2308
2309
if (!didit)
2310
(*cpy_func)(cpy_data, *data_ptr);
2311
2312
// Consider next barrier a user-visible barrier for barrier region boundaries
2313
// Nesting checks are already handled by the single construct checks
2314
{
2315
#if OMPT_SUPPORT
2316
OMPT_STORE_RETURN_ADDRESS(gtid);
2317
#endif
2318
#if USE_ITT_NOTIFY
2319
__kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2320
// tasks can overwrite the location)
2321
#endif
2322
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2323
#if OMPT_SUPPORT && OMPT_OPTIONAL
2324
if (ompt_enabled.enabled) {
2325
ompt_frame->enter_frame = ompt_data_none;
2326
}
2327
#endif
2328
}
2329
}
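/* Illustrative sketch (not part of the runtime): one possible shape of the
   cpy_func helper described above, assuming cpy_data points directly at a
   single int to be broadcast. broadcast_int, loc_struct, gtid and didit are
   hypothetical names.

     static void broadcast_int(void *dst, void *src) {
       *(int *)dst = *(int *)src; // copy from the single thread's buffer
     }

     int priv; // each thread's copyprivate variable
     __kmpc_copyprivate(&loc_struct, gtid, sizeof(int), &priv, broadcast_int,
                        didit);
*/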
2330
2331
/* --------------------------------------------------------------------------*/
2332
/*!
2333
@ingroup THREADPRIVATE
2334
@param loc source location information
2335
@param gtid global thread number
2336
@param cpy_data pointer to the data to be saved/copied or 0
2337
@return the saved pointer to the data
2338
2339
__kmpc_copyprivate_light is a lighter-weight version of __kmpc_copyprivate: it
only saves the pointer it is given (when that pointer is not 0, i.e. on the
thread coming out of the single block) and returns the saved pointer to every
caller (the single thread itself does not need it). It does not perform any
data copying; the copy has to be done elsewhere, e.g. inline in the generated
code. For the same reason the function, unlike __kmpc_copyprivate, does not end
with a barrier, so the generated code must place a barrier after all of the
copying has been done.
2347
*/
2348
void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {
2349
void **data_ptr;
2350
2351
KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));
2352
2353
KMP_MB();
2354
2355
data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2356
2357
if (__kmp_env_consistency_check) {
2358
if (loc == 0) {
2359
KMP_WARNING(ConstructIdentInvalid);
2360
}
2361
}
2362
2363
// ToDo: Optimize the following barrier
2364
2365
if (cpy_data)
2366
*data_ptr = cpy_data;
2367
2368
#if OMPT_SUPPORT
2369
ompt_frame_t *ompt_frame;
2370
if (ompt_enabled.enabled) {
2371
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2372
if (ompt_frame->enter_frame.ptr == NULL)
2373
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2374
OMPT_STORE_RETURN_ADDRESS(gtid);
2375
}
2376
#endif
2377
/* This barrier is not a barrier region boundary */
2378
#if USE_ITT_NOTIFY
2379
__kmp_threads[gtid]->th.th_ident = loc;
2380
#endif
2381
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2382
2383
return *data_ptr;
2384
}
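/* Illustrative sketch (not part of the runtime): how generated code might use
   the light variant, per the comment above; the copy and the trailing barrier
   are the caller's responsibility. loc_struct, gtid, didit and priv are
   hypothetical names, and __kmpc_barrier stands for the runtime's plain
   barrier entry point.

     int priv;
     int *src = (int *)__kmpc_copyprivate_light(&loc_struct, gtid,
                                                didit ? &priv : NULL);
     if (!didit)
       priv = *src; // inline copy emitted by the compiler
     __kmpc_barrier(&loc_struct, gtid); // barrier after all copies are done
*/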
2385
2386
/* -------------------------------------------------------------------------- */
2387
2388
#define INIT_LOCK __kmp_init_user_lock_with_checks
2389
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2390
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2391
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2392
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2393
#define ACQUIRE_NESTED_LOCK_TIMED \
2394
__kmp_acquire_nested_user_lock_with_checks_timed
2395
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
2396
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2397
#define TEST_LOCK __kmp_test_user_lock_with_checks
2398
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2399
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2400
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2401
2402
// TODO: Make check abort messages use location info & pass it into
2403
// with_checks routines
2404
2405
#if KMP_USE_DYNAMIC_LOCK
2406
2407
// internal lock initializer
2408
static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2409
kmp_dyna_lockseq_t seq) {
2410
if (KMP_IS_D_LOCK(seq)) {
2411
KMP_INIT_D_LOCK(lock, seq);
2412
#if USE_ITT_BUILD
2413
__kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2414
#endif
2415
} else {
2416
KMP_INIT_I_LOCK(lock, seq);
2417
#if USE_ITT_BUILD
2418
kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2419
__kmp_itt_lock_creating(ilk->lock, loc);
2420
#endif
2421
}
2422
}
2423
2424
// internal nest lock initializer
2425
static __forceinline void
2426
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2427
kmp_dyna_lockseq_t seq) {
2428
#if KMP_USE_TSX
2429
// Don't have nested lock implementation for speculative locks
2430
if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
2431
seq == lockseq_rtm_spin || seq == lockseq_adaptive)
2432
seq = __kmp_user_lock_seq;
2433
#endif
2434
switch (seq) {
2435
case lockseq_tas:
2436
seq = lockseq_nested_tas;
2437
break;
2438
#if KMP_USE_FUTEX
2439
case lockseq_futex:
2440
seq = lockseq_nested_futex;
2441
break;
2442
#endif
2443
case lockseq_ticket:
2444
seq = lockseq_nested_ticket;
2445
break;
2446
case lockseq_queuing:
2447
seq = lockseq_nested_queuing;
2448
break;
2449
case lockseq_drdpa:
2450
seq = lockseq_nested_drdpa;
2451
break;
2452
default:
2453
seq = lockseq_nested_queuing;
2454
}
2455
KMP_INIT_I_LOCK(lock, seq);
2456
#if USE_ITT_BUILD
2457
kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2458
__kmp_itt_lock_creating(ilk->lock, loc);
2459
#endif
2460
}
2461
2462
/* initialize the lock with a hint */
2463
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2464
uintptr_t hint) {
2465
KMP_DEBUG_ASSERT(__kmp_init_serial);
2466
if (__kmp_env_consistency_check && user_lock == NULL) {
2467
KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2468
}
2469
2470
__kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2471
2472
#if OMPT_SUPPORT && OMPT_OPTIONAL
2473
// This is the case, if called from omp_init_lock_with_hint:
2474
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2475
if (!codeptr)
2476
codeptr = OMPT_GET_RETURN_ADDRESS(0);
2477
if (ompt_enabled.ompt_callback_lock_init) {
2478
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2479
ompt_mutex_lock, (omp_lock_hint_t)hint,
2480
__ompt_get_mutex_impl_type(user_lock),
2481
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2482
}
2483
#endif
2484
}
2485
2486
/* initialize the lock with a hint */
2487
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2488
void **user_lock, uintptr_t hint) {
2489
KMP_DEBUG_ASSERT(__kmp_init_serial);
2490
if (__kmp_env_consistency_check && user_lock == NULL) {
2491
KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2492
}
2493
2494
__kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2495
2496
#if OMPT_SUPPORT && OMPT_OPTIONAL
2497
// This is the case, if called from omp_init_lock_with_hint:
2498
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2499
if (!codeptr)
2500
codeptr = OMPT_GET_RETURN_ADDRESS(0);
2501
if (ompt_enabled.ompt_callback_lock_init) {
2502
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2503
ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2504
__ompt_get_mutex_impl_type(user_lock),
2505
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2506
}
2507
#endif
2508
}
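/* Illustrative sketch (not part of the runtime): as the comments above note,
   these entry points back the standard omp_init_lock_with_hint /
   omp_init_nest_lock_with_hint API, e.g.

     omp_lock_t l;
     omp_init_lock_with_hint(&l, omp_lock_hint_speculative);

   where omp_lock_hint_speculative is the standard OpenMP hint constant and
   __kmp_map_hint_to_lock is expected to pick a suitable lock sequence for it
   (or fall back to a default one). */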
2509
2510
#endif // KMP_USE_DYNAMIC_LOCK
2511
2512
/* initialize the lock */
2513
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2514
#if KMP_USE_DYNAMIC_LOCK
2515
2516
KMP_DEBUG_ASSERT(__kmp_init_serial);
2517
if (__kmp_env_consistency_check && user_lock == NULL) {
2518
KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2519
}
2520
__kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2521
2522
#if OMPT_SUPPORT && OMPT_OPTIONAL
2523
// This is the case, if called from omp_init_lock_with_hint:
2524
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2525
if (!codeptr)
2526
codeptr = OMPT_GET_RETURN_ADDRESS(0);
2527
if (ompt_enabled.ompt_callback_lock_init) {
2528
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2529
ompt_mutex_lock, omp_lock_hint_none,
2530
__ompt_get_mutex_impl_type(user_lock),
2531
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2532
}
2533
#endif
2534
2535
#else // KMP_USE_DYNAMIC_LOCK
2536
2537
static char const *const func = "omp_init_lock";
2538
kmp_user_lock_p lck;
2539
KMP_DEBUG_ASSERT(__kmp_init_serial);
2540
2541
if (__kmp_env_consistency_check) {
2542
if (user_lock == NULL) {
2543
KMP_FATAL(LockIsUninitialized, func);
2544
}
2545
}
2546
2547
KMP_CHECK_USER_LOCK_INIT();
2548
2549
if ((__kmp_user_lock_kind == lk_tas) &&
2550
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2551
lck = (kmp_user_lock_p)user_lock;
2552
}
2553
#if KMP_USE_FUTEX
2554
else if ((__kmp_user_lock_kind == lk_futex) &&
2555
(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2556
lck = (kmp_user_lock_p)user_lock;
2557
}
2558
#endif
2559
else {
2560
lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2561
}
2562
INIT_LOCK(lck);
2563
__kmp_set_user_lock_location(lck, loc);
2564
2565
#if OMPT_SUPPORT && OMPT_OPTIONAL
2566
// This is the case, if called from omp_init_lock_with_hint:
2567
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2568
if (!codeptr)
2569
codeptr = OMPT_GET_RETURN_ADDRESS(0);
2570
if (ompt_enabled.ompt_callback_lock_init) {
2571
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2572
ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2573
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2574
}
2575
#endif
2576
2577
#if USE_ITT_BUILD
2578
__kmp_itt_lock_creating(lck);
2579
#endif /* USE_ITT_BUILD */
2580
2581
#endif // KMP_USE_DYNAMIC_LOCK
2582
} // __kmpc_init_lock
2583
2584
/* initialize the lock */
2585
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2586
#if KMP_USE_DYNAMIC_LOCK
2587
2588
KMP_DEBUG_ASSERT(__kmp_init_serial);
2589
if (__kmp_env_consistency_check && user_lock == NULL) {
2590
KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2591
}
2592
__kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2593
2594
#if OMPT_SUPPORT && OMPT_OPTIONAL
2595
// This is the case, if called from omp_init_lock_with_hint:
2596
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2597
if (!codeptr)
2598
codeptr = OMPT_GET_RETURN_ADDRESS(0);
2599
if (ompt_enabled.ompt_callback_lock_init) {
2600
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2601
ompt_mutex_nest_lock, omp_lock_hint_none,
2602
__ompt_get_mutex_impl_type(user_lock),
2603
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2604
}
2605
#endif
2606
2607
#else // KMP_USE_DYNAMIC_LOCK
2608
2609
static char const *const func = "omp_init_nest_lock";
2610
kmp_user_lock_p lck;
2611
KMP_DEBUG_ASSERT(__kmp_init_serial);
2612
2613
if (__kmp_env_consistency_check) {
2614
if (user_lock == NULL) {
2615
KMP_FATAL(LockIsUninitialized, func);
2616
}
2617
}
2618
2619
KMP_CHECK_USER_LOCK_INIT();
2620
2621
if ((__kmp_user_lock_kind == lk_tas) &&
2622
(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2623
OMP_NEST_LOCK_T_SIZE)) {
2624
lck = (kmp_user_lock_p)user_lock;
2625
}
2626
#if KMP_USE_FUTEX
2627
else if ((__kmp_user_lock_kind == lk_futex) &&
2628
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2629
OMP_NEST_LOCK_T_SIZE)) {
2630
lck = (kmp_user_lock_p)user_lock;
2631
}
2632
#endif
2633
else {
2634
lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2635
}
2636
2637
INIT_NESTED_LOCK(lck);
2638
__kmp_set_user_lock_location(lck, loc);
2639
2640
#if OMPT_SUPPORT && OMPT_OPTIONAL
2641
// This is the case, if called from omp_init_lock_with_hint:
2642
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2643
if (!codeptr)
2644
codeptr = OMPT_GET_RETURN_ADDRESS(0);
2645
if (ompt_enabled.ompt_callback_lock_init) {
2646
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2647
ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2648
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2649
}
2650
#endif
2651
2652
#if USE_ITT_BUILD
2653
__kmp_itt_lock_creating(lck);
2654
#endif /* USE_ITT_BUILD */
2655
2656
#endif // KMP_USE_DYNAMIC_LOCK
2657
} // __kmpc_init_nest_lock
2658
2659
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2660
#if KMP_USE_DYNAMIC_LOCK
2661
2662
#if USE_ITT_BUILD
2663
kmp_user_lock_p lck;
2664
if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2665
lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2666
} else {
2667
lck = (kmp_user_lock_p)user_lock;
2668
}
2669
__kmp_itt_lock_destroyed(lck);
2670
#endif
2671
#if OMPT_SUPPORT && OMPT_OPTIONAL
2672
// This is the case, if called from omp_init_lock_with_hint:
2673
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2674
if (!codeptr)
2675
codeptr = OMPT_GET_RETURN_ADDRESS(0);
2676
if (ompt_enabled.ompt_callback_lock_destroy) {
2677
ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2678
ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2679
}
2680
#endif
2681
KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2682
#else
2683
kmp_user_lock_p lck;
2684
2685
if ((__kmp_user_lock_kind == lk_tas) &&
2686
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2687
lck = (kmp_user_lock_p)user_lock;
2688
}
2689
#if KMP_USE_FUTEX
2690
else if ((__kmp_user_lock_kind == lk_futex) &&
2691
(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2692
lck = (kmp_user_lock_p)user_lock;
2693
}
2694
#endif
2695
else {
2696
lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2697
}
2698
2699
#if OMPT_SUPPORT && OMPT_OPTIONAL
2700
// This is the case, if called from omp_init_lock_with_hint:
2701
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2702
if (!codeptr)
2703
codeptr = OMPT_GET_RETURN_ADDRESS(0);
2704
if (ompt_enabled.ompt_callback_lock_destroy) {
2705
ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2706
ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2707
}
2708
#endif
2709
2710
#if USE_ITT_BUILD
2711
__kmp_itt_lock_destroyed(lck);
2712
#endif /* USE_ITT_BUILD */
2713
DESTROY_LOCK(lck);
2714
2715
if ((__kmp_user_lock_kind == lk_tas) &&
2716
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2717
;
2718
}
2719
#if KMP_USE_FUTEX
2720
else if ((__kmp_user_lock_kind == lk_futex) &&
2721
(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2722
;
2723
}
2724
#endif
2725
else {
2726
__kmp_user_lock_free(user_lock, gtid, lck);
2727
}
2728
#endif // KMP_USE_DYNAMIC_LOCK
2729
} // __kmpc_destroy_lock
2730
2731
/* destroy the lock */
2732
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2733
#if KMP_USE_DYNAMIC_LOCK
2734
2735
#if USE_ITT_BUILD
2736
kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2737
__kmp_itt_lock_destroyed(ilk->lock);
2738
#endif
2739
#if OMPT_SUPPORT && OMPT_OPTIONAL
2740
// This is the case, if called from omp_init_lock_with_hint:
2741
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2742
if (!codeptr)
2743
codeptr = OMPT_GET_RETURN_ADDRESS(0);
2744
if (ompt_enabled.ompt_callback_lock_destroy) {
2745
ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2746
ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2747
}
2748
#endif
2749
KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2750
2751
#else // KMP_USE_DYNAMIC_LOCK
2752
2753
kmp_user_lock_p lck;
2754
2755
if ((__kmp_user_lock_kind == lk_tas) &&
2756
(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2757
OMP_NEST_LOCK_T_SIZE)) {
2758
lck = (kmp_user_lock_p)user_lock;
2759
}
2760
#if KMP_USE_FUTEX
2761
else if ((__kmp_user_lock_kind == lk_futex) &&
2762
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2763
OMP_NEST_LOCK_T_SIZE)) {
2764
lck = (kmp_user_lock_p)user_lock;
2765
}
2766
#endif
2767
else {
2768
lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2769
}
2770
2771
#if OMPT_SUPPORT && OMPT_OPTIONAL
2772
// This is the case, if called from omp_init_lock_with_hint:
2773
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2774
if (!codeptr)
2775
codeptr = OMPT_GET_RETURN_ADDRESS(0);
2776
if (ompt_enabled.ompt_callback_lock_destroy) {
2777
ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2778
ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2779
}
2780
#endif
2781
2782
#if USE_ITT_BUILD
2783
__kmp_itt_lock_destroyed(lck);
2784
#endif /* USE_ITT_BUILD */
2785
2786
DESTROY_NESTED_LOCK(lck);
2787
2788
if ((__kmp_user_lock_kind == lk_tas) &&
2789
(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2790
OMP_NEST_LOCK_T_SIZE)) {
2791
;
2792
}
2793
#if KMP_USE_FUTEX
2794
else if ((__kmp_user_lock_kind == lk_futex) &&
2795
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2796
OMP_NEST_LOCK_T_SIZE)) {
2797
;
2798
}
2799
#endif
2800
else {
2801
__kmp_user_lock_free(user_lock, gtid, lck);
2802
}
2803
#endif // KMP_USE_DYNAMIC_LOCK
2804
} // __kmpc_destroy_nest_lock
2805
2806
void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2807
KMP_COUNT_BLOCK(OMP_set_lock);
2808
#if KMP_USE_DYNAMIC_LOCK
2809
int tag = KMP_EXTRACT_D_TAG(user_lock);
2810
#if USE_ITT_BUILD
2811
__kmp_itt_lock_acquiring(
2812
(kmp_user_lock_p)
2813
user_lock); // itt function will get to the right lock object.
2814
#endif
2815
#if OMPT_SUPPORT && OMPT_OPTIONAL
2816
// This is the case, if called from omp_init_lock_with_hint:
2817
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2818
if (!codeptr)
2819
codeptr = OMPT_GET_RETURN_ADDRESS(0);
2820
if (ompt_enabled.ompt_callback_mutex_acquire) {
2821
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2822
ompt_mutex_lock, omp_lock_hint_none,
2823
__ompt_get_mutex_impl_type(user_lock),
2824
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2825
}
2826
#endif
2827
#if KMP_USE_INLINED_TAS
2828
if (tag == locktag_tas && !__kmp_env_consistency_check) {
2829
KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2830
} else
2831
#elif KMP_USE_INLINED_FUTEX
2832
if (tag == locktag_futex && !__kmp_env_consistency_check) {
2833
KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2834
} else
2835
#endif
2836
{
2837
__kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2838
}
2839
#if USE_ITT_BUILD
2840
__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2841
#endif
2842
#if OMPT_SUPPORT && OMPT_OPTIONAL
2843
if (ompt_enabled.ompt_callback_mutex_acquired) {
2844
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2845
ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2846
}
2847
#endif
2848
2849
#else // KMP_USE_DYNAMIC_LOCK
2850
2851
kmp_user_lock_p lck;
2852
2853
if ((__kmp_user_lock_kind == lk_tas) &&
2854
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2855
lck = (kmp_user_lock_p)user_lock;
2856
}
2857
#if KMP_USE_FUTEX
2858
else if ((__kmp_user_lock_kind == lk_futex) &&
2859
(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2860
lck = (kmp_user_lock_p)user_lock;
2861
}
2862
#endif
2863
else {
2864
lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2865
}
2866
2867
#if USE_ITT_BUILD
2868
__kmp_itt_lock_acquiring(lck);
2869
#endif /* USE_ITT_BUILD */
2870
#if OMPT_SUPPORT && OMPT_OPTIONAL
2871
// This is the case, if called from omp_init_lock_with_hint:
2872
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2873
if (!codeptr)
2874
codeptr = OMPT_GET_RETURN_ADDRESS(0);
2875
if (ompt_enabled.ompt_callback_mutex_acquire) {
2876
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2877
ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2878
(ompt_wait_id_t)(uintptr_t)lck, codeptr);
2879
}
2880
#endif
2881
2882
ACQUIRE_LOCK(lck, gtid);
2883
2884
#if USE_ITT_BUILD
2885
__kmp_itt_lock_acquired(lck);
2886
#endif /* USE_ITT_BUILD */
2887
2888
#if OMPT_SUPPORT && OMPT_OPTIONAL
2889
if (ompt_enabled.ompt_callback_mutex_acquired) {
2890
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2891
ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2892
}
2893
#endif
2894
2895
#endif // KMP_USE_DYNAMIC_LOCK
2896
}
2897
2898
void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2899
#if KMP_USE_DYNAMIC_LOCK
2900
2901
#if USE_ITT_BUILD
2902
__kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2903
#endif
2904
#if OMPT_SUPPORT && OMPT_OPTIONAL
2905
// This is the case, if called from omp_init_lock_with_hint:
2906
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2907
if (!codeptr)
2908
codeptr = OMPT_GET_RETURN_ADDRESS(0);
2909
if (ompt_enabled.enabled) {
2910
if (ompt_enabled.ompt_callback_mutex_acquire) {
2911
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2912
ompt_mutex_nest_lock, omp_lock_hint_none,
2913
__ompt_get_mutex_impl_type(user_lock),
2914
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2915
}
2916
}
2917
#endif
2918
int acquire_status =
2919
KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2920
(void)acquire_status;
2921
#if USE_ITT_BUILD
2922
__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2923
#endif
2924
2925
#if OMPT_SUPPORT && OMPT_OPTIONAL
2926
if (ompt_enabled.enabled) {
2927
if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2928
if (ompt_enabled.ompt_callback_mutex_acquired) {
2929
// lock_first
2930
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2931
ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2932
codeptr);
2933
}
2934
} else {
2935
if (ompt_enabled.ompt_callback_nest_lock) {
2936
// lock_next
2937
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2938
ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2939
}
2940
}
2941
}
2942
#endif
2943
2944
#else // KMP_USE_DYNAMIC_LOCK
2945
int acquire_status;
2946
kmp_user_lock_p lck;
2947
2948
if ((__kmp_user_lock_kind == lk_tas) &&
2949
(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2950
OMP_NEST_LOCK_T_SIZE)) {
2951
lck = (kmp_user_lock_p)user_lock;
2952
}
2953
#if KMP_USE_FUTEX
2954
else if ((__kmp_user_lock_kind == lk_futex) &&
2955
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2956
OMP_NEST_LOCK_T_SIZE)) {
2957
lck = (kmp_user_lock_p)user_lock;
2958
}
2959
#endif
2960
else {
2961
lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2962
}
2963
2964
#if USE_ITT_BUILD
2965
__kmp_itt_lock_acquiring(lck);
2966
#endif /* USE_ITT_BUILD */
2967
#if OMPT_SUPPORT && OMPT_OPTIONAL
2968
// This is the case, if called from omp_init_lock_with_hint:
2969
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2970
if (!codeptr)
2971
codeptr = OMPT_GET_RETURN_ADDRESS(0);
2972
if (ompt_enabled.enabled) {
2973
if (ompt_enabled.ompt_callback_mutex_acquire) {
2974
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2975
ompt_mutex_nest_lock, omp_lock_hint_none,
2976
__ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2977
codeptr);
2978
}
2979
}
2980
#endif
2981
2982
ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2983
2984
#if USE_ITT_BUILD
2985
__kmp_itt_lock_acquired(lck);
2986
#endif /* USE_ITT_BUILD */
2987
2988
#if OMPT_SUPPORT && OMPT_OPTIONAL
2989
if (ompt_enabled.enabled) {
2990
if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2991
if (ompt_enabled.ompt_callback_mutex_acquired) {
2992
// lock_first
2993
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2994
ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2995
}
2996
} else {
2997
if (ompt_enabled.ompt_callback_nest_lock) {
2998
// lock_next
2999
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3000
ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3001
}
3002
}
3003
}
3004
#endif
3005
3006
#endif // KMP_USE_DYNAMIC_LOCK
3007
}
3008
3009
void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3010
#if KMP_USE_DYNAMIC_LOCK
3011
3012
int tag = KMP_EXTRACT_D_TAG(user_lock);
3013
#if USE_ITT_BUILD
3014
__kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3015
#endif
3016
#if KMP_USE_INLINED_TAS
3017
if (tag == locktag_tas && !__kmp_env_consistency_check) {
3018
KMP_RELEASE_TAS_LOCK(user_lock, gtid);
3019
} else
3020
#elif KMP_USE_INLINED_FUTEX
3021
if (tag == locktag_futex && !__kmp_env_consistency_check) {
3022
KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
3023
} else
3024
#endif
3025
{
3026
__kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3027
}
3028
3029
#if OMPT_SUPPORT && OMPT_OPTIONAL
3030
// This is the case, if called from omp_init_lock_with_hint:
3031
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3032
if (!codeptr)
3033
codeptr = OMPT_GET_RETURN_ADDRESS(0);
3034
if (ompt_enabled.ompt_callback_mutex_released) {
3035
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3036
ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3037
}
3038
#endif
3039
3040
#else // KMP_USE_DYNAMIC_LOCK
3041
3042
kmp_user_lock_p lck;
3043
3044
/* Can't use serial interval since not block structured */
3045
/* release the lock */
3046
3047
if ((__kmp_user_lock_kind == lk_tas) &&
3048
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3049
#if KMP_OS_LINUX && \
3050
(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3051
// "fast" path implemented to fix customer performance issue
3052
#if USE_ITT_BUILD
3053
__kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3054
#endif /* USE_ITT_BUILD */
3055
TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
3056
KMP_MB();
3057
3058
#if OMPT_SUPPORT && OMPT_OPTIONAL
3059
// This is the case, if called from omp_init_lock_with_hint:
3060
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3061
if (!codeptr)
3062
codeptr = OMPT_GET_RETURN_ADDRESS(0);
3063
if (ompt_enabled.ompt_callback_mutex_released) {
3064
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3065
ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3066
}
3067
#endif
3068
3069
return;
3070
#else
3071
lck = (kmp_user_lock_p)user_lock;
3072
#endif
3073
}
3074
#if KMP_USE_FUTEX
3075
else if ((__kmp_user_lock_kind == lk_futex) &&
3076
(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3077
lck = (kmp_user_lock_p)user_lock;
3078
}
3079
#endif
3080
else {
3081
lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
3082
}
3083
3084
#if USE_ITT_BUILD
3085
__kmp_itt_lock_releasing(lck);
3086
#endif /* USE_ITT_BUILD */
3087
3088
RELEASE_LOCK(lck, gtid);
3089
3090
#if OMPT_SUPPORT && OMPT_OPTIONAL
3091
// This is the case, if called from omp_init_lock_with_hint:
3092
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3093
if (!codeptr)
3094
codeptr = OMPT_GET_RETURN_ADDRESS(0);
3095
if (ompt_enabled.ompt_callback_mutex_released) {
3096
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3097
ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3098
}
3099
#endif
3100
3101
#endif // KMP_USE_DYNAMIC_LOCK
3102
}
3103
3104
/* release the lock */
3105
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3106
#if KMP_USE_DYNAMIC_LOCK
3107
3108
#if USE_ITT_BUILD
3109
__kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3110
#endif
3111
int release_status =
3112
KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
3113
(void)release_status;
3114
3115
#if OMPT_SUPPORT && OMPT_OPTIONAL
3116
// This is the case, if called from omp_init_lock_with_hint:
3117
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3118
if (!codeptr)
3119
codeptr = OMPT_GET_RETURN_ADDRESS(0);
3120
if (ompt_enabled.enabled) {
3121
if (release_status == KMP_LOCK_RELEASED) {
3122
if (ompt_enabled.ompt_callback_mutex_released) {
3123
// release_lock_last
3124
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3125
ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3126
codeptr);
3127
}
3128
} else if (ompt_enabled.ompt_callback_nest_lock) {
3129
// release_lock_prev
3130
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3131
ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3132
}
3133
}
3134
#endif
3135
3136
#else // KMP_USE_DYNAMIC_LOCK
3137
3138
kmp_user_lock_p lck;
3139
3140
/* Can't use serial interval since not block structured */
3141
3142
if ((__kmp_user_lock_kind == lk_tas) &&
3143
(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3144
OMP_NEST_LOCK_T_SIZE)) {
3145
#if KMP_OS_LINUX && \
3146
(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
3147
// "fast" path implemented to fix customer performance issue
3148
kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
3149
#if USE_ITT_BUILD
3150
__kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
3151
#endif /* USE_ITT_BUILD */
3152
3153
#if OMPT_SUPPORT && OMPT_OPTIONAL
3154
int release_status = KMP_LOCK_STILL_HELD;
3155
#endif
3156
3157
if (--(tl->lk.depth_locked) == 0) {
3158
TCW_4(tl->lk.poll, 0);
3159
#if OMPT_SUPPORT && OMPT_OPTIONAL
3160
release_status = KMP_LOCK_RELEASED;
3161
#endif
3162
}
3163
KMP_MB();
3164
3165
#if OMPT_SUPPORT && OMPT_OPTIONAL
3166
// This is the case, if called from omp_init_lock_with_hint:
3167
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3168
if (!codeptr)
3169
codeptr = OMPT_GET_RETURN_ADDRESS(0);
3170
if (ompt_enabled.enabled) {
3171
if (release_status == KMP_LOCK_RELEASED) {
3172
if (ompt_enabled.ompt_callback_mutex_released) {
3173
// release_lock_last
3174
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3175
ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3176
}
3177
} else if (ompt_enabled.ompt_callback_nest_lock) {
3178
// release_lock_previous
3179
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3180
ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3181
}
3182
}
3183
#endif
3184
3185
return;
3186
#else
3187
lck = (kmp_user_lock_p)user_lock;
3188
#endif
3189
}
3190
#if KMP_USE_FUTEX
3191
else if ((__kmp_user_lock_kind == lk_futex) &&
3192
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3193
OMP_NEST_LOCK_T_SIZE)) {
3194
lck = (kmp_user_lock_p)user_lock;
3195
}
3196
#endif
3197
else {
3198
lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
3199
}
3200
3201
#if USE_ITT_BUILD
3202
__kmp_itt_lock_releasing(lck);
3203
#endif /* USE_ITT_BUILD */
3204
3205
int release_status;
3206
release_status = RELEASE_NESTED_LOCK(lck, gtid);
3207
#if OMPT_SUPPORT && OMPT_OPTIONAL
3208
// This is the case, if called from omp_init_lock_with_hint:
3209
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3210
if (!codeptr)
3211
codeptr = OMPT_GET_RETURN_ADDRESS(0);
3212
if (ompt_enabled.enabled) {
3213
if (release_status == KMP_LOCK_RELEASED) {
3214
if (ompt_enabled.ompt_callback_mutex_released) {
3215
// release_lock_last
3216
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
3217
ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3218
}
3219
} else if (ompt_enabled.ompt_callback_nest_lock) {
3220
// release_lock_previous
3221
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3222
ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3223
}
3224
}
3225
#endif
3226
3227
#endif // KMP_USE_DYNAMIC_LOCK
3228
}
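/* Illustrative sketch (not part of the runtime): nestable locks count
   acquisitions per owner, so set/unset calls must pair up. loc_struct, gtid
   and nlock are hypothetical placeholders for an already initialized nest
   lock.

     __kmpc_set_nest_lock(&loc_struct, gtid, &nlock);   // first acquisition
     __kmpc_set_nest_lock(&loc_struct, gtid, &nlock);   // nested re-acquisition
     __kmpc_unset_nest_lock(&loc_struct, gtid, &nlock); // still held
     __kmpc_unset_nest_lock(&loc_struct, gtid, &nlock); // final release

   As the code above shows, only the final release reports
   ompt_callback_mutex_released; the inner releases report
   ompt_callback_nest_lock instead. */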
3229
3230
/* try to acquire the lock */
3231
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3232
KMP_COUNT_BLOCK(OMP_test_lock);
3233
3234
#if KMP_USE_DYNAMIC_LOCK
3235
int rc;
3236
int tag = KMP_EXTRACT_D_TAG(user_lock);
3237
#if USE_ITT_BUILD
3238
__kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3239
#endif
3240
#if OMPT_SUPPORT && OMPT_OPTIONAL
3241
// This is the case, if called from omp_init_lock_with_hint:
3242
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3243
if (!codeptr)
3244
codeptr = OMPT_GET_RETURN_ADDRESS(0);
3245
if (ompt_enabled.ompt_callback_mutex_acquire) {
3246
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3247
ompt_mutex_test_lock, omp_lock_hint_none,
3248
__ompt_get_mutex_impl_type(user_lock),
3249
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3250
}
3251
#endif
3252
#if KMP_USE_INLINED_TAS
3253
if (tag == locktag_tas && !__kmp_env_consistency_check) {
3254
KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
3255
} else
3256
#elif KMP_USE_INLINED_FUTEX
3257
if (tag == locktag_futex && !__kmp_env_consistency_check) {
3258
KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3259
} else
3260
#endif
3261
{
3262
rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3263
}
3264
if (rc) {
3265
#if USE_ITT_BUILD
3266
__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3267
#endif
3268
#if OMPT_SUPPORT && OMPT_OPTIONAL
3269
if (ompt_enabled.ompt_callback_mutex_acquired) {
3270
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3271
ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3272
}
3273
#endif
3274
return FTN_TRUE;
3275
} else {
3276
#if USE_ITT_BUILD
3277
__kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3278
#endif
3279
return FTN_FALSE;
3280
}
3281
3282
#else // KMP_USE_DYNAMIC_LOCK
3283
3284
kmp_user_lock_p lck;
3285
int rc;
3286
3287
if ((__kmp_user_lock_kind == lk_tas) &&
3288
(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3289
lck = (kmp_user_lock_p)user_lock;
3290
}
3291
#if KMP_USE_FUTEX
3292
else if ((__kmp_user_lock_kind == lk_futex) &&
3293
(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3294
lck = (kmp_user_lock_p)user_lock;
3295
}
3296
#endif
3297
else {
3298
lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3299
}
3300
3301
#if USE_ITT_BUILD
3302
__kmp_itt_lock_acquiring(lck);
3303
#endif /* USE_ITT_BUILD */
3304
#if OMPT_SUPPORT && OMPT_OPTIONAL
3305
// This is the case, if called from omp_init_lock_with_hint:
3306
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3307
if (!codeptr)
3308
codeptr = OMPT_GET_RETURN_ADDRESS(0);
3309
if (ompt_enabled.ompt_callback_mutex_acquire) {
3310
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3311
ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3312
(ompt_wait_id_t)(uintptr_t)lck, codeptr);
3313
}
3314
#endif
3315
3316
rc = TEST_LOCK(lck, gtid);
3317
#if USE_ITT_BUILD
3318
if (rc) {
3319
__kmp_itt_lock_acquired(lck);
3320
} else {
3321
__kmp_itt_lock_cancelled(lck);
3322
}
3323
#endif /* USE_ITT_BUILD */
3324
#if OMPT_SUPPORT && OMPT_OPTIONAL
3325
if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3326
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3327
ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3328
}
3329
#endif
3330
3331
return (rc ? FTN_TRUE : FTN_FALSE);
3332
3333
/* Can't use serial interval since not block structured */
3334
3335
#endif // KMP_USE_DYNAMIC_LOCK
3336
}
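/* Illustrative sketch (not part of the runtime): the non-blocking test entry
   point returns FTN_TRUE only when the lock was actually taken, so a
   successful test is paired with an unset. loc_struct, gtid and lock_var are
   hypothetical placeholders.

     if (__kmpc_test_lock(&loc_struct, gtid, &lock_var)) {
       // ... short critical work ...
       __kmpc_unset_lock(&loc_struct, gtid, &lock_var);
     } else {
       // ... fall back to other work without blocking ...
     }
*/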
3337
3338
/* try to acquire the lock */
3339
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3340
#if KMP_USE_DYNAMIC_LOCK
3341
int rc;
3342
#if USE_ITT_BUILD
3343
__kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3344
#endif
3345
#if OMPT_SUPPORT && OMPT_OPTIONAL
3346
// This is the case, if called from omp_init_lock_with_hint:
3347
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3348
if (!codeptr)
3349
codeptr = OMPT_GET_RETURN_ADDRESS(0);
3350
if (ompt_enabled.ompt_callback_mutex_acquire) {
3351
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3352
ompt_mutex_test_nest_lock, omp_lock_hint_none,
3353
__ompt_get_mutex_impl_type(user_lock),
3354
(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3355
}
3356
#endif
3357
rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3358
#if USE_ITT_BUILD
3359
if (rc) {
3360
__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3361
} else {
3362
__kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3363
}
3364
#endif
3365
#if OMPT_SUPPORT && OMPT_OPTIONAL
3366
if (ompt_enabled.enabled && rc) {
3367
if (rc == 1) {
3368
if (ompt_enabled.ompt_callback_mutex_acquired) {
3369
// lock_first
3370
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3371
ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3372
codeptr);
3373
}
3374
} else {
3375
if (ompt_enabled.ompt_callback_nest_lock) {
3376
// lock_next
3377
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3378
ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3379
}
3380
}
3381
}
3382
#endif
3383
return rc;
3384
3385
#else // KMP_USE_DYNAMIC_LOCK
3386
3387
kmp_user_lock_p lck;
3388
int rc;
3389
3390
if ((__kmp_user_lock_kind == lk_tas) &&
3391
(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3392
OMP_NEST_LOCK_T_SIZE)) {
3393
lck = (kmp_user_lock_p)user_lock;
3394
}
3395
#if KMP_USE_FUTEX
3396
else if ((__kmp_user_lock_kind == lk_futex) &&
3397
(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3398
OMP_NEST_LOCK_T_SIZE)) {
3399
lck = (kmp_user_lock_p)user_lock;
3400
}
3401
#endif
3402
else {
3403
lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3404
}
3405
3406
#if USE_ITT_BUILD
3407
__kmp_itt_lock_acquiring(lck);
3408
#endif /* USE_ITT_BUILD */
3409
3410
#if OMPT_SUPPORT && OMPT_OPTIONAL
3411
// This is the case if called from omp_init_lock_with_hint:
3412
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3413
if (!codeptr)
3414
codeptr = OMPT_GET_RETURN_ADDRESS(0);
3415
if (ompt_enabled.enabled &&
3416
ompt_enabled.ompt_callback_mutex_acquire) {
3417
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3418
ompt_mutex_test_nest_lock, omp_lock_hint_none,
3419
__ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3420
codeptr);
3421
}
3422
#endif
3423
3424
rc = TEST_NESTED_LOCK(lck, gtid);
3425
#if USE_ITT_BUILD
3426
if (rc) {
3427
__kmp_itt_lock_acquired(lck);
3428
} else {
3429
__kmp_itt_lock_cancelled(lck);
3430
}
3431
#endif /* USE_ITT_BUILD */
3432
#if OMPT_SUPPORT && OMPT_OPTIONAL
3433
if (ompt_enabled.enabled && rc) {
3434
if (rc == 1) {
3435
if (ompt_enabled.ompt_callback_mutex_acquired) {
3436
// lock_first
3437
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3438
ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3439
}
3440
} else {
3441
if (ompt_enabled.ompt_callback_nest_lock) {
3442
// lock_next
3443
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3444
ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3445
}
3446
}
3447
}
3448
#endif
3449
return rc;
3450
3451
/* Can't use serial interval since not block structured */
3452
3453
#endif // KMP_USE_DYNAMIC_LOCK
3454
}
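// Editor's note: an illustrative user-level sketch (not part of the runtime)
// of the nesting semantics above: on success the test returns the new nesting
// count (hence the lock_first/lock_next distinction in the OMPT callbacks),
// and 0 on failure:
//
//   #include <omp.h>
//   omp_nest_lock_t nl;
//   omp_init_nest_lock(&nl);
//   if (omp_test_nest_lock(&nl)) {         // returns 1: first acquisition
//     int depth = omp_test_nest_lock(&nl); // returns 2: owner re-acquires
//     (void)depth;
//     omp_unset_nest_lock(&nl);            // nesting 2 -> 1
//     omp_unset_nest_lock(&nl);            // nesting 1 -> 0, lock released
//   }
//   omp_destroy_nest_lock(&nl);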
3455
3456
// Interface to fast scalable reduce methods routines
3457
3458
// keep the selected method in a thread local structure for cross-function
3459
// usage: will be used in __kmpc_end_reduce* functions;
3460
// another solution: to re-determine the method one more time in
3461
// __kmpc_end_reduce* functions (new prototype required then)
3462
// AT: which solution is better?
3463
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3464
((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3465
3466
#define __KMP_GET_REDUCTION_METHOD(gtid) \
3467
(__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3468
3469
// description of the packed_reduction_method variable: look at the macros in
3470
// kmp.h
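// Editor's note: an illustrative pairing (a sketch of the intended usage, as
// seen in the __kmpc_reduce*/__kmpc_end_reduce* functions below) of the two
// macros on a single thread:
//
//   PACKED_REDUCTION_METHOD_T m = __kmp_determine_reduction_method(...);
//   __KMP_SET_REDUCTION_METHOD(gtid, m);  // in __kmpc_reduce[_nowait]()
//   ...
//   m = __KMP_GET_REDUCTION_METHOD(gtid); // in __kmpc_end_reduce[_nowait]()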
3471
3472
// used in a critical section reduce block
3473
static __forceinline void
3474
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3475
kmp_critical_name *crit) {
3476
3477
// this lock was visible to a customer and to the threading profile tool as a
3478
// serial overhead span (although it's used for an internal purpose only)
3479
// why was it visible in previous implementation?
3480
// should we keep it visible in new reduce block?
3481
kmp_user_lock_p lck;
3482
3483
#if KMP_USE_DYNAMIC_LOCK
3484
3485
kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3486
// Check if it is initialized.
3487
if (*lk == 0) {
3488
if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3489
KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3490
KMP_GET_D_TAG(__kmp_user_lock_seq));
3491
} else {
3492
__kmp_init_indirect_csptr(crit, loc, global_tid,
3493
KMP_GET_I_TAG(__kmp_user_lock_seq));
3494
}
3495
}
3496
// Branch for accessing the actual lock object and set operation. This
3497
// branching is inevitable since this lock initialization does not follow the
3498
// normal dispatch path (lock table is not used).
3499
if (KMP_EXTRACT_D_TAG(lk) != 0) {
3500
lck = (kmp_user_lock_p)lk;
3501
KMP_DEBUG_ASSERT(lck != NULL);
3502
if (__kmp_env_consistency_check) {
3503
__kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3504
}
3505
KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3506
} else {
3507
kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3508
lck = ilk->lock;
3509
KMP_DEBUG_ASSERT(lck != NULL);
3510
if (__kmp_env_consistency_check) {
3511
__kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3512
}
3513
KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3514
}
3515
3516
#else // KMP_USE_DYNAMIC_LOCK
3517
3518
// We know that the fast reduction code is only emitted by Intel compilers
3519
// with 32 byte critical sections. If there isn't enough space, then we
3520
// have to use a pointer.
3521
if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3522
lck = (kmp_user_lock_p)crit;
3523
} else {
3524
lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3525
}
3526
KMP_DEBUG_ASSERT(lck != NULL);
3527
3528
if (__kmp_env_consistency_check)
3529
__kmp_push_sync(global_tid, ct_critical, loc, lck);
3530
3531
__kmp_acquire_user_lock_with_checks(lck, global_tid);
3532
3533
#endif // KMP_USE_DYNAMIC_LOCK
3534
}
3535
3536
// used in a critical section reduce block
3537
static __forceinline void
3538
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3539
kmp_critical_name *crit) {
3540
3541
kmp_user_lock_p lck;
3542
3543
#if KMP_USE_DYNAMIC_LOCK
3544
3545
if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3546
lck = (kmp_user_lock_p)crit;
3547
if (__kmp_env_consistency_check)
3548
__kmp_pop_sync(global_tid, ct_critical, loc);
3549
KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3550
} else {
3551
kmp_indirect_lock_t *ilk =
3552
(kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3553
if (__kmp_env_consistency_check)
3554
__kmp_pop_sync(global_tid, ct_critical, loc);
3555
KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3556
}
3557
3558
#else // KMP_USE_DYNAMIC_LOCK
3559
3560
// We know that the fast reduction code is only emitted by Intel compilers
3561
// with 32 byte critical sections. If there isn't enough space, then we have
3562
// to use a pointer.
3563
if (__kmp_base_user_lock_size > 32) {
3564
lck = *((kmp_user_lock_p *)crit);
3565
KMP_ASSERT(lck != NULL);
3566
} else {
3567
lck = (kmp_user_lock_p)crit;
3568
}
3569
3570
if (__kmp_env_consistency_check)
3571
__kmp_pop_sync(global_tid, ct_critical, loc);
3572
3573
__kmp_release_user_lock_with_checks(lck, global_tid);
3574
3575
#endif // KMP_USE_DYNAMIC_LOCK
3576
} // __kmp_end_critical_section_reduce_block
3577
3578
static __forceinline int
3579
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3580
int *task_state) {
3581
kmp_team_t *team;
3582
3583
// Check whether we are inside the teams construct.
3584
if (th->th.th_teams_microtask) {
3585
*team_p = team = th->th.th_team;
3586
if (team->t.t_level == th->th.th_teams_level) {
3587
// This is reduction at teams construct.
3588
KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3589
// Let's swap teams temporarily for the reduction.
3590
th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3591
th->th.th_team = team->t.t_parent;
3592
th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3593
th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3594
*task_state = th->th.th_task_state;
3595
th->th.th_task_state = 0;
3596
3597
return 1;
3598
}
3599
}
3600
return 0;
3601
}
3602
3603
static __forceinline void
3604
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3605
// Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3606
th->th.th_info.ds.ds_tid = 0;
3607
th->th.th_team = team;
3608
th->th.th_team_nproc = team->t.t_nproc;
3609
th->th.th_task_team = team->t.t_task_team[task_state];
3610
__kmp_type_convert(task_state, &(th->th.th_task_state));
3611
}
3612
3613
/* 2.a.i. Reduce Block without a terminating barrier */
3614
/*!
3615
@ingroup SYNCHRONIZATION
3616
@param loc source location information
3617
@param global_tid global thread number
3618
@param num_vars number of items (variables) to be reduced
3619
@param reduce_size size of data in bytes to be reduced
3620
@param reduce_data pointer to data to be reduced
3621
@param reduce_func callback function providing reduction operation on two
3622
operands and returning result of reduction in lhs_data
3623
@param lck pointer to the unique lock data structure
3624
@result 1 for the primary thread, 0 for all other team threads, 2 for all team
3625
threads if atomic reduction needed
3626
3627
The nowait version is used for a reduce clause with the nowait argument.
3628
*/
3629
kmp_int32
3630
__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3631
size_t reduce_size, void *reduce_data,
3632
void (*reduce_func)(void *lhs_data, void *rhs_data),
3633
kmp_critical_name *lck) {
3634
3635
KMP_COUNT_BLOCK(REDUCE_nowait);
3636
int retval = 0;
3637
PACKED_REDUCTION_METHOD_T packed_reduction_method;
3638
kmp_info_t *th;
3639
kmp_team_t *team;
3640
int teams_swapped = 0, task_state;
3641
KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3642
__kmp_assert_valid_gtid(global_tid);
3643
3644
// why do we need this initialization here at all?
3645
// Reduction clause can not be used as a stand-alone directive.
3646
3647
// do not call __kmp_serial_initialize(), it will be called by
3648
// __kmp_parallel_initialize() if needed
3649
// possible detection of false-positive race by the threadchecker ???
3650
if (!TCR_4(__kmp_init_parallel))
3651
__kmp_parallel_initialize();
3652
3653
__kmp_resume_if_soft_paused();
3654
3655
// check correctness of reduce block nesting
3656
#if KMP_USE_DYNAMIC_LOCK
3657
if (__kmp_env_consistency_check)
3658
__kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3659
#else
3660
if (__kmp_env_consistency_check)
3661
__kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3662
#endif
3663
3664
th = __kmp_thread_from_gtid(global_tid);
3665
teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3666
3667
// packed_reduction_method value will be reused by __kmpc_end_reduce* functions,
3668
// the value should be kept in a variable
3669
// the variable should be either a construct-specific or thread-specific
3670
// property, not a team specific property
3671
// (a thread can reach the next reduce block on the next construct, reduce
3672
// method may differ on the next construct)
3673
// an ident_t "loc" parameter could be used as a construct-specific property
3674
// (what if loc == 0?)
3675
// (if both construct-specific and team-specific variables were shared,
3676
// then unnecessary extra syncs would be needed)
3677
// a thread-specific variable is better regarding two issues above (next
3678
// construct and extra syncs)
3679
// a thread-specific "th_local.reduction_method" variable is used currently
3680
// each thread executes the 'determine' and 'set' lines (no need to have only
3681
// one thread execute them, which would just add extra syncs)
3682
3683
packed_reduction_method = __kmp_determine_reduction_method(
3684
loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3685
__KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3686
3687
OMPT_REDUCTION_DECL(th, global_tid);
3688
if (packed_reduction_method == critical_reduce_block) {
3689
3690
OMPT_REDUCTION_BEGIN;
3691
3692
__kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3693
retval = 1;
3694
3695
} else if (packed_reduction_method == empty_reduce_block) {
3696
3697
OMPT_REDUCTION_BEGIN;
3698
3699
// usage: if team size == 1, no synchronization is required ( Intel
3700
// platforms only )
3701
retval = 1;
3702
3703
} else if (packed_reduction_method == atomic_reduce_block) {
3704
3705
retval = 2;
3706
3707
// all threads should do this pop here (because __kmpc_end_reduce_nowait()
3708
// won't be called by the code gen)
3709
// (it's not quite good, because the checking block has been closed by
3710
// this 'pop',
3711
// but atomic operation has not been executed yet, will be executed
3712
// slightly later, literally on next instruction)
3713
if (__kmp_env_consistency_check)
3714
__kmp_pop_sync(global_tid, ct_reduce, loc);
3715
3716
} else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3717
tree_reduce_block)) {
3718
3719
// AT: performance issue: a real barrier here
3720
// AT: (if primary thread is slow, other threads are blocked here waiting for
3721
// the primary thread to come and release them)
3722
// AT: (it's not what a customer might expect specifying NOWAIT clause)
3723
// AT: (specifying NOWAIT won't result in improvement of performance, it'll
3724
// be confusing to a customer)
3725
// AT: another implementation of *barrier_gather*nowait() (or some other design)
3726
// might go faster and be more in line with the sense of NOWAIT
3727
// AT: TO DO: do epcc test and compare times
3728
3729
// this barrier should be invisible to a customer and to the threading profile
3730
// tool (it's neither a terminating barrier nor customer's code, it's
3731
// used for an internal purpose)
3732
#if OMPT_SUPPORT
3733
// JP: can this barrier potentially lead to task scheduling?
3734
// JP: as long as there is a barrier in the implementation, OMPT should and
3735
// will provide the barrier events
3736
// so we set up the necessary frame/return addresses.
3737
ompt_frame_t *ompt_frame;
3738
if (ompt_enabled.enabled) {
3739
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3740
if (ompt_frame->enter_frame.ptr == NULL)
3741
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3742
}
3743
OMPT_STORE_RETURN_ADDRESS(global_tid);
3744
#endif
3745
#if USE_ITT_NOTIFY
3746
__kmp_threads[global_tid]->th.th_ident = loc;
3747
#endif
3748
retval =
3749
__kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3750
global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3751
retval = (retval != 0) ? (0) : (1);
3752
#if OMPT_SUPPORT && OMPT_OPTIONAL
3753
if (ompt_enabled.enabled) {
3754
ompt_frame->enter_frame = ompt_data_none;
3755
}
3756
#endif
3757
3758
// all other workers except primary thread should do this pop here
3759
// ( none of other workers will get to __kmpc_end_reduce_nowait() )
3760
if (__kmp_env_consistency_check) {
3761
if (retval == 0) {
3762
__kmp_pop_sync(global_tid, ct_reduce, loc);
3763
}
3764
}
3765
3766
} else {
3767
3768
// should never reach this block
3769
KMP_ASSERT(0); // "unexpected method"
3770
}
3771
if (teams_swapped) {
3772
__kmp_restore_swapped_teams(th, team, task_state);
3773
}
3774
KA_TRACE(
3775
10,
3776
("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3777
global_tid, packed_reduction_method, retval));
3778
3779
return retval;
3780
}
3781
3782
/*!
3783
@ingroup SYNCHRONIZATION
3784
@param loc source location information
3785
@param global_tid global thread id.
3786
@param lck pointer to the unique lock data structure
3787
3788
Finish the execution of a reduce nowait.
3789
*/
3790
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3791
kmp_critical_name *lck) {
3792
3793
PACKED_REDUCTION_METHOD_T packed_reduction_method;
3794
3795
KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3796
__kmp_assert_valid_gtid(global_tid);
3797
3798
packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3799
3800
OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3801
3802
if (packed_reduction_method == critical_reduce_block) {
3803
3804
__kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3805
OMPT_REDUCTION_END;
3806
3807
} else if (packed_reduction_method == empty_reduce_block) {
3808
3809
// usage: if team size == 1, no synchronization is required ( on Intel
3810
// platforms only )
3811
3812
OMPT_REDUCTION_END;
3813
3814
} else if (packed_reduction_method == atomic_reduce_block) {
3815
3816
// neither primary thread nor other workers should get here
3817
// (code gen does not generate this call in case 2: atomic reduce block)
3818
// actually it's better to remove this elseif altogether;
3820
// after removal this value will be checked by the 'else' and will assert
3820
3821
} else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3822
tree_reduce_block)) {
3823
3824
// only primary thread gets here
3825
// OMPT: tree reduction is annotated in the barrier code
3826
3827
} else {
3828
3829
// should never reach this block
3830
KMP_ASSERT(0); // "unexpected method"
3831
}
3832
3833
if (__kmp_env_consistency_check)
3834
__kmp_pop_sync(global_tid, ct_reduce, loc);
3835
3836
KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3837
global_tid, packed_reduction_method));
3838
3839
return;
3840
}
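// Editor's note: an illustrative, simplified sketch (an assumed code-generation
// pattern, not taken verbatim from any compiler; reduce_data, reduce_func, crit
// and global_sum are placeholders) of how the nowait pair above may be driven
// for a reduction(+:sum) clause with nowait:
//
//   switch (__kmpc_reduce_nowait(loc, gtid, 1, sizeof(sum), &reduce_data,
//                                reduce_func, &crit)) {
//   case 1: // critical/empty method on every thread, or tree method on the
//           // primary thread: fold the partial result, then close the block
//     global_sum += sum;
//     __kmpc_end_reduce_nowait(loc, gtid, &crit);
//     break;
//   case 2: // atomic method: combine atomically; per the comments above,
//           // no __kmpc_end_reduce_nowait() call is generated for this case
//     #pragma omp atomic
//     global_sum += sum;
//     break;
//   default: // 0: remaining workers under the tree method; nothing left to do
//     break;
//   }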
3841
3842
/* 2.a.ii. Reduce Block with a terminating barrier */
3843
3844
/*!
3845
@ingroup SYNCHRONIZATION
3846
@param loc source location information
3847
@param global_tid global thread number
3848
@param num_vars number of items (variables) to be reduced
3849
@param reduce_size size of data in bytes to be reduced
3850
@param reduce_data pointer to data to be reduced
3851
@param reduce_func callback function providing reduction operation on two
3852
operands and returning result of reduction in lhs_data
3853
@param lck pointer to the unique lock data structure
3854
@result 1 for the primary thread, 0 for all other team threads, 2 for all team
3855
threads if atomic reduction needed
3856
3857
A blocking reduce that includes an implicit barrier.
3858
*/
3859
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3860
size_t reduce_size, void *reduce_data,
3861
void (*reduce_func)(void *lhs_data, void *rhs_data),
3862
kmp_critical_name *lck) {
3863
KMP_COUNT_BLOCK(REDUCE_wait);
3864
int retval = 0;
3865
PACKED_REDUCTION_METHOD_T packed_reduction_method;
3866
kmp_info_t *th;
3867
kmp_team_t *team;
3868
int teams_swapped = 0, task_state;
3869
3870
KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3871
__kmp_assert_valid_gtid(global_tid);
3872
3873
// why do we need this initialization here at all?
3874
// Reduction clause can not be a stand-alone directive.
3875
3876
// do not call __kmp_serial_initialize(), it will be called by
3877
// __kmp_parallel_initialize() if needed
3878
// possible detection of false-positive race by the threadchecker ???
3879
if (!TCR_4(__kmp_init_parallel))
3880
__kmp_parallel_initialize();
3881
3882
__kmp_resume_if_soft_paused();
3883
3884
// check correctness of reduce block nesting
3885
#if KMP_USE_DYNAMIC_LOCK
3886
if (__kmp_env_consistency_check)
3887
__kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3888
#else
3889
if (__kmp_env_consistency_check)
3890
__kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3891
#endif
3892
3893
th = __kmp_thread_from_gtid(global_tid);
3894
teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3895
3896
packed_reduction_method = __kmp_determine_reduction_method(
3897
loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3898
__KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3899
3900
OMPT_REDUCTION_DECL(th, global_tid);
3901
3902
if (packed_reduction_method == critical_reduce_block) {
3903
3904
OMPT_REDUCTION_BEGIN;
3905
__kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3906
retval = 1;
3907
3908
} else if (packed_reduction_method == empty_reduce_block) {
3909
3910
OMPT_REDUCTION_BEGIN;
3911
// usage: if team size == 1, no synchronization is required ( Intel
3912
// platforms only )
3913
retval = 1;
3914
3915
} else if (packed_reduction_method == atomic_reduce_block) {
3916
3917
retval = 2;
3918
3919
} else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3920
tree_reduce_block)) {
3921
3922
// case tree_reduce_block:
3923
// this barrier should be visible to a customer and to the threading profile
3924
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
3925
#if OMPT_SUPPORT
3926
ompt_frame_t *ompt_frame;
3927
if (ompt_enabled.enabled) {
3928
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3929
if (ompt_frame->enter_frame.ptr == NULL)
3930
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3931
}
3932
OMPT_STORE_RETURN_ADDRESS(global_tid);
3933
#endif
3934
#if USE_ITT_NOTIFY
3935
__kmp_threads[global_tid]->th.th_ident =
3936
loc; // needed for correct notification of frames
3937
#endif
3938
retval =
3939
__kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3940
global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3941
retval = (retval != 0) ? (0) : (1);
3942
#if OMPT_SUPPORT && OMPT_OPTIONAL
3943
if (ompt_enabled.enabled) {
3944
ompt_frame->enter_frame = ompt_data_none;
3945
}
3946
#endif
3947
3948
// all other workers except primary thread should do this pop here
3949
// (none of other workers except primary will enter __kmpc_end_reduce())
3950
if (__kmp_env_consistency_check) {
3951
if (retval == 0) { // 0: all other workers; 1: primary thread
3952
__kmp_pop_sync(global_tid, ct_reduce, loc);
3953
}
3954
}
3955
3956
} else {
3957
3958
// should never reach this block
3959
KMP_ASSERT(0); // "unexpected method"
3960
}
3961
if (teams_swapped) {
3962
__kmp_restore_swapped_teams(th, team, task_state);
3963
}
3964
3965
KA_TRACE(10,
3966
("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3967
global_tid, packed_reduction_method, retval));
3968
return retval;
3969
}
3970
3971
/*!
3972
@ingroup SYNCHRONIZATION
3973
@param loc source location information
3974
@param global_tid global thread id.
3975
@param lck pointer to the unique lock data structure
3976
3977
Finish the execution of a blocking reduce.
3978
The <tt>lck</tt> pointer must be the same as that used in the corresponding
3979
start function.
3980
*/
3981
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3982
kmp_critical_name *lck) {
3983
3984
PACKED_REDUCTION_METHOD_T packed_reduction_method;
3985
kmp_info_t *th;
3986
kmp_team_t *team;
3987
int teams_swapped = 0, task_state;
3988
3989
KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3990
__kmp_assert_valid_gtid(global_tid);
3991
3992
th = __kmp_thread_from_gtid(global_tid);
3993
teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3994
3995
packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3996
3997
// this barrier should be visible to a customer and to the threading profile
3998
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
3999
OMPT_REDUCTION_DECL(th, global_tid);
4000
4001
if (packed_reduction_method == critical_reduce_block) {
4002
__kmp_end_critical_section_reduce_block(loc, global_tid, lck);
4003
4004
OMPT_REDUCTION_END;
4005
4006
// TODO: implicit barrier: should be exposed
4007
#if OMPT_SUPPORT
4008
ompt_frame_t *ompt_frame;
4009
if (ompt_enabled.enabled) {
4010
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4011
if (ompt_frame->enter_frame.ptr == NULL)
4012
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4013
}
4014
OMPT_STORE_RETURN_ADDRESS(global_tid);
4015
#endif
4016
#if USE_ITT_NOTIFY
4017
__kmp_threads[global_tid]->th.th_ident = loc;
4018
#endif
4019
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4020
#if OMPT_SUPPORT && OMPT_OPTIONAL
4021
if (ompt_enabled.enabled) {
4022
ompt_frame->enter_frame = ompt_data_none;
4023
}
4024
#endif
4025
4026
} else if (packed_reduction_method == empty_reduce_block) {
4027
4028
OMPT_REDUCTION_END;
4029
4030
// usage: if team size == 1, no synchronization is required (on Intel platforms only)
4031
4032
// TODO: implicit barrier: should be exposed
4033
#if OMPT_SUPPORT
4034
ompt_frame_t *ompt_frame;
4035
if (ompt_enabled.enabled) {
4036
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4037
if (ompt_frame->enter_frame.ptr == NULL)
4038
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4039
}
4040
OMPT_STORE_RETURN_ADDRESS(global_tid);
4041
#endif
4042
#if USE_ITT_NOTIFY
4043
__kmp_threads[global_tid]->th.th_ident = loc;
4044
#endif
4045
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4046
#if OMPT_SUPPORT && OMPT_OPTIONAL
4047
if (ompt_enabled.enabled) {
4048
ompt_frame->enter_frame = ompt_data_none;
4049
}
4050
#endif
4051
4052
} else if (packed_reduction_method == atomic_reduce_block) {
4053
4054
#if OMPT_SUPPORT
4055
ompt_frame_t *ompt_frame;
4056
if (ompt_enabled.enabled) {
4057
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
4058
if (ompt_frame->enter_frame.ptr == NULL)
4059
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
4060
}
4061
OMPT_STORE_RETURN_ADDRESS(global_tid);
4062
#endif
4063
// TODO: implicit barrier: should be exposed
4064
#if USE_ITT_NOTIFY
4065
__kmp_threads[global_tid]->th.th_ident = loc;
4066
#endif
4067
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
4068
#if OMPT_SUPPORT && OMPT_OPTIONAL
4069
if (ompt_enabled.enabled) {
4070
ompt_frame->enter_frame = ompt_data_none;
4071
}
4072
#endif
4073
4074
} else if (TEST_REDUCTION_METHOD(packed_reduction_method,
4075
tree_reduce_block)) {
4076
4077
// only primary thread executes here (primary releases all other workers)
4078
__kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
4079
global_tid);
4080
4081
} else {
4082
4083
// should never reach this block
4084
KMP_ASSERT(0); // "unexpected method"
4085
}
4086
if (teams_swapped) {
4087
__kmp_restore_swapped_teams(th, team, task_state);
4088
}
4089
4090
if (__kmp_env_consistency_check)
4091
__kmp_pop_sync(global_tid, ct_reduce, loc);
4092
4093
KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
4094
global_tid, packed_reduction_method));
4095
4096
return;
4097
}
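// Editor's note: an illustrative, simplified sketch (an assumed code-generation
// pattern; placeholders as in the nowait sketch above) for the blocking pair.
// The terminating barrier sits inside __kmpc_end_reduce(), so the end call is
// also expected for the atomic method:
//
//   int r = __kmpc_reduce(loc, gtid, 1, sizeof(sum), &reduce_data, reduce_func,
//                         &crit);
//   if (r == 1) {          // critical/empty, or tree method on the primary
//     global_sum += sum;
//     __kmpc_end_reduce(loc, gtid, &crit); // terminating barrier lives here
//   } else if (r == 2) {   // atomic method on every thread
//     #pragma omp atomic
//     global_sum += sum;
//     __kmpc_end_reduce(loc, gtid, &crit);
//   }                      // r == 0: tree-method workers do not call the end
//                          // function; the primary thread releases them via
//                          // __kmp_end_split_barrier()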
4098
4099
#undef __KMP_GET_REDUCTION_METHOD
4100
#undef __KMP_SET_REDUCTION_METHOD
4101
4102
/* end of interface to fast scalable reduce routines */
4103
4104
kmp_uint64 __kmpc_get_taskid() {
4105
4106
kmp_int32 gtid;
4107
kmp_info_t *thread;
4108
4109
gtid = __kmp_get_gtid();
4110
if (gtid < 0) {
4111
return 0;
4112
}
4113
thread = __kmp_thread_from_gtid(gtid);
4114
return thread->th.th_current_task->td_task_id;
4115
4116
} // __kmpc_get_taskid
4117
4118
kmp_uint64 __kmpc_get_parent_taskid() {
4119
4120
kmp_int32 gtid;
4121
kmp_info_t *thread;
4122
kmp_taskdata_t *parent_task;
4123
4124
gtid = __kmp_get_gtid();
4125
if (gtid < 0) {
4126
return 0;
4127
}
4128
thread = __kmp_thread_from_gtid(gtid);
4129
parent_task = thread->th.th_current_task->td_parent;
4130
return (parent_task == NULL ? 0 : parent_task->td_task_id);
4131
4132
} // __kmpc_get_parent_taskid
4133
4134
/*!
4135
@ingroup WORK_SHARING
4136
@param loc source location information.
4137
@param gtid global thread number.
4138
@param num_dims number of associated doacross loops.
4139
@param dims info on loops bounds.
4140
4141
Initialize doacross loop information.
4142
The compiler is expected to send us inclusive bounds,
4143
e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2.
4144
*/
4145
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
4146
const struct kmp_dim *dims) {
4147
__kmp_assert_valid_gtid(gtid);
4148
int j, idx;
4149
kmp_int64 last, trace_count;
4150
kmp_info_t *th = __kmp_threads[gtid];
4151
kmp_team_t *team = th->th.th_team;
4152
kmp_uint32 *flags;
4153
kmp_disp_t *pr_buf = th->th.th_dispatch;
4154
dispatch_shared_info_t *sh_buf;
4155
4156
KA_TRACE(
4157
20,
4158
("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
4159
gtid, num_dims, !team->t.t_serialized));
4160
KMP_DEBUG_ASSERT(dims != NULL);
4161
KMP_DEBUG_ASSERT(num_dims > 0);
4162
4163
if (team->t.t_serialized) {
4164
KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
4165
return; // no dependencies if team is serialized
4166
}
4167
KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
4168
idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
4169
// the next loop
4170
sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4171
4172
// Save bounds info into allocated private buffer
4173
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
4174
pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
4175
th, sizeof(kmp_int64) * (4 * num_dims + 1));
4176
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4177
pr_buf->th_doacross_info[0] =
4178
(kmp_int64)num_dims; // first element is number of dimensions
4179
// Save also address of num_done in order to access it later without knowing
4180
// the buffer index
4181
pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
4182
pr_buf->th_doacross_info[2] = dims[0].lo;
4183
pr_buf->th_doacross_info[3] = dims[0].up;
4184
pr_buf->th_doacross_info[4] = dims[0].st;
4185
last = 5;
4186
for (j = 1; j < num_dims; ++j) {
4187
kmp_int64
4188
range_length; // To keep ranges of all dimensions but the first dims[0]
4189
if (dims[j].st == 1) { // most common case
4190
// AC: should we care about ranges bigger than LLONG_MAX? (not for now)
4191
range_length = dims[j].up - dims[j].lo + 1;
4192
} else {
4193
if (dims[j].st > 0) {
4194
KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
4195
range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
4196
} else { // negative increment
4197
KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
4198
range_length =
4199
(kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
4200
}
4201
}
4202
pr_buf->th_doacross_info[last++] = range_length;
4203
pr_buf->th_doacross_info[last++] = dims[j].lo;
4204
pr_buf->th_doacross_info[last++] = dims[j].up;
4205
pr_buf->th_doacross_info[last++] = dims[j].st;
4206
}
4207
4208
// Compute total trip count.
4209
// Start with range of dims[0] which we don't need to keep in the buffer.
4210
if (dims[0].st == 1) { // most common case
4211
trace_count = dims[0].up - dims[0].lo + 1;
4212
} else if (dims[0].st > 0) {
4213
KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
4214
trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
4215
} else { // negative increment
4216
KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
4217
trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
4218
}
4219
for (j = 1; j < num_dims; ++j) {
4220
trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
4221
}
4222
KMP_DEBUG_ASSERT(trace_count > 0);
4223
4224
// Check if shared buffer is not occupied by other loop (idx -
4225
// __kmp_dispatch_num_buffers)
4226
if (idx != sh_buf->doacross_buf_idx) {
4227
// Shared buffer is occupied, wait for it to be free
4228
__kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
4229
__kmp_eq_4, NULL);
4230
}
4231
#if KMP_32_BIT_ARCH
4232
// Check if we are the first thread. After the CAS the first thread gets 0,
4233
// others get 1 if initialization is in progress, allocated pointer otherwise.
4234
// Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
4235
flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
4236
(volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
4237
#else
4238
flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
4239
(volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
4240
#endif
4241
if (flags == NULL) {
4242
// we are the first thread, allocate the array of flags
4243
size_t size =
4244
(size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration
4245
flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
4246
KMP_MB();
4247
sh_buf->doacross_flags = flags;
4248
} else if (flags == (kmp_uint32 *)1) {
4249
#if KMP_32_BIT_ARCH
4250
// initialization is still in progress, need to wait
4251
while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
4252
#else
4253
while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
4254
#endif
4255
KMP_YIELD(TRUE);
4256
KMP_MB();
4257
} else {
4258
KMP_MB();
4259
}
4260
KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
4261
pr_buf->th_doacross_flags =
4262
sh_buf->doacross_flags; // save private copy in order to not
4263
// touch shared buffer on each iteration
4264
KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4265
}
4266
4267
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4268
__kmp_assert_valid_gtid(gtid);
4269
kmp_int64 shft;
4270
size_t num_dims, i;
4271
kmp_uint32 flag;
4272
kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4273
kmp_info_t *th = __kmp_threads[gtid];
4274
kmp_team_t *team = th->th.th_team;
4275
kmp_disp_t *pr_buf;
4276
kmp_int64 lo, up, st;
4277
4278
KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4279
if (team->t.t_serialized) {
4280
KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4281
return; // no dependencies if team is serialized
4282
}
4283
4284
// calculate sequential iteration number and check out-of-bounds condition
4285
pr_buf = th->th.th_dispatch;
4286
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4287
num_dims = (size_t)pr_buf->th_doacross_info[0];
4288
lo = pr_buf->th_doacross_info[2];
4289
up = pr_buf->th_doacross_info[3];
4290
st = pr_buf->th_doacross_info[4];
4291
#if OMPT_SUPPORT && OMPT_OPTIONAL
4292
SimpleVLA<ompt_dependence_t> deps(num_dims);
4293
#endif
4294
if (st == 1) { // most common case
4295
if (vec[0] < lo || vec[0] > up) {
4296
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4297
"bounds [%lld,%lld]\n",
4298
gtid, vec[0], lo, up));
4299
return;
4300
}
4301
iter_number = vec[0] - lo;
4302
} else if (st > 0) {
4303
if (vec[0] < lo || vec[0] > up) {
4304
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4305
"bounds [%lld,%lld]\n",
4306
gtid, vec[0], lo, up));
4307
return;
4308
}
4309
iter_number = (kmp_uint64)(vec[0] - lo) / st;
4310
} else { // negative increment
4311
if (vec[0] > lo || vec[0] < up) {
4312
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4313
"bounds [%lld,%lld]\n",
4314
gtid, vec[0], lo, up));
4315
return;
4316
}
4317
iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4318
}
4319
#if OMPT_SUPPORT && OMPT_OPTIONAL
4320
deps[0].variable.value = iter_number;
4321
deps[0].dependence_type = ompt_dependence_type_sink;
4322
#endif
4323
for (i = 1; i < num_dims; ++i) {
4324
kmp_int64 iter, ln;
4325
size_t j = i * 4;
4326
ln = pr_buf->th_doacross_info[j + 1];
4327
lo = pr_buf->th_doacross_info[j + 2];
4328
up = pr_buf->th_doacross_info[j + 3];
4329
st = pr_buf->th_doacross_info[j + 4];
4330
if (st == 1) {
4331
if (vec[i] < lo || vec[i] > up) {
4332
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4333
"bounds [%lld,%lld]\n",
4334
gtid, vec[i], lo, up));
4335
return;
4336
}
4337
iter = vec[i] - lo;
4338
} else if (st > 0) {
4339
if (vec[i] < lo || vec[i] > up) {
4340
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4341
"bounds [%lld,%lld]\n",
4342
gtid, vec[i], lo, up));
4343
return;
4344
}
4345
iter = (kmp_uint64)(vec[i] - lo) / st;
4346
} else { // st < 0
4347
if (vec[i] > lo || vec[i] < up) {
4348
KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4349
"bounds [%lld,%lld]\n",
4350
gtid, vec[i], lo, up));
4351
return;
4352
}
4353
iter = (kmp_uint64)(lo - vec[i]) / (-st);
4354
}
4355
iter_number = iter + ln * iter_number;
4356
#if OMPT_SUPPORT && OMPT_OPTIONAL
4357
deps[i].variable.value = iter;
4358
deps[i].dependence_type = ompt_dependence_type_sink;
4359
#endif
4360
}
4361
shft = iter_number % 32; // use 32-bit granularity
4362
iter_number >>= 5; // divided by 32
4363
flag = 1 << shft;
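// Editor's note (illustrative worked example): for a linearized iteration
// number of 70, shft == 70 % 32 == 6 and the shifted iter_number == 70 >> 5
// == 2, so this thread waits on bit 6 (flag == 0x40) of th_doacross_flags[2].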
4364
while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4365
KMP_YIELD(TRUE);
4366
}
4367
KMP_MB();
4368
#if OMPT_SUPPORT && OMPT_OPTIONAL
4369
if (ompt_enabled.ompt_callback_dependences) {
4370
ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4371
&(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4372
}
4373
#endif
4374
KA_TRACE(20,
4375
("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4376
gtid, (iter_number << 5) + shft));
4377
}
4378
4379
void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4380
__kmp_assert_valid_gtid(gtid);
4381
kmp_int64 shft;
4382
size_t num_dims, i;
4383
kmp_uint32 flag;
4384
kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4385
kmp_info_t *th = __kmp_threads[gtid];
4386
kmp_team_t *team = th->th.th_team;
4387
kmp_disp_t *pr_buf;
4388
kmp_int64 lo, st;
4389
4390
KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4391
if (team->t.t_serialized) {
4392
KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4393
return; // no dependencies if team is serialized
4394
}
4395
4396
// calculate sequential iteration number (same as in "wait" but no
4397
// out-of-bounds checks)
4398
pr_buf = th->th.th_dispatch;
4399
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4400
num_dims = (size_t)pr_buf->th_doacross_info[0];
4401
lo = pr_buf->th_doacross_info[2];
4402
st = pr_buf->th_doacross_info[4];
4403
#if OMPT_SUPPORT && OMPT_OPTIONAL
4404
SimpleVLA<ompt_dependence_t> deps(num_dims);
4405
#endif
4406
if (st == 1) { // most common case
4407
iter_number = vec[0] - lo;
4408
} else if (st > 0) {
4409
iter_number = (kmp_uint64)(vec[0] - lo) / st;
4410
} else { // negative increment
4411
iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4412
}
4413
#if OMPT_SUPPORT && OMPT_OPTIONAL
4414
deps[0].variable.value = iter_number;
4415
deps[0].dependence_type = ompt_dependence_type_source;
4416
#endif
4417
for (i = 1; i < num_dims; ++i) {
4418
kmp_int64 iter, ln;
4419
size_t j = i * 4;
4420
ln = pr_buf->th_doacross_info[j + 1];
4421
lo = pr_buf->th_doacross_info[j + 2];
4422
st = pr_buf->th_doacross_info[j + 4];
4423
if (st == 1) {
4424
iter = vec[i] - lo;
4425
} else if (st > 0) {
4426
iter = (kmp_uint64)(vec[i] - lo) / st;
4427
} else { // st < 0
4428
iter = (kmp_uint64)(lo - vec[i]) / (-st);
4429
}
4430
iter_number = iter + ln * iter_number;
4431
#if OMPT_SUPPORT && OMPT_OPTIONAL
4432
deps[i].variable.value = iter;
4433
deps[i].dependence_type = ompt_dependence_type_source;
4434
#endif
4435
}
4436
#if OMPT_SUPPORT && OMPT_OPTIONAL
4437
if (ompt_enabled.ompt_callback_dependences) {
4438
ompt_callbacks.ompt_callback(ompt_callback_dependences)(
4439
&(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
4440
}
4441
#endif
4442
shft = iter_number % 32; // use 32-bit granularity
4443
iter_number >>= 5; // divided by 32
4444
flag = 1 << shft;
4445
KMP_MB();
4446
if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4447
KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4448
KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4449
(iter_number << 5) + shft));
4450
}
4451
4452
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4453
__kmp_assert_valid_gtid(gtid);
4454
kmp_int32 num_done;
4455
kmp_info_t *th = __kmp_threads[gtid];
4456
kmp_team_t *team = th->th.th_team;
4457
kmp_disp_t *pr_buf = th->th.th_dispatch;
4458
4459
KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4460
if (team->t.t_serialized) {
4461
KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4462
return; // nothing to do
4463
}
4464
num_done =
4465
KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
4466
if (num_done == th->th.th_team_nproc) {
4467
// we are the last thread, need to free shared resources
4468
int idx = pr_buf->th_doacross_buf_idx - 1;
4469
dispatch_shared_info_t *sh_buf =
4470
&team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4471
KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4472
(kmp_int64)&sh_buf->doacross_num_done);
4473
KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4474
KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4475
__kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4476
sh_buf->doacross_flags = NULL;
4477
sh_buf->doacross_num_done = 0;
4478
sh_buf->doacross_buf_idx +=
4479
__kmp_dispatch_num_buffers; // free buffer for future re-use
4480
}
4481
// free private resources (need to keep buffer index forever)
4482
pr_buf->th_doacross_flags = NULL;
4483
__kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4484
pr_buf->th_doacross_info = NULL;
4485
KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4486
}
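// Editor's note: an illustrative source-level view (an assumed, typical
// lowering; exact call placement is the compiler's choice, and use()/produce()
// are placeholders) of the doacross entry points above for a one-dimensional
// ordered loop matching the example bounds in __kmpc_doacross_init():
//
//   #pragma omp for ordered(1)                  // __kmpc_doacross_init(.., 1, dims)
//   for (int i = 2; i < 9; i += 2) {            //   dims[0] = {lo=2, up=8, st=2}
//     #pragma omp ordered depend(sink : i - 2)  // __kmpc_doacross_wait(.., {i-2})
//     use(a[i - 2]);
//     a[i] = produce(i);
//     #pragma omp ordered depend(source)        // __kmpc_doacross_post(.., {i})
//   }                                           // __kmpc_doacross_fini() after the loop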
4487
4488
/* OpenMP 5.1 Memory Management routines */
4489
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4490
return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
4491
}
4492
4493
void *omp_aligned_alloc(size_t align, size_t size,
4494
omp_allocator_handle_t allocator) {
4495
return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
4496
}
4497
4498
void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
4499
return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
4500
}
4501
4502
void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
4503
omp_allocator_handle_t allocator) {
4504
return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
4505
}
4506
4507
void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
4508
omp_allocator_handle_t free_allocator) {
4509
return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
4510
free_allocator);
4511
}
4512
4513
void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4514
___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4515
}
4516
/* end of OpenMP 5.1 Memory Management routines */
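// Editor's note: an illustrative user-level sketch (a minimal example; error
// handling omitted) of the allocator entry points above, using the predefined
// omp_default_mem_alloc handle:
//
//   #include <omp.h>
//   double *buf = (double *)omp_aligned_alloc(64, 1024 * sizeof(double),
//                                             omp_default_mem_alloc);
//   double *tmp = (double *)omp_calloc(1024, sizeof(double),
//                                      omp_default_mem_alloc);
//   buf = (double *)omp_realloc(buf, 2048 * sizeof(double),
//                               omp_default_mem_alloc, omp_default_mem_alloc);
//   omp_free(tmp, omp_default_mem_alloc);
//   omp_free(buf, omp_default_mem_alloc);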
4517
4518
int __kmpc_get_target_offload(void) {
4519
if (!__kmp_init_serial) {
4520
__kmp_serial_initialize();
4521
}
4522
return __kmp_target_offload;
4523
}
4524
4525
int __kmpc_pause_resource(kmp_pause_status_t level) {
4526
if (!__kmp_init_serial) {
4527
return 1; // Can't pause if runtime is not initialized
4528
}
4529
return __kmp_pause_resource(level);
4530
}
4531
4532
void __kmpc_error(ident_t *loc, int severity, const char *message) {
4533
if (!__kmp_init_serial)
4534
__kmp_serial_initialize();
4535
4536
KMP_ASSERT(severity == severity_warning || severity == severity_fatal);
4537
4538
#if OMPT_SUPPORT
4539
if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
4540
ompt_callbacks.ompt_callback(ompt_callback_error)(
4541
(ompt_severity_t)severity, message, KMP_STRLEN(message),
4542
OMPT_GET_RETURN_ADDRESS(0));
4543
}
4544
#endif // OMPT_SUPPORT
4545
4546
char *src_loc;
4547
if (loc && loc->psource) {
4548
kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
4549
src_loc =
4550
__kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
4551
__kmp_str_loc_free(&str_loc);
4552
} else {
4553
src_loc = __kmp_str_format("unknown");
4554
}
4555
4556
if (severity == severity_warning)
4557
KMP_WARNING(UserDirectedWarning, src_loc, message);
4558
else
4559
KMP_FATAL(UserDirectedError, src_loc, message);
4560
4561
__kmp_str_free(&src_loc);
4562
}
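// Editor's note: an illustrative source-level view (an assumed lowering) of the
// entry point above. An OpenMP 5.1 error directive such as
//
//   #pragma omp error severity(warning) at(execution) message("check input")
//
// is expected to reach the runtime as
//
//   __kmpc_error(loc, severity_warning, "check input");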
4563
4564
// Mark begin of scope directive.
4565
void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4566
// reserved is for extension of scope directive and not used.
4567
#if OMPT_SUPPORT && OMPT_OPTIONAL
4568
if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4569
kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4570
int tid = __kmp_tid_from_gtid(gtid);
4571
ompt_callbacks.ompt_callback(ompt_callback_work)(
4572
ompt_work_scope, ompt_scope_begin,
4573
&(team->t.ompt_team_info.parallel_data),
4574
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4575
OMPT_GET_RETURN_ADDRESS(0));
4576
}
4577
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4578
}
4579
4580
// Mark end of scope directive
4581
void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
4582
// reserved is for extension of scope directive and not used.
4583
#if OMPT_SUPPORT && OMPT_OPTIONAL
4584
if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
4585
kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
4586
int tid = __kmp_tid_from_gtid(gtid);
4587
ompt_callbacks.ompt_callback(ompt_callback_work)(
4588
ompt_work_scope, ompt_scope_end,
4589
&(team->t.ompt_team_info.parallel_data),
4590
&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
4591
OMPT_GET_RETURN_ADDRESS(0));
4592
}
4593
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
4594
}
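// Editor's note: an illustrative source-level view (an assumed lowering; work()
// is a placeholder) of the scope markers above. A construct such as
//
//   #pragma omp scope reduction(+ : x)
//   { x += work(); }
//
// is expected to be bracketed by __kmpc_scope()/__kmpc_end_scope() so an OMPT
// tool observes matching ompt_work_scope begin/end events.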
4595
4596
#ifdef KMP_USE_VERSION_SYMBOLS
4597
// For GOMP compatibility there are two versions of each omp_* API.
4598
// One is the plain C symbol and one is the Fortran symbol with an appended
4599
// underscore. When we implement a specific ompc_* version of an omp_*
4600
// function, we want the plain GOMP versioned symbol to alias the ompc_* version
4601
// instead of the Fortran versions in kmp_ftn_entry.h
4602
extern "C" {
4603
// Have to undef these from omp.h so they aren't translated into
4604
// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below
4605
#ifdef omp_set_affinity_format
4606
#undef omp_set_affinity_format
4607
#endif
4608
#ifdef omp_get_affinity_format
4609
#undef omp_get_affinity_format
4610
#endif
4611
#ifdef omp_display_affinity
4612
#undef omp_display_affinity
4613
#endif
4614
#ifdef omp_capture_affinity
4615
#undef omp_capture_affinity
4616
#endif
4617
KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
4618
"OMP_5.0");
4619
KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
4620
"OMP_5.0");
4621
KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
4622
"OMP_5.0");
4623
KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
4624
"OMP_5.0");
4625
} // extern "C"
4626
#endif
4627
4628