Path: blob/main/contrib/llvm-project/openmp/runtime/src/kmp_csupport.cpp
/*
 * kmp_csupport.cpp -- kfront linkage support for OpenMP.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#define __KMP_IMP
#include "omp.h" /* extern "C" declarations of user-visible routines */
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_lock.h"
#include "kmp_stats.h"
#include "kmp_utils.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512

// flags will be used in future, e.g. to implement openmp_strict library
// restrictions

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc in source location information
 * @param flags in for future use (currently ignored)
 *
 * Initialize the runtime library. This call is optional; if it is not made then
 * it will be implicitly called by attempts to use other library functions.
 */
void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
  // By default __kmpc_begin() is no-op.
  char *env;
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    // By default __kmp_ignore_mppbeg() returns TRUE.
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  }
}

/*!
 * @ingroup STARTUP_SHUTDOWN
 * @param loc source location information
 *
 * Shutdown the runtime library. This is also optional, and even if called will
 * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to
 * zero.
 */
void __kmpc_end(ident_t *loc) {
  // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end()
  // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND
  // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend()
  // returns FALSE and __kmpc_end() will unregister this root (it can cause
  // library shut down).
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);
  }
#if KMP_OS_WINDOWS && OMPT_SUPPORT
  // Normal exit process on Windows does not allow worker threads of the final
  // parallel region to finish reporting their events, so shutting down the
  // library here fixes the issue at least for the cases where __kmpc_end() is
  // placed properly.
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
#endif
}

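/*!
Illustrative sketch only (all names below are hypothetical and not part of this
runtime): how a user program might bracket its OpenMP usage with the two
optional entry points above. The runtime initializes itself lazily on first
use, so this bracketing is never required.
@code
extern ident_t dummy_loc; // a compiler normally materializes these descriptors
int main(void) {
  __kmpc_begin(&dummy_loc, 0); // optional; flags are currently ignored
  // ... code that calls other __kmpc_* / omp_* entry points ...
  __kmpc_end(&dummy_loc); // optional; only acts when KMP_IGNORE_MPPEND=0
  return 0;
}
@endcode
*/
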
/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The global thread index of the active thread.

This function can be called in any context.

If the runtime has only been entered at the outermost level from a
single (necessarily non-OpenMP<sup>*</sup>) thread, then the thread number is
that which would be returned by omp_get_thread_num() in the outermost
active parallel construct. (Or zero if there is no active parallel
construct, since the primary thread is necessarily thread zero).

If multiple non-OpenMP threads all enter an OpenMP construct then this
will be a unique thread identifier among all the threads created by
the OpenMP runtime (but the value cannot be defined in terms of
OpenMP thread ids returned by omp_get_thread_num()).
*/
kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

  return gtid;
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads under control of the OpenMP<sup>*</sup> runtime

This function can be called in any context.
It returns the total number of threads under the control of the OpenMP runtime.
That is not a number that can be determined by any OpenMP standard calls, since
the library may be called from more than one non-OpenMP thread, and this
reflects the total over all such calls. Similarly, the runtime maintains
underlying threads even when they are not active (since the cost of creating
and destroying OS threads is high); this call counts all such threads even if
they are not waiting for work.
*/
kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The thread number of the calling thread in the innermost active parallel
construct.
*/
kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return The number of threads in the innermost active parallel construct.
*/
kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
}

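/*!
Illustrative sketch only (hypothetical helper, not part of this file): the
relationship between the "global" and "bound" queries above. Within a team,
__kmpc_bound_thread_num() corresponds to omp_get_thread_num(), while
__kmpc_global_thread_num() is unique across every thread the runtime manages,
including threads rooted in other non-OpenMP threads.
@code
void query_example(ident_t *loc) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc); // unique across the runtime
  kmp_int32 tid = __kmpc_bound_thread_num(loc); // 0..N-1 in the innermost team
  kmp_int32 nth = __kmpc_bound_num_threads(loc); // like omp_get_num_threads()
  kmp_int32 all = __kmpc_global_num_threads(loc); // every runtime-managed thread
  KC_TRACE(10, ("query_example: gtid=%d tid=%d nth=%d all=%d\n", gtid, tid, nth, all));
}
@endcode
*/
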
/*!
 * @ingroup DEPRECATED
 * @param loc location description
 *
 * This function need not be called. It always returns TRUE.
 */
kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
#ifndef KMP_DEBUG

  return TRUE;

#else

  const char *semi2;
  const char *semi3;
  int line_no;

  if (__kmp_par_range == 0) {
    return TRUE;
  }
  semi2 = loc->psource;
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  semi2 = strchr(semi2 + 1, ';');
  if (semi2 == NULL) {
    return TRUE;
  }
  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
      name--;
    }
    if ((*name == '/') || (*name == ';')) {
      name++;
    }
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;
    }
  }
  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;
    }
  }
  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    }
    return __kmp_par_range < 0;
  }
  return TRUE;

#endif /* KMP_DEBUG */
}

/*!
@ingroup THREAD_STATES
@param loc Source location information.
@return 1 if this thread is executing inside an active parallel region, zero if
not.
*/
kmp_int32 __kmpc_in_parallel(ident_t *loc) {
  return __kmp_entry_thread()->th.th_root->r.r_active;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_threads number of threads requested for this parallel construct

Set the number of threads to be used by the next fork spawned by this thread.
This call is only required if the parallel construct has a `num_threads` clause.
*/
void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
}

void __kmpc_push_num_threads_strict(ident_t *loc, kmp_int32 global_tid,
                                    kmp_int32 num_threads, int severity,
                                    const char *message) {
  __kmp_push_num_threads(loc, global_tid, num_threads);
  __kmp_set_strict_num_threads(loc, global_tid, severity, message);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param list_length number of entries in the num_threads_list array
@param num_threads_list array of numbers of threads requested for this parallel
construct and subsequent nested parallel constructs

Set the number of threads to be used by the next fork spawned by this thread,
and some nested forks as well.
This call is only required if the parallel construct has a `num_threads` clause
that has a list of integers as the argument.
*/
void __kmpc_push_num_threads_list(ident_t *loc, kmp_int32 global_tid,
                                  kmp_uint32 list_length,
                                  kmp_int32 *num_threads_list) {
  KA_TRACE(20, ("__kmpc_push_num_threads_list: enter T#%d num_threads_list=",
                global_tid));
  KA_TRACE(20, ("%d", num_threads_list[0]));
#ifdef KMP_DEBUG
  for (kmp_uint32 i = 1; i < list_length; ++i)
    KA_TRACE(20, (", %d", num_threads_list[i]));
#endif
  KA_TRACE(20, ("\n"));

  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
}

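/*!
Illustrative sketch only (assumed lowering; `outlined_fn` is a hypothetical
compiler-outlined routine, not a symbol defined by this library): the push
entry points above apply to exactly one upcoming fork, so a compiler typically
emits them immediately before __kmpc_fork_call(). For

  #pragma omp parallel num_threads(4) proc_bind(close)

the emitted sequence would resemble:
@code
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  __kmpc_push_num_threads(&loc, gtid, 4);
  __kmpc_push_proc_bind(&loc, gtid, proc_bind_close);
  __kmpc_fork_call(&loc, 0, (kmpc_micro)outlined_fn);
@endcode
*/
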
void __kmpc_push_num_threads_list_strict(ident_t *loc, kmp_int32 global_tid,
                                         kmp_uint32 list_length,
                                         kmp_int32 *num_threads_list,
                                         int severity, const char *message) {
  __kmp_push_num_threads_list(loc, global_tid, list_length, num_threads_list);
  __kmp_set_strict_num_threads(loc, global_tid, severity, message);
}

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
  /* the num_threads are automatically popped */
}

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param ... pointers to shared variables that aren't global

Do the actual fork and call the microtask in the relevant number of threads.
*/
void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  // If we were in a serial region, then stop the serial timer, record
  // the event, and start parallel region timer
  stats_state_e previous_state = KMP_GET_THREAD_STATE();
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
  } else {
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
  }
  int inParallel = __kmpc_in_parallel(loc);
  if (inParallel) {
    KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
  } else {
    KMP_COUNT_BLOCK(OMP_PARALLEL);
  }
#endif

  // maybe to save thr_state is enough here
  {
    va_list ap;
    va_start(ap, microtask);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      kmp_info_t *master_th = __kmp_threads[gtid];
      ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

#if INCLUDE_SSC_MARKS
    SSC_MARK_FORKING();
#endif
    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
                    kmp_va_addr_of(ap));
#if INCLUDE_SSC_MARKS
    SSC_MARK_JOINING();
#endif
    __kmp_join_call(loc, gtid
#if OMPT_SUPPORT
                    ,
                    fork_context_intel
#endif
    );

    va_end(ap);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif
  }

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
  } else {
    KMP_POP_PARTITIONED_TIMER();
  }
#endif // KMP_STATS_ENABLED
}

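/*!
Illustrative sketch only (assumed lowering; `outlined_fn`, `loc` and `x` are
hypothetical names, not defined by this runtime): the shape of the `microtask`
callback that __kmpc_fork_call() invokes. The outlined routine receives the
global and bound thread ids by pointer, followed by one pointer per ellipsis
argument.
@code
// Outlined body a compiler might produce for a parallel region sharing `x`:
void outlined_fn(kmp_int32 *global_tid, kmp_int32 *bound_tid, int *x) {
  KC_TRACE(10, ("outlined_fn: T#%d sees x=%d\n", *global_tid, *x));
}

// Corresponding call emitted in the encountering function:
//   int x = 42;
//   __kmpc_fork_call(&loc, 1, (kmpc_micro)outlined_fn, &x);
@endcode
*/
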
/*!
@ingroup PARALLEL
@param loc source location information
@param microtask pointer to callback routine consisting of outlined parallel
construct
@param cond condition for running in parallel
@param args struct of pointers to shared variables that aren't global

Perform a fork only if the condition is true.
*/
void __kmpc_fork_call_if(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
                         kmp_int32 cond, void *args) {
  int gtid = __kmp_entry_gtid();
  if (cond) {
    if (args)
      __kmpc_fork_call(loc, argc, microtask, args);
    else
      __kmpc_fork_call(loc, argc, microtask);
  } else {
    __kmpc_serialized_parallel(loc, gtid);

#if OMPT_SUPPORT
    void *exit_frame_ptr;
#endif

    if (args)
      __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
                             /*npr=*/0,
                             /*argc=*/1, &args
#if OMPT_SUPPORT
                             ,
                             &exit_frame_ptr
#endif
      );
    else
      __kmp_invoke_microtask(VOLATILE_CAST(microtask_t) microtask, gtid,
                             /*npr=*/0,
                             /*argc=*/0,
                             /*args=*/nullptr
#if OMPT_SUPPORT
                             ,
                             &exit_frame_ptr
#endif
      );

    __kmpc_end_serialized_parallel(loc, gtid);
  }
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams number of teams requested for the teams construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param thread_limit limit on number of threads which can be created within the
current task

Set the thread_limit for the current task.
This call is there to support the `thread_limit` clause on the `target` construct.
*/
void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,
                             kmp_int32 thread_limit) {
  __kmp_assert_valid_gtid(global_tid);
  kmp_info_t *thread = __kmp_threads[global_tid];
  if (thread_limit > 0)
    thread->th.th_current_task->td_icvs.task_thread_limit = thread_limit;
}

/*!
@ingroup PARALLEL
@param loc source location information
@param global_tid global thread number
@param num_teams_lb lower bound on number of teams requested for the teams
construct
@param num_teams_ub upper bound on number of teams requested for the teams
construct
@param num_threads number of threads per team requested for the teams construct

Set the number of teams to be used by the teams construct. The number of initial
teams created will be greater than or equal to the lower bound and less than or
equal to the upper bound.
This call is only required if the teams construct has a `num_teams` clause
or a `thread_limit` clause (or both).
*/
void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,
                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
}

/*!
@ingroup PARALLEL
@param loc source location information
@param argc total number of arguments in the ellipsis
@param microtask pointer to callback routine consisting of outlined teams
construct
@param ...
pointers to shared variables that aren't global498499Do the actual fork and call the microtask in the relevant number of threads.500*/501void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,502...) {503int gtid = __kmp_entry_gtid();504kmp_info_t *this_thr = __kmp_threads[gtid];505va_list ap;506va_start(ap, microtask);507508#if KMP_STATS_ENABLED509KMP_COUNT_BLOCK(OMP_TEAMS);510stats_state_e previous_state = KMP_GET_THREAD_STATE();511if (previous_state == stats_state_e::SERIAL_REGION) {512KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);513} else {514KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);515}516#endif517518// remember teams entry point and nesting level519this_thr->th.th_teams_microtask = microtask;520this_thr->th.th_teams_level =521this_thr->th.th_team->t.t_level; // AC: can be >0 on host522523#if OMPT_SUPPORT524kmp_team_t *parent_team = this_thr->th.th_team;525int tid = __kmp_tid_from_gtid(gtid);526if (ompt_enabled.enabled) {527parent_team->t.t_implicit_task_taskdata[tid]528.ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);529}530OMPT_STORE_RETURN_ADDRESS(gtid);531#endif532533// check if __kmpc_push_num_teams called, set default number of teams534// otherwise535if (this_thr->th.th_teams_size.nteams == 0) {536__kmp_push_num_teams(loc, gtid, 0, 0);537}538KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);539KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);540KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);541542__kmp_fork_call(543loc, gtid, fork_context_intel, argc,544VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task545VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));546__kmp_join_call(loc, gtid547#if OMPT_SUPPORT548,549fork_context_intel550#endif551);552553// Pop current CG root off list554KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);555kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;556this_thr->th.th_cg_roots = tmp->up;557KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"558" to node %p. cg_nthreads was %d\n",559this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));560KMP_DEBUG_ASSERT(tmp->cg_nthreads);561int i = tmp->cg_nthreads--;562if (i == 1) { // check is we are the last thread in CG (not always the case)563__kmp_free(tmp);564}565// Restore current task's thread_limit from CG root566KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);567this_thr->th.th_current_task->td_icvs.thread_limit =568this_thr->th.th_cg_roots->cg_thread_limit;569570this_thr->th.th_teams_microtask = NULL;571this_thr->th.th_teams_level = 0;572*(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;573va_end(ap);574#if KMP_STATS_ENABLED575if (previous_state == stats_state_e::SERIAL_REGION) {576KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);577KMP_SET_THREAD_STATE(previous_state);578} else {579KMP_POP_PARTITIONED_TIMER();580}581#endif // KMP_STATS_ENABLED582}583584// I don't think this function should ever have been exported.585// The __kmpc_ prefix was misapplied. I'm fairly certain that no generated586// openmp code ever called it, but it's been exported from the RTL for so587// long that I'm afraid to remove the definition.588int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }589590/*!591@ingroup PARALLEL592@param loc source location information593@param global_tid global thread number594595Enter a serialized parallel construct. 
This interface is used to handle a596conditional parallel region, like this,597@code598#pragma omp parallel if (condition)599@endcode600when the condition is false.601*/602void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {603// The implementation is now in kmp_runtime.cpp so that it can share static604// functions with kmp_fork_call since the tasks to be done are similar in605// each case.606__kmp_assert_valid_gtid(global_tid);607#if OMPT_SUPPORT608OMPT_STORE_RETURN_ADDRESS(global_tid);609#endif610__kmp_serialized_parallel(loc, global_tid);611}612613/*!614@ingroup PARALLEL615@param loc source location information616@param global_tid global thread number617618Leave a serialized parallel construct.619*/620void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {621kmp_internal_control_t *top;622kmp_info_t *this_thr;623kmp_team_t *serial_team;624625KC_TRACE(10,626("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));627628/* skip all this code for autopar serialized loops since it results in629unacceptable overhead */630if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))631return;632633// Not autopar code634__kmp_assert_valid_gtid(global_tid);635if (!TCR_4(__kmp_init_parallel))636__kmp_parallel_initialize();637638__kmp_resume_if_soft_paused();639640this_thr = __kmp_threads[global_tid];641serial_team = this_thr->th.th_serial_team;642643kmp_task_team_t *task_team = this_thr->th.th_task_team;644// we need to wait for the proxy tasks before finishing the thread645if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||646task_team->tt.tt_hidden_helper_task_encountered))647__kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));648649KMP_MB();650KMP_DEBUG_ASSERT(serial_team);651KMP_ASSERT(serial_team->t.t_serialized);652KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);653KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);654KMP_DEBUG_ASSERT(serial_team->t.t_threads);655KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);656657#if OMPT_SUPPORT658if (ompt_enabled.enabled &&659this_thr->th.ompt_thread_info.state != ompt_state_overhead) {660OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;661if (ompt_enabled.ompt_callback_implicit_task) {662ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(663ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,664OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);665}666667// reset clear the task id only after unlinking the task668ompt_data_t *parent_task_data;669__ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);670671if (ompt_enabled.ompt_callback_parallel_end) {672ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(673&(serial_team->t.ompt_team_info.parallel_data), parent_task_data,674ompt_parallel_invoker_program | ompt_parallel_team,675OMPT_LOAD_RETURN_ADDRESS(global_tid));676}677__ompt_lw_taskteam_unlink(this_thr);678this_thr->th.ompt_thread_info.state = ompt_state_overhead;679}680#endif681682/* If necessary, pop the internal control stack values and replace the team683* values */684top = serial_team->t.t_control_stack_top;685if (top && top->serial_nesting_level == serial_team->t.t_serialized) {686copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);687serial_team->t.t_control_stack_top = top->next;688__kmp_free(top);689}690691/* pop dispatch buffers stack */692KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);693{694dispatch_private_info_t *disp_buffer 
=695serial_team->t.t_dispatch->th_disp_buffer;696serial_team->t.t_dispatch->th_disp_buffer =697serial_team->t.t_dispatch->th_disp_buffer->next;698__kmp_free(disp_buffer);699}700701/* pop the task team stack */702if (serial_team->t.t_serialized > 1) {703__kmp_pop_task_team_node(this_thr, serial_team);704}705706this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore707708--serial_team->t.t_serialized;709if (serial_team->t.t_serialized == 0) {710711/* return to the parallel section */712713#if KMP_ARCH_X86 || KMP_ARCH_X86_64714if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {715__kmp_clear_x87_fpu_status_word();716__kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);717__kmp_load_mxcsr(&serial_team->t.t_mxcsr);718}719#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */720721__kmp_pop_current_task_from_thread(this_thr);722#if OMPD_SUPPORT723if (ompd_state & OMPD_ENABLE_BP)724ompd_bp_parallel_end();725#endif726727this_thr->th.th_team = serial_team->t.t_parent;728this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;729730/* restore values cached in the thread */731this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */732this_thr->th.th_team_master =733serial_team->t.t_parent->t.t_threads[0]; /* JPH */734this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;735736/* TODO the below shouldn't need to be adjusted for serialized teams */737this_thr->th.th_dispatch =738&this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];739740KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);741this_thr->th.th_current_task->td_flags.executing = 1;742743if (__kmp_tasking_mode != tskm_immediate_exec) {744// Restore task state from serial team structure745KMP_DEBUG_ASSERT(serial_team->t.t_primary_task_state == 0 ||746serial_team->t.t_primary_task_state == 1);747this_thr->th.th_task_state =748(kmp_uint8)serial_team->t.t_primary_task_state;749// Copy the task team from the new child / old parent team to the thread.750this_thr->th.th_task_team =751this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];752KA_TRACE(20,753("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "754"team %p\n",755global_tid, this_thr->th.th_task_team, this_thr->th.th_team));756}757#if KMP_AFFINITY_SUPPORTED758if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {759__kmp_reset_root_init_mask(global_tid);760}761#endif762} else {763if (__kmp_tasking_mode != tskm_immediate_exec) {764KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "765"depth of serial team %p to %d\n",766global_tid, serial_team, serial_team->t.t_serialized));767}768}769770serial_team->t.t_level--;771if (__kmp_env_consistency_check)772__kmp_pop_parallel(global_tid, NULL);773#if OMPT_SUPPORT774if (ompt_enabled.enabled)775this_thr->th.ompt_thread_info.state =776((this_thr->th.th_team_serialized) ? ompt_state_work_serial777: ompt_state_work_parallel);778#endif779}780781/*!782@ingroup SYNCHRONIZATION783@param loc source location information.784785Execute <tt>flush</tt>. This is implemented as a full memory fence. (Though786depending on the memory ordering convention obeyed by the compiler787even that may not be necessary).788*/789void __kmpc_flush(ident_t *loc) {790KC_TRACE(10, ("__kmpc_flush: called\n"));791792/* need explicit __mf() here since use volatile instead in library */793KMP_MFENCE(); /* Flush all pending memory write invalidates. 
*/794795#if OMPT_SUPPORT && OMPT_OPTIONAL796if (ompt_enabled.ompt_callback_flush) {797ompt_callbacks.ompt_callback(ompt_callback_flush)(798__ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));799}800#endif801}802803/* -------------------------------------------------------------------------- */804/*!805@ingroup SYNCHRONIZATION806@param loc source location information807@param global_tid thread id.808809Execute a barrier.810*/811void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {812KMP_COUNT_BLOCK(OMP_BARRIER);813KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));814__kmp_assert_valid_gtid(global_tid);815816if (!TCR_4(__kmp_init_parallel))817__kmp_parallel_initialize();818819__kmp_resume_if_soft_paused();820821if (__kmp_env_consistency_check) {822if (loc == 0) {823KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?824}825__kmp_check_barrier(global_tid, ct_barrier, loc);826}827828#if OMPT_SUPPORT829ompt_frame_t *ompt_frame;830if (ompt_enabled.enabled) {831__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);832if (ompt_frame->enter_frame.ptr == NULL)833ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);834}835OMPT_STORE_RETURN_ADDRESS(global_tid);836#endif837__kmp_threads[global_tid]->th.th_ident = loc;838// TODO: explicit barrier_wait_id:839// this function is called when 'barrier' directive is present or840// implicit barrier at the end of a worksharing construct.841// 1) better to add a per-thread barrier counter to a thread data structure842// 2) set to 0 when a new team is created843// 4) no sync is required844845__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);846#if OMPT_SUPPORT && OMPT_OPTIONAL847if (ompt_enabled.enabled) {848ompt_frame->enter_frame = ompt_data_none;849}850#endif851}852853/* The BARRIER for a MASTER section is always explicit */854/*!855@ingroup WORK_SHARING856@param loc source location information.857@param global_tid global thread number .858@return 1 if this thread should execute the <tt>master</tt> block, 0 otherwise.859*/860kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {861int status = 0;862863KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));864__kmp_assert_valid_gtid(global_tid);865866if (!TCR_4(__kmp_init_parallel))867__kmp_parallel_initialize();868869__kmp_resume_if_soft_paused();870871if (KMP_MASTER_GTID(global_tid)) {872KMP_COUNT_BLOCK(OMP_MASTER);873KMP_PUSH_PARTITIONED_TIMER(OMP_master);874status = 1;875}876877#if OMPT_SUPPORT && OMPT_OPTIONAL878if (status) {879if (ompt_enabled.ompt_callback_masked) {880kmp_info_t *this_thr = __kmp_threads[global_tid];881kmp_team_t *team = this_thr->th.th_team;882883int tid = __kmp_tid_from_gtid(global_tid);884ompt_callbacks.ompt_callback(ompt_callback_masked)(885ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),886&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),887OMPT_GET_RETURN_ADDRESS(0));888}889}890#endif891892if (__kmp_env_consistency_check) {893#if KMP_USE_DYNAMIC_LOCK894if (status)895__kmp_push_sync(global_tid, ct_master, loc, NULL, 0);896else897__kmp_check_sync(global_tid, ct_master, loc, NULL, 0);898#else899if (status)900__kmp_push_sync(global_tid, ct_master, loc, NULL);901else902__kmp_check_sync(global_tid, ct_master, loc, NULL);903#endif904}905906return status;907}908909/*!910@ingroup WORK_SHARING911@param loc source location information.912@param global_tid global thread number .913914Mark the end of a <tt>master</tt> region. 
This should only be called by the915thread that executes the <tt>master</tt> region.916*/917void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {918KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));919__kmp_assert_valid_gtid(global_tid);920KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));921KMP_POP_PARTITIONED_TIMER();922923#if OMPT_SUPPORT && OMPT_OPTIONAL924kmp_info_t *this_thr = __kmp_threads[global_tid];925kmp_team_t *team = this_thr->th.th_team;926if (ompt_enabled.ompt_callback_masked) {927int tid = __kmp_tid_from_gtid(global_tid);928ompt_callbacks.ompt_callback(ompt_callback_masked)(929ompt_scope_end, &(team->t.ompt_team_info.parallel_data),930&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),931OMPT_GET_RETURN_ADDRESS(0));932}933#endif934935if (__kmp_env_consistency_check) {936if (KMP_MASTER_GTID(global_tid))937__kmp_pop_sync(global_tid, ct_master, loc);938}939}940941/*!942@ingroup WORK_SHARING943@param loc source location information.944@param global_tid global thread number.945@param filter result of evaluating filter clause on thread global_tid, or zero946if no filter clause present947@return 1 if this thread should execute the <tt>masked</tt> block, 0 otherwise.948*/949kmp_int32 __kmpc_masked(ident_t *loc, kmp_int32 global_tid, kmp_int32 filter) {950int status = 0;951int tid;952KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));953__kmp_assert_valid_gtid(global_tid);954955if (!TCR_4(__kmp_init_parallel))956__kmp_parallel_initialize();957958__kmp_resume_if_soft_paused();959960tid = __kmp_tid_from_gtid(global_tid);961if (tid == filter) {962KMP_COUNT_BLOCK(OMP_MASKED);963KMP_PUSH_PARTITIONED_TIMER(OMP_masked);964status = 1;965}966967#if OMPT_SUPPORT && OMPT_OPTIONAL968if (status) {969if (ompt_enabled.ompt_callback_masked) {970kmp_info_t *this_thr = __kmp_threads[global_tid];971kmp_team_t *team = this_thr->th.th_team;972ompt_callbacks.ompt_callback(ompt_callback_masked)(973ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),974&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),975OMPT_GET_RETURN_ADDRESS(0));976}977}978#endif979980if (__kmp_env_consistency_check) {981#if KMP_USE_DYNAMIC_LOCK982if (status)983__kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);984else985__kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);986#else987if (status)988__kmp_push_sync(global_tid, ct_masked, loc, NULL);989else990__kmp_check_sync(global_tid, ct_masked, loc, NULL);991#endif992}993994return status;995}996997/*!998@ingroup WORK_SHARING999@param loc source location information.1000@param global_tid global thread number .10011002Mark the end of a <tt>masked</tt> region. 
This should only be called by the1003thread that executes the <tt>masked</tt> region.1004*/1005void __kmpc_end_masked(ident_t *loc, kmp_int32 global_tid) {1006KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));1007__kmp_assert_valid_gtid(global_tid);1008KMP_POP_PARTITIONED_TIMER();10091010#if OMPT_SUPPORT && OMPT_OPTIONAL1011kmp_info_t *this_thr = __kmp_threads[global_tid];1012kmp_team_t *team = this_thr->th.th_team;1013if (ompt_enabled.ompt_callback_masked) {1014int tid = __kmp_tid_from_gtid(global_tid);1015ompt_callbacks.ompt_callback(ompt_callback_masked)(1016ompt_scope_end, &(team->t.ompt_team_info.parallel_data),1017&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),1018OMPT_GET_RETURN_ADDRESS(0));1019}1020#endif10211022if (__kmp_env_consistency_check) {1023__kmp_pop_sync(global_tid, ct_masked, loc);1024}1025}10261027/*!1028@ingroup WORK_SHARING1029@param loc source location information.1030@param gtid global thread number.10311032Start execution of an <tt>ordered</tt> construct.1033*/1034void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {1035int cid = 0;1036kmp_info_t *th;1037KMP_DEBUG_ASSERT(__kmp_init_serial);10381039KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));1040__kmp_assert_valid_gtid(gtid);10411042if (!TCR_4(__kmp_init_parallel))1043__kmp_parallel_initialize();10441045__kmp_resume_if_soft_paused();10461047#if USE_ITT_BUILD1048__kmp_itt_ordered_prep(gtid);1049// TODO: ordered_wait_id1050#endif /* USE_ITT_BUILD */10511052th = __kmp_threads[gtid];10531054#if OMPT_SUPPORT && OMPT_OPTIONAL1055kmp_team_t *team;1056ompt_wait_id_t lck;1057void *codeptr_ra;1058OMPT_STORE_RETURN_ADDRESS(gtid);1059if (ompt_enabled.enabled) {1060team = __kmp_team_from_gtid(gtid);1061lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;1062/* OMPT state update */1063th->th.ompt_thread_info.wait_id = lck;1064th->th.ompt_thread_info.state = ompt_state_wait_ordered;10651066/* OMPT event callback */1067codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);1068if (ompt_enabled.ompt_callback_mutex_acquire) {1069ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(1070ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,1071codeptr_ra);1072}1073}1074#endif10751076if (th->th.th_dispatch->th_deo_fcn != 0)1077(*th->th.th_dispatch->th_deo_fcn)(>id, &cid, loc);1078else1079__kmp_parallel_deo(>id, &cid, loc);10801081#if OMPT_SUPPORT && OMPT_OPTIONAL1082if (ompt_enabled.enabled) {1083/* OMPT state update */1084th->th.ompt_thread_info.state = ompt_state_work_parallel;1085th->th.ompt_thread_info.wait_id = 0;10861087/* OMPT event callback */1088if (ompt_enabled.ompt_callback_mutex_acquired) {1089ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(1090ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);1091}1092}1093#endif10941095#if USE_ITT_BUILD1096__kmp_itt_ordered_start(gtid);1097#endif /* USE_ITT_BUILD */1098}10991100/*!1101@ingroup WORK_SHARING1102@param loc source location information.1103@param gtid global thread number.11041105End execution of an <tt>ordered</tt> construct.1106*/1107void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {1108int cid = 0;1109kmp_info_t *th;11101111KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));1112__kmp_assert_valid_gtid(gtid);11131114#if USE_ITT_BUILD1115__kmp_itt_ordered_end(gtid);1116// TODO: ordered_wait_id1117#endif /* USE_ITT_BUILD */11181119th = __kmp_threads[gtid];11201121if (th->th.th_dispatch->th_dxo_fcn != 0)1122(*th->th.th_dispatch->th_dxo_fcn)(>id, &cid, 
loc);1123else1124__kmp_parallel_dxo(>id, &cid, loc);11251126#if OMPT_SUPPORT && OMPT_OPTIONAL1127OMPT_STORE_RETURN_ADDRESS(gtid);1128if (ompt_enabled.ompt_callback_mutex_released) {1129ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(1130ompt_mutex_ordered,1131(ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)1132->t.t_ordered.dt.t_value,1133OMPT_LOAD_RETURN_ADDRESS(gtid));1134}1135#endif1136}11371138#if KMP_USE_DYNAMIC_LOCK11391140static __forceinline void1141__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,1142kmp_int32 gtid, kmp_indirect_locktag_t tag) {1143// Pointer to the allocated indirect lock is written to crit, while indexing1144// is ignored.1145void *idx;1146kmp_indirect_lock_t **lck;1147lck = (kmp_indirect_lock_t **)crit;1148kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);1149KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);1150KMP_SET_I_LOCK_LOCATION(ilk, loc);1151KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);1152KA_TRACE(20,1153("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));1154#if USE_ITT_BUILD1155__kmp_itt_critical_creating(ilk->lock, loc);1156#endif1157int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);1158if (status == 0) {1159#if USE_ITT_BUILD1160__kmp_itt_critical_destroyed(ilk->lock);1161#endif1162// We don't really need to destroy the unclaimed lock here since it will be1163// cleaned up at program exit.1164// KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);1165}1166KMP_DEBUG_ASSERT(*lck != NULL);1167}11681169// Fast-path acquire tas lock1170#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \1171{ \1172kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \1173kmp_int32 tas_free = KMP_LOCK_FREE(tas); \1174kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \1175if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \1176!__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \1177kmp_uint32 spins; \1178KMP_FSYNC_PREPARE(l); \1179KMP_INIT_YIELD(spins); \1180kmp_backoff_t backoff = __kmp_spin_backoff_params; \1181do { \1182if (TCR_4(__kmp_nth) > \1183(__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)) { \1184KMP_YIELD(TRUE); \1185} else { \1186KMP_YIELD_SPIN(spins); \1187} \1188__kmp_spin_backoff(&backoff); \1189} while ( \1190KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \1191!__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \1192} \1193KMP_FSYNC_ACQUIRED(l); \1194}11951196// Fast-path test tas lock1197#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \1198{ \1199kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \1200kmp_int32 tas_free = KMP_LOCK_FREE(tas); \1201kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \1202rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \1203__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \1204}12051206// Fast-path release tas lock1207#define KMP_RELEASE_TAS_LOCK(lock, gtid) \1208{ KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }12091210#if KMP_USE_FUTEX12111212#include <sys/syscall.h>1213#include <unistd.h>1214#ifndef FUTEX_WAIT1215#define FUTEX_WAIT 01216#endif1217#ifndef FUTEX_WAKE1218#define FUTEX_WAKE 11219#endif12201221// Fast-path acquire futex lock1222#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \1223{ \1224kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \1225kmp_int32 gtid_code = (gtid + 1) << 1; \1226KMP_MB(); \1227KMP_FSYNC_PREPARE(ftx); \1228kmp_int32 poll_val; \1229while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \1230&(ftx->lk.poll), KMP_LOCK_FREE(futex), \1231KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \1232kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \1233if (!cond) { \1234if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \1235poll_val | \1236KMP_LOCK_BUSY(1, futex))) { \1237continue; \1238} \1239poll_val |= KMP_LOCK_BUSY(1, futex); \1240} \1241kmp_int32 rc; \1242if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \1243NULL, NULL, 0)) != 0) { \1244continue; \1245} \1246gtid_code |= 1; \1247} \1248KMP_FSYNC_ACQUIRED(ftx); \1249}12501251// Fast-path test futex lock1252#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \1253{ \1254kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \1255if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \1256KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \1257KMP_FSYNC_ACQUIRED(ftx); \1258rc = TRUE; \1259} else { \1260rc = FALSE; \1261} \1262}12631264// Fast-path release futex lock1265#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \1266{ \1267kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \1268KMP_MB(); \1269KMP_FSYNC_RELEASING(ftx); \1270kmp_int32 poll_val = \1271KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \1272if (KMP_LOCK_STRIP(poll_val) & 1) { \1273syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \1274KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \1275} \1276KMP_MB(); \1277KMP_YIELD_OVERSUB(); \1278}12791280#endif // KMP_USE_FUTEX12811282#else // KMP_USE_DYNAMIC_LOCK12831284static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,1285ident_t const *loc,1286kmp_int32 gtid) {1287kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;12881289// Because of the double-check, the following load doesn't need to be volatile1290kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);12911292if (lck == NULL) {1293void *idx;12941295// Allocate & initialize the lock.1296// Remember alloc'ed locks in table in order to free them in __kmp_cleanup()1297lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);1298__kmp_init_user_lock_with_checks(lck);1299__kmp_set_user_lock_location(lck, loc);1300#if USE_ITT_BUILD1301__kmp_itt_critical_creating(lck);1302// 
__kmp_itt_critical_creating() should be called *before* the first usage1303// of underlying lock. It is the only place where we can guarantee it. There1304// are chances the lock will destroyed with no usage, but it is not a1305// problem, because this is not real event seen by user but rather setting1306// name for object (lock). See more details in kmp_itt.h.1307#endif /* USE_ITT_BUILD */13081309// Use a cmpxchg instruction to slam the start of the critical section with1310// the lock pointer. If another thread beat us to it, deallocate the lock,1311// and use the lock that the other thread allocated.1312int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);13131314if (status == 0) {1315// Deallocate the lock and reload the value.1316#if USE_ITT_BUILD1317__kmp_itt_critical_destroyed(lck);1318// Let ITT know the lock is destroyed and the same memory location may be reused1319// for another purpose.1320#endif /* USE_ITT_BUILD */1321__kmp_destroy_user_lock_with_checks(lck);1322__kmp_user_lock_free(&idx, gtid, lck);1323lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);1324KMP_DEBUG_ASSERT(lck != NULL);1325}1326}1327return lck;1328}13291330#endif // KMP_USE_DYNAMIC_LOCK13311332/*!1333@ingroup WORK_SHARING1334@param loc source location information.1335@param global_tid global thread number.1336@param crit identity of the critical section. This could be a pointer to a lock1337associated with the critical section, or some other suitably unique value.13381339Enter code protected by a `critical` construct.1340This function blocks until the executing thread can enter the critical section.1341*/1342void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,1343kmp_critical_name *crit) {1344#if KMP_USE_DYNAMIC_LOCK1345#if OMPT_SUPPORT && OMPT_OPTIONAL1346OMPT_STORE_RETURN_ADDRESS(global_tid);1347#endif // OMPT_SUPPORT1348__kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);1349#else1350KMP_COUNT_BLOCK(OMP_CRITICAL);1351#if OMPT_SUPPORT && OMPT_OPTIONAL1352ompt_state_t prev_state = ompt_state_undefined;1353ompt_thread_info_t ti;1354#endif1355kmp_user_lock_p lck;13561357KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));1358__kmp_assert_valid_gtid(global_tid);13591360// TODO: add THR_OVHD_STATE13611362KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);1363KMP_CHECK_USER_LOCK_INIT();13641365if ((__kmp_user_lock_kind == lk_tas) &&1366(sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {1367lck = (kmp_user_lock_p)crit;1368}1369#if KMP_USE_FUTEX1370else if ((__kmp_user_lock_kind == lk_futex) &&1371(sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {1372lck = (kmp_user_lock_p)crit;1373}1374#endif1375else { // ticket, queuing or drdpa1376lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);1377}13781379if (__kmp_env_consistency_check)1380__kmp_push_sync(global_tid, ct_critical, loc, lck);13811382// since the critical directive binds to all threads, not just the current1383// team we have to check this even if we are in a serialized team.1384// also, even if we are the uber thread, we still have to conduct the lock,1385// as we have to contend with sibling threads.13861387#if USE_ITT_BUILD1388__kmp_itt_critical_acquiring(lck);1389#endif /* USE_ITT_BUILD */1390#if OMPT_SUPPORT && OMPT_OPTIONAL1391OMPT_STORE_RETURN_ADDRESS(gtid);1392void *codeptr_ra = NULL;1393if (ompt_enabled.enabled) {1394ti = __kmp_threads[global_tid]->th.ompt_thread_info;1395/* OMPT state update */1396prev_state = ti.state;1397ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;1398ti.state = ompt_state_wait_critical;13991400/* OMPT 
event callback */1401codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);1402if (ompt_enabled.ompt_callback_mutex_acquire) {1403ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(1404ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),1405(ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);1406}1407}1408#endif1409// Value of 'crit' should be good for using as a critical_id of the critical1410// section directive.1411__kmp_acquire_user_lock_with_checks(lck, global_tid);14121413#if USE_ITT_BUILD1414__kmp_itt_critical_acquired(lck);1415#endif /* USE_ITT_BUILD */1416#if OMPT_SUPPORT && OMPT_OPTIONAL1417if (ompt_enabled.enabled) {1418/* OMPT state update */1419ti.state = prev_state;1420ti.wait_id = 0;14211422/* OMPT event callback */1423if (ompt_enabled.ompt_callback_mutex_acquired) {1424ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(1425ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);1426}1427}1428#endif1429KMP_POP_PARTITIONED_TIMER();14301431KMP_PUSH_PARTITIONED_TIMER(OMP_critical);1432KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));1433#endif // KMP_USE_DYNAMIC_LOCK1434}14351436#if KMP_USE_DYNAMIC_LOCK14371438// Converts the given hint to an internal lock implementation1439static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {1440#if KMP_USE_TSX1441#define KMP_TSX_LOCK(seq) lockseq_##seq1442#else1443#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq1444#endif14451446#if KMP_ARCH_X86 || KMP_ARCH_X86_641447#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)1448#else1449#define KMP_CPUINFO_RTM 01450#endif14511452// Hints that do not require further logic1453if (hint & kmp_lock_hint_hle)1454return KMP_TSX_LOCK(hle);1455if (hint & kmp_lock_hint_rtm)1456return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;1457if (hint & kmp_lock_hint_adaptive)1458return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;14591460// Rule out conflicting hints first by returning the default lock1461if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))1462return __kmp_user_lock_seq;1463if ((hint & omp_lock_hint_speculative) &&1464(hint & omp_lock_hint_nonspeculative))1465return __kmp_user_lock_seq;14661467// Do not even consider speculation when it appears to be contended1468if (hint & omp_lock_hint_contended)1469return lockseq_queuing;14701471// Uncontended lock without speculation1472if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))1473return lockseq_tas;14741475// Use RTM lock for speculation1476if (hint & omp_lock_hint_speculative)1477return KMP_CPUINFO_RTM ? 
KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;14781479return __kmp_user_lock_seq;1480}14811482#if OMPT_SUPPORT && OMPT_OPTIONAL1483#if KMP_USE_DYNAMIC_LOCK1484static kmp_mutex_impl_t1485__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {1486if (user_lock) {1487switch (KMP_EXTRACT_D_TAG(user_lock)) {1488case 0:1489break;1490#if KMP_USE_FUTEX1491case locktag_futex:1492return kmp_mutex_impl_queuing;1493#endif1494case locktag_tas:1495return kmp_mutex_impl_spin;1496#if KMP_USE_TSX1497case locktag_hle:1498case locktag_rtm_spin:1499return kmp_mutex_impl_speculative;1500#endif1501default:1502return kmp_mutex_impl_none;1503}1504ilock = KMP_LOOKUP_I_LOCK(user_lock);1505}1506KMP_ASSERT(ilock);1507switch (ilock->type) {1508#if KMP_USE_TSX1509case locktag_adaptive:1510case locktag_rtm_queuing:1511return kmp_mutex_impl_speculative;1512#endif1513case locktag_nested_tas:1514return kmp_mutex_impl_spin;1515#if KMP_USE_FUTEX1516case locktag_nested_futex:1517#endif1518case locktag_ticket:1519case locktag_queuing:1520case locktag_drdpa:1521case locktag_nested_ticket:1522case locktag_nested_queuing:1523case locktag_nested_drdpa:1524return kmp_mutex_impl_queuing;1525default:1526return kmp_mutex_impl_none;1527}1528}1529#else1530// For locks without dynamic binding1531static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {1532switch (__kmp_user_lock_kind) {1533case lk_tas:1534return kmp_mutex_impl_spin;1535#if KMP_USE_FUTEX1536case lk_futex:1537#endif1538case lk_ticket:1539case lk_queuing:1540case lk_drdpa:1541return kmp_mutex_impl_queuing;1542#if KMP_USE_TSX1543case lk_hle:1544case lk_rtm_queuing:1545case lk_rtm_spin:1546case lk_adaptive:1547return kmp_mutex_impl_speculative;1548#endif1549default:1550return kmp_mutex_impl_none;1551}1552}1553#endif // KMP_USE_DYNAMIC_LOCK1554#endif // OMPT_SUPPORT && OMPT_OPTIONAL15551556/*!1557@ingroup WORK_SHARING1558@param loc source location information.1559@param global_tid global thread number.1560@param crit identity of the critical section. This could be a pointer to a lock1561associated with the critical section, or some other suitably unique value.1562@param hint the lock hint.15631564Enter code protected by a `critical` construct with a hint. The hint value is1565used to suggest a lock implementation. 
This function blocks until the executing1566thread can enter the critical section unless the hint suggests use of1567speculative execution and the hardware supports it.1568*/1569void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,1570kmp_critical_name *crit, uint32_t hint) {1571KMP_COUNT_BLOCK(OMP_CRITICAL);1572kmp_user_lock_p lck;1573#if OMPT_SUPPORT && OMPT_OPTIONAL1574ompt_state_t prev_state = ompt_state_undefined;1575ompt_thread_info_t ti;1576// This is the case, if called from __kmpc_critical:1577void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);1578if (!codeptr)1579codeptr = OMPT_GET_RETURN_ADDRESS(0);1580#endif15811582KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));1583__kmp_assert_valid_gtid(global_tid);15841585kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;1586// Check if it is initialized.1587KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);1588kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);1589if (*lk == 0) {1590if (KMP_IS_D_LOCK(lockseq)) {1591KMP_COMPARE_AND_STORE_ACQ32(1592(volatile kmp_int32 *)&((kmp_base_tas_lock_t *)crit)->poll, 0,1593KMP_GET_D_TAG(lockseq));1594} else {1595__kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));1596}1597}1598// Branch for accessing the actual lock object and set operation. This1599// branching is inevitable since this lock initialization does not follow the1600// normal dispatch path (lock table is not used).1601if (KMP_EXTRACT_D_TAG(lk) != 0) {1602lck = (kmp_user_lock_p)lk;1603if (__kmp_env_consistency_check) {1604__kmp_push_sync(global_tid, ct_critical, loc, lck,1605__kmp_map_hint_to_lock(hint));1606}1607#if USE_ITT_BUILD1608__kmp_itt_critical_acquiring(lck);1609#endif1610#if OMPT_SUPPORT && OMPT_OPTIONAL1611if (ompt_enabled.enabled) {1612ti = __kmp_threads[global_tid]->th.ompt_thread_info;1613/* OMPT state update */1614prev_state = ti.state;1615ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;1616ti.state = ompt_state_wait_critical;16171618/* OMPT event callback */1619if (ompt_enabled.ompt_callback_mutex_acquire) {1620ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(1621ompt_mutex_critical, (unsigned int)hint,1622__ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,1623codeptr);1624}1625}1626#endif1627#if KMP_USE_INLINED_TAS1628if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {1629KMP_ACQUIRE_TAS_LOCK(lck, global_tid);1630} else1631#elif KMP_USE_INLINED_FUTEX1632if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {1633KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);1634} else1635#endif1636{1637KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);1638}1639} else {1640kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);1641lck = ilk->lock;1642if (__kmp_env_consistency_check) {1643__kmp_push_sync(global_tid, ct_critical, loc, lck,1644__kmp_map_hint_to_lock(hint));1645}1646#if USE_ITT_BUILD1647__kmp_itt_critical_acquiring(lck);1648#endif1649#if OMPT_SUPPORT && OMPT_OPTIONAL1650if (ompt_enabled.enabled) {1651ti = __kmp_threads[global_tid]->th.ompt_thread_info;1652/* OMPT state update */1653prev_state = ti.state;1654ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;1655ti.state = ompt_state_wait_critical;16561657/* OMPT event callback */1658if (ompt_enabled.ompt_callback_mutex_acquire) {1659ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(1660ompt_mutex_critical, (unsigned int)hint,1661__ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,1662codeptr);1663}1664}1665#endif1666KMP_I_LOCK_FUNC(ilk, set)(lck, 
global_tid);1667}1668KMP_POP_PARTITIONED_TIMER();16691670#if USE_ITT_BUILD1671__kmp_itt_critical_acquired(lck);1672#endif /* USE_ITT_BUILD */1673#if OMPT_SUPPORT && OMPT_OPTIONAL1674if (ompt_enabled.enabled) {1675/* OMPT state update */1676ti.state = prev_state;1677ti.wait_id = 0;16781679/* OMPT event callback */1680if (ompt_enabled.ompt_callback_mutex_acquired) {1681ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(1682ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);1683}1684}1685#endif16861687KMP_PUSH_PARTITIONED_TIMER(OMP_critical);1688KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));1689} // __kmpc_critical_with_hint16901691#endif // KMP_USE_DYNAMIC_LOCK16921693/*!1694@ingroup WORK_SHARING1695@param loc source location information.1696@param global_tid global thread number .1697@param crit identity of the critical section. This could be a pointer to a lock1698associated with the critical section, or some other suitably unique value.16991700Leave a critical section, releasing any lock that was held during its execution.1701*/1702void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,1703kmp_critical_name *crit) {1704kmp_user_lock_p lck;17051706KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));17071708#if KMP_USE_DYNAMIC_LOCK1709int locktag = KMP_EXTRACT_D_TAG(crit);1710if (locktag) {1711lck = (kmp_user_lock_p)crit;1712KMP_ASSERT(lck != NULL);1713if (__kmp_env_consistency_check) {1714__kmp_pop_sync(global_tid, ct_critical, loc);1715}1716#if USE_ITT_BUILD1717__kmp_itt_critical_releasing(lck);1718#endif1719#if KMP_USE_INLINED_TAS1720if (locktag == locktag_tas && !__kmp_env_consistency_check) {1721KMP_RELEASE_TAS_LOCK(lck, global_tid);1722} else1723#elif KMP_USE_INLINED_FUTEX1724if (locktag == locktag_futex && !__kmp_env_consistency_check) {1725KMP_RELEASE_FUTEX_LOCK(lck, global_tid);1726} else1727#endif1728{1729KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);1730}1731} else {1732kmp_indirect_lock_t *ilk =1733(kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));1734KMP_ASSERT(ilk != NULL);1735lck = ilk->lock;1736if (__kmp_env_consistency_check) {1737__kmp_pop_sync(global_tid, ct_critical, loc);1738}1739#if USE_ITT_BUILD1740__kmp_itt_critical_releasing(lck);1741#endif1742KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);1743}17441745#else // KMP_USE_DYNAMIC_LOCK17461747if ((__kmp_user_lock_kind == lk_tas) &&1748(sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {1749lck = (kmp_user_lock_p)crit;1750}1751#if KMP_USE_FUTEX1752else if ((__kmp_user_lock_kind == lk_futex) &&1753(sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {1754lck = (kmp_user_lock_p)crit;1755}1756#endif1757else { // ticket, queuing or drdpa1758lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));1759}17601761KMP_ASSERT(lck != NULL);17621763if (__kmp_env_consistency_check)1764__kmp_pop_sync(global_tid, ct_critical, loc);17651766#if USE_ITT_BUILD1767__kmp_itt_critical_releasing(lck);1768#endif /* USE_ITT_BUILD */1769// Value of 'crit' should be good for using as a critical_id of the critical1770// section directive.1771__kmp_release_user_lock_with_checks(lck, global_tid);17721773#endif // KMP_USE_DYNAMIC_LOCK17741775#if OMPT_SUPPORT && OMPT_OPTIONAL1776/* OMPT release event triggers after lock is released; place here to trigger1777* for all #if branches */1778OMPT_STORE_RETURN_ADDRESS(global_tid);1779if (ompt_enabled.ompt_callback_mutex_released) {1780ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(1781ompt_mutex_critical, 
(ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));
  }
#endif

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master. The barrier is executed inside
this function.
*/
kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  int status;
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

#if OMPT_SUPPORT
  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  }
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
  __kmp_threads[global_tid]->th.th_ident = loc;
#endif
  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  }
#endif

  return (status != 0) ? 0 : 1;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.

Complete the execution of a combined barrier and master. This function should
only be called at the completion of the <tt>master</tt> code. Other threads will
still be waiting at the barrier and this call releases them.
*/
void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid thread id.
@return one if the thread should execute the master block, zero otherwise

Start execution of a combined barrier and master(nowait) construct.
The barrier is executed inside this function.
There is no equivalent "end" function, since the barrier is completed inside
this call and the other threads are not left waiting, so there is nothing for
an "end" call to release.
*/
kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
  kmp_int32 ret;
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
    if (loc == 0) {
      KMP_WARNING(ConstructIdentInvalid); // ???
What does it mean for the user?1872}1873__kmp_check_barrier(global_tid, ct_barrier, loc);1874}18751876#if OMPT_SUPPORT1877ompt_frame_t *ompt_frame;1878if (ompt_enabled.enabled) {1879__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);1880if (ompt_frame->enter_frame.ptr == NULL)1881ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);1882}1883OMPT_STORE_RETURN_ADDRESS(global_tid);1884#endif1885#if USE_ITT_NOTIFY1886__kmp_threads[global_tid]->th.th_ident = loc;1887#endif1888__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);1889#if OMPT_SUPPORT && OMPT_OPTIONAL1890if (ompt_enabled.enabled) {1891ompt_frame->enter_frame = ompt_data_none;1892}1893#endif18941895ret = __kmpc_master(loc, global_tid);18961897if (__kmp_env_consistency_check) {1898/* there's no __kmpc_end_master called; so the (stats) */1899/* actions of __kmpc_end_master are done here */1900if (ret) {1901/* only one thread should do the pop since only */1902/* one did the push (see __kmpc_master()) */1903__kmp_pop_sync(global_tid, ct_master, loc);1904}1905}19061907return (ret);1908}19091910/* The BARRIER for a SINGLE process section is always explicit */1911/*!1912@ingroup WORK_SHARING1913@param loc source location information1914@param global_tid global thread number1915@return One if this thread should execute the single construct, zero otherwise.19161917Test whether to execute a <tt>single</tt> construct.1918There are no implicit barriers in the two "single" calls, rather the compiler1919should introduce an explicit barrier if it is required.1920*/19211922kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {1923__kmp_assert_valid_gtid(global_tid);1924kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);19251926if (rc) {1927// We are going to execute the single statement, so we should count it.1928KMP_COUNT_BLOCK(OMP_SINGLE);1929KMP_PUSH_PARTITIONED_TIMER(OMP_single);1930}19311932#if OMPT_SUPPORT && OMPT_OPTIONAL1933kmp_info_t *this_thr = __kmp_threads[global_tid];1934kmp_team_t *team = this_thr->th.th_team;1935int tid = __kmp_tid_from_gtid(global_tid);19361937if (ompt_enabled.enabled) {1938if (rc) {1939if (ompt_enabled.ompt_callback_work) {1940ompt_callbacks.ompt_callback(ompt_callback_work)(1941ompt_work_single_executor, ompt_scope_begin,1942&(team->t.ompt_team_info.parallel_data),1943&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),19441, OMPT_GET_RETURN_ADDRESS(0));1945}1946} else {1947if (ompt_enabled.ompt_callback_work) {1948ompt_callbacks.ompt_callback(ompt_callback_work)(1949ompt_work_single_other, ompt_scope_begin,1950&(team->t.ompt_team_info.parallel_data),1951&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),19521, OMPT_GET_RETURN_ADDRESS(0));1953ompt_callbacks.ompt_callback(ompt_callback_work)(1954ompt_work_single_other, ompt_scope_end,1955&(team->t.ompt_team_info.parallel_data),1956&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),19571, OMPT_GET_RETURN_ADDRESS(0));1958}1959}1960}1961#endif19621963return rc;1964}19651966/*!1967@ingroup WORK_SHARING1968@param loc source location information1969@param global_tid global thread number19701971Mark the end of a <tt>single</tt> construct. 
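As an illustration only (a sketch of how a compiler might lower the construct,
not the output of any particular compiler; <tt>loc</tt> and <tt>gtid</tt> stand
for the usual source-location and global thread id arguments), a
<tt>single</tt> region without a <tt>nowait</tt> clause could be emitted as:
@code
if (__kmpc_single(loc, gtid)) {
  // body of the single region, executed by exactly one thread
  __kmpc_end_single(loc, gtid);
}
__kmpc_barrier(loc, gtid); // explicit barrier; the two "single" calls add none
@endcode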
This function should1972only be called by the thread that executed the block of code protected1973by the `single` construct.1974*/1975void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {1976__kmp_assert_valid_gtid(global_tid);1977__kmp_exit_single(global_tid);1978KMP_POP_PARTITIONED_TIMER();19791980#if OMPT_SUPPORT && OMPT_OPTIONAL1981kmp_info_t *this_thr = __kmp_threads[global_tid];1982kmp_team_t *team = this_thr->th.th_team;1983int tid = __kmp_tid_from_gtid(global_tid);19841985if (ompt_enabled.ompt_callback_work) {1986ompt_callbacks.ompt_callback(ompt_callback_work)(1987ompt_work_single_executor, ompt_scope_end,1988&(team->t.ompt_team_info.parallel_data),1989&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,1990OMPT_GET_RETURN_ADDRESS(0));1991}1992#endif1993}19941995/*!1996@ingroup WORK_SHARING1997@param loc Source location1998@param global_tid Global thread id19992000Mark the end of a statically scheduled loop.2001*/2002void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {2003KMP_POP_PARTITIONED_TIMER();2004KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));20052006#if OMPT_SUPPORT && OMPT_OPTIONAL2007if (ompt_enabled.ompt_callback_work) {2008ompt_work_t ompt_work_type = ompt_work_loop_static;2009ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);2010ompt_task_info_t *task_info = __ompt_get_task_info_object(0);2011// Determine workshare type2012if (loc != NULL) {2013if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {2014ompt_work_type = ompt_work_loop_static;2015} else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {2016ompt_work_type = ompt_work_sections;2017} else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {2018ompt_work_type = ompt_work_distribute;2019} else {2020// use default set above.2021// a warning about this case is provided in __kmpc_for_static_init2022}2023KMP_DEBUG_ASSERT(ompt_work_type);2024}2025ompt_callbacks.ompt_callback(ompt_callback_work)(2026ompt_work_type, ompt_scope_end, &(team_info->parallel_data),2027&(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));2028}2029#endif2030if (__kmp_env_consistency_check)2031__kmp_pop_workshare(global_tid, ct_pdo, loc);2032}20332034// User routines which take C-style arguments (call by value)2035// different from the Fortran equivalent routines20362037void ompc_set_num_threads(int arg) {2038// !!!!! TODO: check the per-task binding2039__kmp_set_num_threads(arg, __kmp_entry_gtid());2040}20412042void ompc_set_dynamic(int flag) {2043kmp_info_t *thread;20442045/* For the thread-private implementation of the internal controls */2046thread = __kmp_entry_thread();20472048__kmp_save_internal_controls(thread);20492050set__dynamic(thread, flag ? true : false);2051}20522053void ompc_set_nested(int flag) {2054kmp_info_t *thread;20552056/* For the thread-private internal controls implementation */2057thread = __kmp_entry_thread();20582059__kmp_save_internal_controls(thread);20602061set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);2062}20632064void ompc_set_max_active_levels(int max_active_levels) {2065/* TO DO */2066/* we want per-task implementation of this internal control */20672068/* For the per-thread internal controls implementation */2069__kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);2070}20712072void ompc_set_schedule(omp_sched_t kind, int modifier) {2073// !!!!! 
TODO: check the per-task binding2074__kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);2075}20762077int ompc_get_ancestor_thread_num(int level) {2078return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);2079}20802081int ompc_get_team_size(int level) {2082return __kmp_get_team_size(__kmp_entry_gtid(), level);2083}20842085/* OpenMP 5.0 Affinity Format API */2086void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {2087if (!__kmp_init_serial) {2088__kmp_serial_initialize();2089}2090__kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,2091format, KMP_STRLEN(format) + 1);2092}20932094size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {2095size_t format_size;2096if (!__kmp_init_serial) {2097__kmp_serial_initialize();2098}2099format_size = KMP_STRLEN(__kmp_affinity_format);2100if (buffer && size) {2101__kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,2102format_size + 1);2103}2104return format_size;2105}21062107void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {2108int gtid;2109if (!TCR_4(__kmp_init_middle)) {2110__kmp_middle_initialize();2111}2112__kmp_assign_root_init_mask();2113gtid = __kmp_get_gtid();2114#if KMP_AFFINITY_SUPPORTED2115if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&2116__kmp_affinity.flags.reset) {2117__kmp_reset_root_init_mask(gtid);2118}2119#endif2120__kmp_aux_display_affinity(gtid, format);2121}21222123size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,2124char const *format) {2125int gtid;2126size_t num_required;2127kmp_str_buf_t capture_buf;2128if (!TCR_4(__kmp_init_middle)) {2129__kmp_middle_initialize();2130}2131__kmp_assign_root_init_mask();2132gtid = __kmp_get_gtid();2133#if KMP_AFFINITY_SUPPORTED2134if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&2135__kmp_affinity.flags.reset) {2136__kmp_reset_root_init_mask(gtid);2137}2138#endif2139__kmp_str_buf_init(&capture_buf);2140num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);2141if (buffer && buf_size) {2142__kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,2143capture_buf.used + 1);2144}2145__kmp_str_buf_free(&capture_buf);2146return num_required;2147}21482149void kmpc_set_stacksize(int arg) {2150// __kmp_aux_set_stacksize initializes the library if needed2151__kmp_aux_set_stacksize(arg);2152}21532154void kmpc_set_stacksize_s(size_t arg) {2155// __kmp_aux_set_stacksize initializes the library if needed2156__kmp_aux_set_stacksize(arg);2157}21582159void kmpc_set_blocktime(int arg) {2160int gtid, tid, bt = arg;2161kmp_info_t *thread;21622163gtid = __kmp_entry_gtid();2164tid = __kmp_tid_from_gtid(gtid);2165thread = __kmp_thread_from_gtid(gtid);21662167__kmp_aux_convert_blocktime(&bt);2168__kmp_aux_set_blocktime(bt, thread, tid);2169}21702171void kmpc_set_library(int arg) {2172// __kmp_user_set_library initializes the library if needed2173__kmp_user_set_library((enum library_type)arg);2174}21752176void kmpc_set_defaults(char const *str) {2177// __kmp_aux_set_defaults initializes the library if needed2178__kmp_aux_set_defaults(str, KMP_STRLEN(str));2179}21802181void kmpc_set_disp_num_buffers(int arg) {2182// ignore after initialization because some teams have already2183// allocated dispatch buffers2184if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&2185arg <= KMP_MAX_DISP_NUM_BUFF) {2186__kmp_dispatch_num_buffers = arg;2187}2188}21892190int kmpc_set_affinity_mask_proc(int proc, void **mask) {2191#if defined(KMP_STUB) || 
!KMP_AFFINITY_SUPPORTED2192return -1;2193#else2194if (!TCR_4(__kmp_init_middle)) {2195__kmp_middle_initialize();2196}2197__kmp_assign_root_init_mask();2198return __kmp_aux_set_affinity_mask_proc(proc, mask);2199#endif2200}22012202int kmpc_unset_affinity_mask_proc(int proc, void **mask) {2203#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED2204return -1;2205#else2206if (!TCR_4(__kmp_init_middle)) {2207__kmp_middle_initialize();2208}2209__kmp_assign_root_init_mask();2210return __kmp_aux_unset_affinity_mask_proc(proc, mask);2211#endif2212}22132214int kmpc_get_affinity_mask_proc(int proc, void **mask) {2215#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED2216return -1;2217#else2218if (!TCR_4(__kmp_init_middle)) {2219__kmp_middle_initialize();2220}2221__kmp_assign_root_init_mask();2222return __kmp_aux_get_affinity_mask_proc(proc, mask);2223#endif2224}22252226/* -------------------------------------------------------------------------- */2227/*!2228@ingroup THREADPRIVATE2229@param loc source location information2230@param gtid global thread number2231@param cpy_size size of the cpy_data buffer2232@param cpy_data pointer to data to be copied2233@param cpy_func helper function to call for copying data2234@param didit flag variable: 1=single thread; 0=not single thread22352236__kmpc_copyprivate implements the interface for the private data broadcast2237needed for the copyprivate clause associated with a single region in an2238OpenMP<sup>*</sup> program (both C and Fortran).2239All threads participating in the parallel region call this routine.2240One of the threads (called the single thread) should have the <tt>didit</tt>2241variable set to 1 and all other threads should have that variable set to 0.2242All threads pass a pointer to a data buffer (cpy_data) that they have built.22432244The OpenMP specification forbids the use of nowait on the single region when a2245copyprivate clause is present. However, @ref __kmpc_copyprivate implements a2246barrier internally to avoid race conditions, so the code generation for the2247single region should avoid generating a barrier after the call to @ref2248__kmpc_copyprivate.22492250The <tt>gtid</tt> parameter is the global thread id for the current thread.2251The <tt>loc</tt> parameter is a pointer to source location information.22522253Internal implementation: The single thread will first copy its descriptor2254address (cpy_data) to a team-private location, then the other threads will each2255call the function pointed to by the parameter cpy_func, which carries out the2256copy by copying the data using the cpy_data buffer.22572258The cpy_func routine used for the copy and the contents of the data area defined2259by cpy_data and cpy_size may be built in any fashion that will allow the copy2260to be done. For instance, the cpy_data buffer can hold the actual data to be2261copied or it may hold a list of pointers to the data. 
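For illustration only (a sketch of what generated code could look like, not the
output of any particular compiler; the names are hypothetical), a
<tt>copyprivate(a, b)</tt> with an <tt>int a</tt> and a <tt>double b</tt> might
build cpy_data as a two-entry pointer list and supply a cpy_func such as:
@code
static void example_cpy_func(void *dst_raw, void *src_raw) {
  void **dst = (void **)dst_raw; // pointer list of the receiving thread
  void **src = (void **)src_raw; // pointer list of the single thread
  *(int *)dst[0] = *(int *)src[0];       // copy a
  *(double *)dst[1] = *(double *)src[1]; // copy b
}
@endcode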
The cpy_func routine must2262interpret the cpy_data buffer appropriately.22632264The interface to cpy_func is as follows:2265@code2266void cpy_func( void *destination, void *source )2267@endcode2268where void *destination is the cpy_data pointer for the thread being copied to2269and void *source is the cpy_data pointer for the thread being copied from.2270*/2271void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,2272void *cpy_data, void (*cpy_func)(void *, void *),2273kmp_int32 didit) {2274void **data_ptr;2275KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));2276__kmp_assert_valid_gtid(gtid);22772278KMP_MB();22792280data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;22812282if (__kmp_env_consistency_check) {2283if (loc == 0) {2284KMP_WARNING(ConstructIdentInvalid);2285}2286}22872288// ToDo: Optimize the following two barriers into some kind of split barrier22892290if (didit)2291*data_ptr = cpy_data;22922293#if OMPT_SUPPORT2294ompt_frame_t *ompt_frame;2295if (ompt_enabled.enabled) {2296__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);2297if (ompt_frame->enter_frame.ptr == NULL)2298ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);2299}2300OMPT_STORE_RETURN_ADDRESS(gtid);2301#endif2302/* This barrier is not a barrier region boundary */2303#if USE_ITT_NOTIFY2304__kmp_threads[gtid]->th.th_ident = loc;2305#endif2306__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);23072308if (!didit)2309(*cpy_func)(cpy_data, *data_ptr);23102311// Consider next barrier a user-visible barrier for barrier region boundaries2312// Nesting checks are already handled by the single construct checks2313{2314#if OMPT_SUPPORT2315OMPT_STORE_RETURN_ADDRESS(gtid);2316#endif2317#if USE_ITT_NOTIFY2318__kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.2319// tasks can overwrite the location)2320#endif2321__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);2322#if OMPT_SUPPORT && OMPT_OPTIONAL2323if (ompt_enabled.enabled) {2324ompt_frame->enter_frame = ompt_data_none;2325}2326#endif2327}2328}23292330/* --------------------------------------------------------------------------*/2331/*!2332@ingroup THREADPRIVATE2333@param loc source location information2334@param gtid global thread number2335@param cpy_data pointer to the data to be saved/copied or 02336@return the saved pointer to the data23372338__kmpc_copyprivate_light is a lighter version of __kmpc_copyprivate:2339__kmpc_copyprivate_light only saves the pointer it's given (if it's not 0, so2340coming from single), and returns that pointer in all calls (for single thread2341it's not needed). This version doesn't do any actual data copying. Data copying2342has to be done somewhere else, e.g. inline in the generated code. 
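As an illustrative sketch only (not the output of any particular compiler;
<tt>data</tt>, <tt>won_single</tt> and <tt>src</tt> are hypothetical names for a
thread's private variable, the result of the single-construct test, and the
returned pointer), the expected usage pattern is roughly:
@code
void *src = __kmpc_copyprivate_light(loc, gtid, won_single ? (void *)&data : 0);
if (!won_single)
  data = *(kmp_int32 *)src; // inline copy emitted by the compiler
__kmpc_barrier(loc, gtid);  // explicit barrier once all copying is done
@endcode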
Due to this,2343this function doesn't have any barrier at the end of the function, like2344__kmpc_copyprivate does, so generated code needs barrier after copying of all2345data was done.2346*/2347void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid, void *cpy_data) {2348void **data_ptr;23492350KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));23512352KMP_MB();23532354data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;23552356if (__kmp_env_consistency_check) {2357if (loc == 0) {2358KMP_WARNING(ConstructIdentInvalid);2359}2360}23612362// ToDo: Optimize the following barrier23632364if (cpy_data)2365*data_ptr = cpy_data;23662367#if OMPT_SUPPORT2368ompt_frame_t *ompt_frame;2369if (ompt_enabled.enabled) {2370__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);2371if (ompt_frame->enter_frame.ptr == NULL)2372ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);2373OMPT_STORE_RETURN_ADDRESS(gtid);2374}2375#endif2376/* This barrier is not a barrier region boundary */2377#if USE_ITT_NOTIFY2378__kmp_threads[gtid]->th.th_ident = loc;2379#endif2380__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);23812382return *data_ptr;2383}23842385/* -------------------------------------------------------------------------- */23862387#define INIT_LOCK __kmp_init_user_lock_with_checks2388#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks2389#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks2390#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed2391#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks2392#define ACQUIRE_NESTED_LOCK_TIMED \2393__kmp_acquire_nested_user_lock_with_checks_timed2394#define RELEASE_LOCK __kmp_release_user_lock_with_checks2395#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks2396#define TEST_LOCK __kmp_test_user_lock_with_checks2397#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks2398#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks2399#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks24002401// TODO: Make check abort messages use location info & pass it into2402// with_checks routines24032404#if KMP_USE_DYNAMIC_LOCK24052406// internal lock initializer2407static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,2408kmp_dyna_lockseq_t seq) {2409if (KMP_IS_D_LOCK(seq)) {2410KMP_INIT_D_LOCK(lock, seq);2411#if USE_ITT_BUILD2412__kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);2413#endif2414} else {2415KMP_INIT_I_LOCK(lock, seq);2416#if USE_ITT_BUILD2417kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);2418__kmp_itt_lock_creating(ilk->lock, loc);2419#endif2420}2421}24222423// internal nest lock initializer2424static __forceinline void2425__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,2426kmp_dyna_lockseq_t seq) {2427#if KMP_USE_TSX2428// Don't have nested lock implementation for speculative locks2429if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||2430seq == lockseq_rtm_spin || seq == lockseq_adaptive)2431seq = __kmp_user_lock_seq;2432#endif2433switch (seq) {2434case lockseq_tas:2435seq = lockseq_nested_tas;2436break;2437#if KMP_USE_FUTEX2438case lockseq_futex:2439seq = lockseq_nested_futex;2440break;2441#endif2442case lockseq_ticket:2443seq = lockseq_nested_ticket;2444break;2445case lockseq_queuing:2446seq = lockseq_nested_queuing;2447break;2448case lockseq_drdpa:2449seq = lockseq_nested_drdpa;2450break;2451default:2452seq = lockseq_nested_queuing;2453}2454KMP_INIT_I_LOCK(lock, 
seq);2455#if USE_ITT_BUILD2456kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);2457__kmp_itt_lock_creating(ilk->lock, loc);2458#endif2459}24602461/* initialize the lock with a hint */2462void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,2463uintptr_t hint) {2464KMP_DEBUG_ASSERT(__kmp_init_serial);2465if (__kmp_env_consistency_check && user_lock == NULL) {2466KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");2467}24682469__kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));24702471#if OMPT_SUPPORT && OMPT_OPTIONAL2472// This is the case, if called from omp_init_lock_with_hint:2473void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);2474if (!codeptr)2475codeptr = OMPT_GET_RETURN_ADDRESS(0);2476if (ompt_enabled.ompt_callback_lock_init) {2477ompt_callbacks.ompt_callback(ompt_callback_lock_init)(2478ompt_mutex_lock, (omp_lock_hint_t)hint,2479__ompt_get_mutex_impl_type(user_lock),2480(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);2481}2482#endif2483}24842485/* initialize the lock with a hint */2486void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,2487void **user_lock, uintptr_t hint) {2488KMP_DEBUG_ASSERT(__kmp_init_serial);2489if (__kmp_env_consistency_check && user_lock == NULL) {2490KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");2491}24922493__kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));24942495#if OMPT_SUPPORT && OMPT_OPTIONAL2496// This is the case, if called from omp_init_lock_with_hint:2497void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);2498if (!codeptr)2499codeptr = OMPT_GET_RETURN_ADDRESS(0);2500if (ompt_enabled.ompt_callback_lock_init) {2501ompt_callbacks.ompt_callback(ompt_callback_lock_init)(2502ompt_mutex_nest_lock, (omp_lock_hint_t)hint,2503__ompt_get_mutex_impl_type(user_lock),2504(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);2505}2506#endif2507}25082509#endif // KMP_USE_DYNAMIC_LOCK25102511/* initialize the lock */2512void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {2513#if KMP_USE_DYNAMIC_LOCK25142515KMP_DEBUG_ASSERT(__kmp_init_serial);2516if (__kmp_env_consistency_check && user_lock == NULL) {2517KMP_FATAL(LockIsUninitialized, "omp_init_lock");2518}2519__kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);25202521#if OMPT_SUPPORT && OMPT_OPTIONAL2522// This is the case, if called from omp_init_lock_with_hint:2523void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);2524if (!codeptr)2525codeptr = OMPT_GET_RETURN_ADDRESS(0);2526if (ompt_enabled.ompt_callback_lock_init) {2527ompt_callbacks.ompt_callback(ompt_callback_lock_init)(2528ompt_mutex_lock, omp_lock_hint_none,2529__ompt_get_mutex_impl_type(user_lock),2530(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);2531}2532#endif25332534#else // KMP_USE_DYNAMIC_LOCK25352536static char const *const func = "omp_init_lock";2537kmp_user_lock_p lck;2538KMP_DEBUG_ASSERT(__kmp_init_serial);25392540if (__kmp_env_consistency_check) {2541if (user_lock == NULL) {2542KMP_FATAL(LockIsUninitialized, func);2543}2544}25452546KMP_CHECK_USER_LOCK_INIT();25472548if ((__kmp_user_lock_kind == lk_tas) &&2549(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {2550lck = (kmp_user_lock_p)user_lock;2551}2552#if KMP_USE_FUTEX2553else if ((__kmp_user_lock_kind == lk_futex) &&2554(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {2555lck = (kmp_user_lock_p)user_lock;2556}2557#endif2558else {2559lck = __kmp_user_lock_allocate(user_lock, gtid, 0);2560}2561INIT_LOCK(lck);2562__kmp_set_user_lock_location(lck, loc);25632564#if 
OMPT_SUPPORT && OMPT_OPTIONAL2565// This is the case, if called from omp_init_lock_with_hint:2566void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);2567if (!codeptr)2568codeptr = OMPT_GET_RETURN_ADDRESS(0);2569if (ompt_enabled.ompt_callback_lock_init) {2570ompt_callbacks.ompt_callback(ompt_callback_lock_init)(2571ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),2572(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);2573}2574#endif25752576#if USE_ITT_BUILD2577__kmp_itt_lock_creating(lck);2578#endif /* USE_ITT_BUILD */25792580#endif // KMP_USE_DYNAMIC_LOCK2581} // __kmpc_init_lock25822583/* initialize the lock */2584void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {2585#if KMP_USE_DYNAMIC_LOCK25862587KMP_DEBUG_ASSERT(__kmp_init_serial);2588if (__kmp_env_consistency_check && user_lock == NULL) {2589KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");2590}2591__kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);25922593#if OMPT_SUPPORT && OMPT_OPTIONAL2594// This is the case, if called from omp_init_lock_with_hint:2595void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);2596if (!codeptr)2597codeptr = OMPT_GET_RETURN_ADDRESS(0);2598if (ompt_enabled.ompt_callback_lock_init) {2599ompt_callbacks.ompt_callback(ompt_callback_lock_init)(2600ompt_mutex_nest_lock, omp_lock_hint_none,2601__ompt_get_mutex_impl_type(user_lock),2602(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);2603}2604#endif26052606#else // KMP_USE_DYNAMIC_LOCK26072608static char const *const func = "omp_init_nest_lock";2609kmp_user_lock_p lck;2610KMP_DEBUG_ASSERT(__kmp_init_serial);26112612if (__kmp_env_consistency_check) {2613if (user_lock == NULL) {2614KMP_FATAL(LockIsUninitialized, func);2615}2616}26172618KMP_CHECK_USER_LOCK_INIT();26192620if ((__kmp_user_lock_kind == lk_tas) &&2621(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=2622OMP_NEST_LOCK_T_SIZE)) {2623lck = (kmp_user_lock_p)user_lock;2624}2625#if KMP_USE_FUTEX2626else if ((__kmp_user_lock_kind == lk_futex) &&2627(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=2628OMP_NEST_LOCK_T_SIZE)) {2629lck = (kmp_user_lock_p)user_lock;2630}2631#endif2632else {2633lck = __kmp_user_lock_allocate(user_lock, gtid, 0);2634}26352636INIT_NESTED_LOCK(lck);2637__kmp_set_user_lock_location(lck, loc);26382639#if OMPT_SUPPORT && OMPT_OPTIONAL2640// This is the case, if called from omp_init_lock_with_hint:2641void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);2642if (!codeptr)2643codeptr = OMPT_GET_RETURN_ADDRESS(0);2644if (ompt_enabled.ompt_callback_lock_init) {2645ompt_callbacks.ompt_callback(ompt_callback_lock_init)(2646ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),2647(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);2648}2649#endif26502651#if USE_ITT_BUILD2652__kmp_itt_lock_creating(lck);2653#endif /* USE_ITT_BUILD */26542655#endif // KMP_USE_DYNAMIC_LOCK2656} // __kmpc_init_nest_lock26572658void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {2659#if KMP_USE_DYNAMIC_LOCK26602661#if USE_ITT_BUILD2662kmp_user_lock_p lck;2663if (KMP_EXTRACT_D_TAG(user_lock) == 0) {2664lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;2665} else {2666lck = (kmp_user_lock_p)user_lock;2667}2668__kmp_itt_lock_destroyed(lck);2669#endif2670#if OMPT_SUPPORT && OMPT_OPTIONAL2671// This is the case, if called from omp_init_lock_with_hint:2672void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);2673if (!codeptr)2674codeptr = OMPT_GET_RETURN_ADDRESS(0);2675if 
(ompt_enabled.ompt_callback_lock_destroy) {2676ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(2677ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);2678}2679#endif2680KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);2681#else2682kmp_user_lock_p lck;26832684if ((__kmp_user_lock_kind == lk_tas) &&2685(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {2686lck = (kmp_user_lock_p)user_lock;2687}2688#if KMP_USE_FUTEX2689else if ((__kmp_user_lock_kind == lk_futex) &&2690(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {2691lck = (kmp_user_lock_p)user_lock;2692}2693#endif2694else {2695lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");2696}26972698#if OMPT_SUPPORT && OMPT_OPTIONAL2699// This is the case, if called from omp_init_lock_with_hint:2700void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);2701if (!codeptr)2702codeptr = OMPT_GET_RETURN_ADDRESS(0);2703if (ompt_enabled.ompt_callback_lock_destroy) {2704ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(2705ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);2706}2707#endif27082709#if USE_ITT_BUILD2710__kmp_itt_lock_destroyed(lck);2711#endif /* USE_ITT_BUILD */2712DESTROY_LOCK(lck);27132714if ((__kmp_user_lock_kind == lk_tas) &&2715(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {2716;2717}2718#if KMP_USE_FUTEX2719else if ((__kmp_user_lock_kind == lk_futex) &&2720(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {2721;2722}2723#endif2724else {2725__kmp_user_lock_free(user_lock, gtid, lck);2726}2727#endif // KMP_USE_DYNAMIC_LOCK2728} // __kmpc_destroy_lock27292730/* destroy the lock */2731void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {2732#if KMP_USE_DYNAMIC_LOCK27332734#if USE_ITT_BUILD2735kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);2736__kmp_itt_lock_destroyed(ilk->lock);2737#endif2738#if OMPT_SUPPORT && OMPT_OPTIONAL2739// This is the case, if called from omp_init_lock_with_hint:2740void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);2741if (!codeptr)2742codeptr = OMPT_GET_RETURN_ADDRESS(0);2743if (ompt_enabled.ompt_callback_lock_destroy) {2744ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(2745ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);2746}2747#endif2748KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);27492750#else // KMP_USE_DYNAMIC_LOCK27512752kmp_user_lock_p lck;27532754if ((__kmp_user_lock_kind == lk_tas) &&2755(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=2756OMP_NEST_LOCK_T_SIZE)) {2757lck = (kmp_user_lock_p)user_lock;2758}2759#if KMP_USE_FUTEX2760else if ((__kmp_user_lock_kind == lk_futex) &&2761(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=2762OMP_NEST_LOCK_T_SIZE)) {2763lck = (kmp_user_lock_p)user_lock;2764}2765#endif2766else {2767lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");2768}27692770#if OMPT_SUPPORT && OMPT_OPTIONAL2771// This is the case, if called from omp_init_lock_with_hint:2772void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);2773if (!codeptr)2774codeptr = OMPT_GET_RETURN_ADDRESS(0);2775if (ompt_enabled.ompt_callback_lock_destroy) {2776ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(2777ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);2778}2779#endif27802781#if USE_ITT_BUILD2782__kmp_itt_lock_destroyed(lck);2783#endif /* USE_ITT_BUILD */27842785DESTROY_NESTED_LOCK(lck);27862787if ((__kmp_user_lock_kind == lk_tas) &&2788(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) 
<=2789OMP_NEST_LOCK_T_SIZE)) {2790;2791}2792#if KMP_USE_FUTEX2793else if ((__kmp_user_lock_kind == lk_futex) &&2794(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=2795OMP_NEST_LOCK_T_SIZE)) {2796;2797}2798#endif2799else {2800__kmp_user_lock_free(user_lock, gtid, lck);2801}2802#endif // KMP_USE_DYNAMIC_LOCK2803} // __kmpc_destroy_nest_lock28042805void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {2806KMP_COUNT_BLOCK(OMP_set_lock);2807#if KMP_USE_DYNAMIC_LOCK2808int tag = KMP_EXTRACT_D_TAG(user_lock);2809#if USE_ITT_BUILD2810__kmp_itt_lock_acquiring(2811(kmp_user_lock_p)2812user_lock); // itt function will get to the right lock object.2813#endif2814#if OMPT_SUPPORT && OMPT_OPTIONAL2815// This is the case, if called from omp_init_lock_with_hint:2816void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);2817if (!codeptr)2818codeptr = OMPT_GET_RETURN_ADDRESS(0);2819if (ompt_enabled.ompt_callback_mutex_acquire) {2820ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(2821ompt_mutex_lock, omp_lock_hint_none,2822__ompt_get_mutex_impl_type(user_lock),2823(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);2824}2825#endif2826#if KMP_USE_INLINED_TAS2827if (tag == locktag_tas && !__kmp_env_consistency_check) {2828KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);2829} else2830#elif KMP_USE_INLINED_FUTEX2831if (tag == locktag_futex && !__kmp_env_consistency_check) {2832KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);2833} else2834#endif2835{2836__kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);2837}2838#if USE_ITT_BUILD2839__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);2840#endif2841#if OMPT_SUPPORT && OMPT_OPTIONAL2842if (ompt_enabled.ompt_callback_mutex_acquired) {2843ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(2844ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);2845}2846#endif28472848#else // KMP_USE_DYNAMIC_LOCK28492850kmp_user_lock_p lck;28512852if ((__kmp_user_lock_kind == lk_tas) &&2853(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {2854lck = (kmp_user_lock_p)user_lock;2855}2856#if KMP_USE_FUTEX2857else if ((__kmp_user_lock_kind == lk_futex) &&2858(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {2859lck = (kmp_user_lock_p)user_lock;2860}2861#endif2862else {2863lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");2864}28652866#if USE_ITT_BUILD2867__kmp_itt_lock_acquiring(lck);2868#endif /* USE_ITT_BUILD */2869#if OMPT_SUPPORT && OMPT_OPTIONAL2870// This is the case, if called from omp_init_lock_with_hint:2871void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);2872if (!codeptr)2873codeptr = OMPT_GET_RETURN_ADDRESS(0);2874if (ompt_enabled.ompt_callback_mutex_acquire) {2875ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(2876ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),2877(ompt_wait_id_t)(uintptr_t)lck, codeptr);2878}2879#endif28802881ACQUIRE_LOCK(lck, gtid);28822883#if USE_ITT_BUILD2884__kmp_itt_lock_acquired(lck);2885#endif /* USE_ITT_BUILD */28862887#if OMPT_SUPPORT && OMPT_OPTIONAL2888if (ompt_enabled.ompt_callback_mutex_acquired) {2889ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(2890ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);2891}2892#endif28932894#endif // KMP_USE_DYNAMIC_LOCK2895}28962897void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {2898#if KMP_USE_DYNAMIC_LOCK28992900#if USE_ITT_BUILD2901__kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);2902#endif2903#if OMPT_SUPPORT && OMPT_OPTIONAL2904// This is the case, if called from 
omp_init_lock_with_hint:2905void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);2906if (!codeptr)2907codeptr = OMPT_GET_RETURN_ADDRESS(0);2908if (ompt_enabled.enabled) {2909if (ompt_enabled.ompt_callback_mutex_acquire) {2910ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(2911ompt_mutex_nest_lock, omp_lock_hint_none,2912__ompt_get_mutex_impl_type(user_lock),2913(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);2914}2915}2916#endif2917int acquire_status =2918KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);2919(void)acquire_status;2920#if USE_ITT_BUILD2921__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);2922#endif29232924#if OMPT_SUPPORT && OMPT_OPTIONAL2925if (ompt_enabled.enabled) {2926if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {2927if (ompt_enabled.ompt_callback_mutex_acquired) {2928// lock_first2929ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(2930ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,2931codeptr);2932}2933} else {2934if (ompt_enabled.ompt_callback_nest_lock) {2935// lock_next2936ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(2937ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);2938}2939}2940}2941#endif29422943#else // KMP_USE_DYNAMIC_LOCK2944int acquire_status;2945kmp_user_lock_p lck;29462947if ((__kmp_user_lock_kind == lk_tas) &&2948(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=2949OMP_NEST_LOCK_T_SIZE)) {2950lck = (kmp_user_lock_p)user_lock;2951}2952#if KMP_USE_FUTEX2953else if ((__kmp_user_lock_kind == lk_futex) &&2954(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=2955OMP_NEST_LOCK_T_SIZE)) {2956lck = (kmp_user_lock_p)user_lock;2957}2958#endif2959else {2960lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");2961}29622963#if USE_ITT_BUILD2964__kmp_itt_lock_acquiring(lck);2965#endif /* USE_ITT_BUILD */2966#if OMPT_SUPPORT && OMPT_OPTIONAL2967// This is the case, if called from omp_init_lock_with_hint:2968void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);2969if (!codeptr)2970codeptr = OMPT_GET_RETURN_ADDRESS(0);2971if (ompt_enabled.enabled) {2972if (ompt_enabled.ompt_callback_mutex_acquire) {2973ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(2974ompt_mutex_nest_lock, omp_lock_hint_none,2975__ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,2976codeptr);2977}2978}2979#endif29802981ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);29822983#if USE_ITT_BUILD2984__kmp_itt_lock_acquired(lck);2985#endif /* USE_ITT_BUILD */29862987#if OMPT_SUPPORT && OMPT_OPTIONAL2988if (ompt_enabled.enabled) {2989if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {2990if (ompt_enabled.ompt_callback_mutex_acquired) {2991// lock_first2992ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(2993ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);2994}2995} else {2996if (ompt_enabled.ompt_callback_nest_lock) {2997// lock_next2998ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(2999ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);3000}3001}3002}3003#endif30043005#endif // KMP_USE_DYNAMIC_LOCK3006}30073008void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {3009#if KMP_USE_DYNAMIC_LOCK30103011int tag = KMP_EXTRACT_D_TAG(user_lock);3012#if USE_ITT_BUILD3013__kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);3014#endif3015#if KMP_USE_INLINED_TAS3016if (tag == locktag_tas && !__kmp_env_consistency_check) {3017KMP_RELEASE_TAS_LOCK(user_lock, gtid);3018} else3019#elif KMP_USE_INLINED_FUTEX3020if (tag == locktag_futex && 
!__kmp_env_consistency_check) {3021KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);3022} else3023#endif3024{3025__kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);3026}30273028#if OMPT_SUPPORT && OMPT_OPTIONAL3029// This is the case, if called from omp_init_lock_with_hint:3030void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);3031if (!codeptr)3032codeptr = OMPT_GET_RETURN_ADDRESS(0);3033if (ompt_enabled.ompt_callback_mutex_released) {3034ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(3035ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);3036}3037#endif30383039#else // KMP_USE_DYNAMIC_LOCK30403041kmp_user_lock_p lck;30423043/* Can't use serial interval since not block structured */3044/* release the lock */30453046if ((__kmp_user_lock_kind == lk_tas) &&3047(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {3048#if KMP_OS_LINUX && \3049(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)3050// "fast" path implemented to fix customer performance issue3051#if USE_ITT_BUILD3052__kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);3053#endif /* USE_ITT_BUILD */3054TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);3055KMP_MB();30563057#if OMPT_SUPPORT && OMPT_OPTIONAL3058// This is the case, if called from omp_init_lock_with_hint:3059void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);3060if (!codeptr)3061codeptr = OMPT_GET_RETURN_ADDRESS(0);3062if (ompt_enabled.ompt_callback_mutex_released) {3063ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(3064ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);3065}3066#endif30673068return;3069#else3070lck = (kmp_user_lock_p)user_lock;3071#endif3072}3073#if KMP_USE_FUTEX3074else if ((__kmp_user_lock_kind == lk_futex) &&3075(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {3076lck = (kmp_user_lock_p)user_lock;3077}3078#endif3079else {3080lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");3081}30823083#if USE_ITT_BUILD3084__kmp_itt_lock_releasing(lck);3085#endif /* USE_ITT_BUILD */30863087RELEASE_LOCK(lck, gtid);30883089#if OMPT_SUPPORT && OMPT_OPTIONAL3090// This is the case, if called from omp_init_lock_with_hint:3091void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);3092if (!codeptr)3093codeptr = OMPT_GET_RETURN_ADDRESS(0);3094if (ompt_enabled.ompt_callback_mutex_released) {3095ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(3096ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);3097}3098#endif30993100#endif // KMP_USE_DYNAMIC_LOCK3101}31023103/* release the lock */3104void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {3105#if KMP_USE_DYNAMIC_LOCK31063107#if USE_ITT_BUILD3108__kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);3109#endif3110int release_status =3111KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);3112(void)release_status;31133114#if OMPT_SUPPORT && OMPT_OPTIONAL3115// This is the case, if called from omp_init_lock_with_hint:3116void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);3117if (!codeptr)3118codeptr = OMPT_GET_RETURN_ADDRESS(0);3119if (ompt_enabled.enabled) {3120if (release_status == KMP_LOCK_RELEASED) {3121if (ompt_enabled.ompt_callback_mutex_released) {3122// release_lock_last3123ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(3124ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,3125codeptr);3126}3127} else if (ompt_enabled.ompt_callback_nest_lock) {3128// release_lock_prev3129ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(3130ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, 
codeptr);3131}3132}3133#endif31343135#else // KMP_USE_DYNAMIC_LOCK31363137kmp_user_lock_p lck;31383139/* Can't use serial interval since not block structured */31403141if ((__kmp_user_lock_kind == lk_tas) &&3142(sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=3143OMP_NEST_LOCK_T_SIZE)) {3144#if KMP_OS_LINUX && \3145(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)3146// "fast" path implemented to fix customer performance issue3147kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;3148#if USE_ITT_BUILD3149__kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);3150#endif /* USE_ITT_BUILD */31513152#if OMPT_SUPPORT && OMPT_OPTIONAL3153int release_status = KMP_LOCK_STILL_HELD;3154#endif31553156if (--(tl->lk.depth_locked) == 0) {3157TCW_4(tl->lk.poll, 0);3158#if OMPT_SUPPORT && OMPT_OPTIONAL3159release_status = KMP_LOCK_RELEASED;3160#endif3161}3162KMP_MB();31633164#if OMPT_SUPPORT && OMPT_OPTIONAL3165// This is the case, if called from omp_init_lock_with_hint:3166void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);3167if (!codeptr)3168codeptr = OMPT_GET_RETURN_ADDRESS(0);3169if (ompt_enabled.enabled) {3170if (release_status == KMP_LOCK_RELEASED) {3171if (ompt_enabled.ompt_callback_mutex_released) {3172// release_lock_last3173ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(3174ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);3175}3176} else if (ompt_enabled.ompt_callback_nest_lock) {3177// release_lock_previous3178ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(3179ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);3180}3181}3182#endif31833184return;3185#else3186lck = (kmp_user_lock_p)user_lock;3187#endif3188}3189#if KMP_USE_FUTEX3190else if ((__kmp_user_lock_kind == lk_futex) &&3191(sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=3192OMP_NEST_LOCK_T_SIZE)) {3193lck = (kmp_user_lock_p)user_lock;3194}3195#endif3196else {3197lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");3198}31993200#if USE_ITT_BUILD3201__kmp_itt_lock_releasing(lck);3202#endif /* USE_ITT_BUILD */32033204int release_status;3205release_status = RELEASE_NESTED_LOCK(lck, gtid);3206#if OMPT_SUPPORT && OMPT_OPTIONAL3207// This is the case, if called from omp_init_lock_with_hint:3208void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);3209if (!codeptr)3210codeptr = OMPT_GET_RETURN_ADDRESS(0);3211if (ompt_enabled.enabled) {3212if (release_status == KMP_LOCK_RELEASED) {3213if (ompt_enabled.ompt_callback_mutex_released) {3214// release_lock_last3215ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(3216ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);3217}3218} else if (ompt_enabled.ompt_callback_nest_lock) {3219// release_lock_previous3220ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(3221ompt_mutex_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);3222}3223}3224#endif32253226#endif // KMP_USE_DYNAMIC_LOCK3227}32283229/* try to acquire the lock */3230int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {3231KMP_COUNT_BLOCK(OMP_test_lock);32323233#if KMP_USE_DYNAMIC_LOCK3234int rc;3235int tag = KMP_EXTRACT_D_TAG(user_lock);3236#if USE_ITT_BUILD3237__kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);3238#endif3239#if OMPT_SUPPORT && OMPT_OPTIONAL3240// This is the case, if called from omp_init_lock_with_hint:3241void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);3242if (!codeptr)3243codeptr = OMPT_GET_RETURN_ADDRESS(0);3244if (ompt_enabled.ompt_callback_mutex_acquire) 
{3245ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(3246ompt_mutex_test_lock, omp_lock_hint_none,3247__ompt_get_mutex_impl_type(user_lock),3248(ompt_wait_id_t)(uintptr_t)user_lock, codeptr);3249}3250#endif3251#if KMP_USE_INLINED_TAS3252if (tag == locktag_tas && !__kmp_env_consistency_check) {3253KMP_TEST_TAS_LOCK(user_lock, gtid, rc);3254} else3255#elif KMP_USE_INLINED_FUTEX3256if (tag == locktag_futex && !__kmp_env_consistency_check) {3257KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);3258} else3259#endif3260{3261rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);3262}3263if (rc) {3264#if USE_ITT_BUILD3265__kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);3266#endif3267#if OMPT_SUPPORT && OMPT_OPTIONAL3268if (ompt_enabled.ompt_callback_mutex_acquired) {3269ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(3270ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);3271}3272#endif3273return FTN_TRUE;3274} else {3275#if USE_ITT_BUILD3276__kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);3277#endif3278return FTN_FALSE;3279}32803281#else // KMP_USE_DYNAMIC_LOCK32823283kmp_user_lock_p lck;3284int rc;32853286if ((__kmp_user_lock_kind == lk_tas) &&3287(sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {3288lck = (kmp_user_lock_p)user_lock;3289}3290#if KMP_USE_FUTEX3291else if ((__kmp_user_lock_kind == lk_futex) &&3292(sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {3293lck = (kmp_user_lock_p)user_lock;3294}3295#endif3296else {3297lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");3298}32993300#if USE_ITT_BUILD3301__kmp_itt_lock_acquiring(lck);3302#endif /* USE_ITT_BUILD */3303#if OMPT_SUPPORT && OMPT_OPTIONAL3304// This is the case, if called from omp_init_lock_with_hint:3305void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);3306if (!codeptr)3307codeptr = OMPT_GET_RETURN_ADDRESS(0);3308if (ompt_enabled.ompt_callback_mutex_acquire) {3309ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(3310ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),3311(ompt_wait_id_t)(uintptr_t)lck, codeptr);3312}3313#endif33143315rc = TEST_LOCK(lck, gtid);3316#if USE_ITT_BUILD3317if (rc) {3318__kmp_itt_lock_acquired(lck);3319} else {3320__kmp_itt_lock_cancelled(lck);3321}3322#endif /* USE_ITT_BUILD */3323#if OMPT_SUPPORT && OMPT_OPTIONAL3324if (rc && ompt_enabled.ompt_callback_mutex_acquired) {3325ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(3326ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);3327}3328#endif33293330return (rc ? 
FTN_TRUE : FTN_FALSE);

/* Can't use serial interval since not block structured */

#endif // KMP_USE_DYNAMIC_LOCK
}

/* try to acquire the lock */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_test_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#if KMP_USE_FUTEX
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  }
#endif
  else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

#if USE_ITT_BUILD
  __kmp_itt_lock_acquiring(lck);
#endif /* USE_ITT_BUILD */

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_test_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
#if USE_ITT_BUILD
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#endif /* USE_ITT_BUILD */
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

/*
Can't use serial interval since not block structured */34513452#endif // KMP_USE_DYNAMIC_LOCK3453}34543455// Interface to fast scalable reduce methods routines34563457// keep the selected method in a thread local structure for cross-function3458// usage: will be used in __kmpc_end_reduce* functions;3459// another solution: to re-determine the method one more time in3460// __kmpc_end_reduce* functions (new prototype required then)3461// AT: which solution is better?3462#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \3463((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))34643465#define __KMP_GET_REDUCTION_METHOD(gtid) \3466(__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)34673468// description of the packed_reduction_method variable: look at the macros in3469// kmp.h34703471// used in a critical section reduce block3472static __forceinline void3473__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,3474kmp_critical_name *crit) {34753476// this lock was visible to a customer and to the threading profile tool as a3477// serial overhead span (although it's used for an internal purpose only)3478// why was it visible in previous implementation?3479// should we keep it visible in new reduce block?3480kmp_user_lock_p lck;34813482#if KMP_USE_DYNAMIC_LOCK34833484kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;3485// Check if it is initialized.3486if (*lk == 0) {3487if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {3488KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,3489KMP_GET_D_TAG(__kmp_user_lock_seq));3490} else {3491__kmp_init_indirect_csptr(crit, loc, global_tid,3492KMP_GET_I_TAG(__kmp_user_lock_seq));3493}3494}3495// Branch for accessing the actual lock object and set operation. This3496// branching is inevitable since this lock initialization does not follow the3497// normal dispatch path (lock table is not used).3498if (KMP_EXTRACT_D_TAG(lk) != 0) {3499lck = (kmp_user_lock_p)lk;3500KMP_DEBUG_ASSERT(lck != NULL);3501if (__kmp_env_consistency_check) {3502__kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);3503}3504KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);3505} else {3506kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);3507lck = ilk->lock;3508KMP_DEBUG_ASSERT(lck != NULL);3509if (__kmp_env_consistency_check) {3510__kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);3511}3512KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);3513}35143515#else // KMP_USE_DYNAMIC_LOCK35163517// We know that the fast reduction code is only emitted by Intel compilers3518// with 32 byte critical sections. 
If there isn't enough space, then we3519// have to use a pointer.3520if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {3521lck = (kmp_user_lock_p)crit;3522} else {3523lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);3524}3525KMP_DEBUG_ASSERT(lck != NULL);35263527if (__kmp_env_consistency_check)3528__kmp_push_sync(global_tid, ct_critical, loc, lck);35293530__kmp_acquire_user_lock_with_checks(lck, global_tid);35313532#endif // KMP_USE_DYNAMIC_LOCK3533}35343535// used in a critical section reduce block3536static __forceinline void3537__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,3538kmp_critical_name *crit) {35393540kmp_user_lock_p lck;35413542#if KMP_USE_DYNAMIC_LOCK35433544if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {3545lck = (kmp_user_lock_p)crit;3546if (__kmp_env_consistency_check)3547__kmp_pop_sync(global_tid, ct_critical, loc);3548KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);3549} else {3550kmp_indirect_lock_t *ilk =3551(kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));3552if (__kmp_env_consistency_check)3553__kmp_pop_sync(global_tid, ct_critical, loc);3554KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);3555}35563557#else // KMP_USE_DYNAMIC_LOCK35583559// We know that the fast reduction code is only emitted by Intel compilers3560// with 32 byte critical sections. If there isn't enough space, then we have3561// to use a pointer.3562if (__kmp_base_user_lock_size > 32) {3563lck = *((kmp_user_lock_p *)crit);3564KMP_ASSERT(lck != NULL);3565} else {3566lck = (kmp_user_lock_p)crit;3567}35683569if (__kmp_env_consistency_check)3570__kmp_pop_sync(global_tid, ct_critical, loc);35713572__kmp_release_user_lock_with_checks(lck, global_tid);35733574#endif // KMP_USE_DYNAMIC_LOCK3575} // __kmp_end_critical_section_reduce_block35763577static __forceinline int3578__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,3579int *task_state) {3580kmp_team_t *team;35813582// Check if we are inside the teams construct?3583if (th->th.th_teams_microtask) {3584*team_p = team = th->th.th_team;3585if (team->t.t_level == th->th.th_teams_level) {3586// This is reduction at teams construct.3587KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 03588// Let's swap teams temporarily for the reduction.3589th->th.th_info.ds.ds_tid = team->t.t_master_tid;3590th->th.th_team = team->t.t_parent;3591th->th.th_team_nproc = th->th.th_team->t.t_nproc;3592th->th.th_task_team = th->th.th_team->t.t_task_team[0];3593*task_state = th->th.th_task_state;3594th->th.th_task_state = 0;35953596return 1;3597}3598}3599return 0;3600}36013602static __forceinline void3603__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {3604// Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.3605th->th.th_info.ds.ds_tid = 0;3606th->th.th_team = team;3607th->th.th_team_nproc = team->t.t_nproc;3608th->th.th_task_team = team->t.t_task_team[task_state];3609__kmp_type_convert(task_state, &(th->th.th_task_state));3610}36113612/* 2.a.i. 
Reduce Block without a terminating barrier */
/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread number
@param num_vars number of items (variables) to be reduced
@param reduce_size size of data in bytes to be reduced
@param reduce_data pointer to data to be reduced
@param reduce_func callback function providing reduction operation on two
operands and returning result of reduction in lhs_data
@param lck pointer to the unique lock data structure
@result 1 for the primary thread, 0 for all other team threads, 2 for all team
threads if atomic reduction needed

The nowait version is used for a reduce clause with the nowait argument.
*/
kmp_int32
__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                     size_t reduce_size, void *reduce_data,
                     void (*reduce_func)(void *lhs_data, void *rhs_data),
                     kmp_critical_name *lck) {

  KMP_COUNT_BLOCK(REDUCE_nowait);
  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  // why do we need this initialization here at all?
  // Reduction clause cannot be used as a stand-alone directive.

  // do not call __kmp_serial_initialize(); it will be called by
  // __kmp_parallel_initialize() if needed
  // possible detection of a false-positive race by the threadchecker ???
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  // The packed_reduction_method value will be reused by the __kmp_end_reduce*
  // function, so it must be kept in a variable.
  // The variable should be either a construct-specific or a thread-specific
  // property, not a team-specific property
  // (a thread can reach the next reduce block on the next construct, and the
  // reduce method may differ on that construct).
  // An ident_t "loc" parameter could be used as a construct-specific property
  // (but what if loc == 0?)
  // (also, if both construct-specific and team-specific variables were shared,
  // unnecessary extra synchronization would be needed).
  // A thread-specific variable is better with regard to both issues above
  // (next construct and extra syncs).
  // A thread-specific "th_local.reduction_method" variable is used currently;
  // each thread executes the 'determine' and 'set' lines (there is no need to
  // have a single thread do this, which would only add unnecessary extra
  // synchronization).

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);
  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    // usage: if
team size == 1, no synchronization is required (Intel
    // platforms only)
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // All threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen).
    // (This is not ideal: the checking block has already been closed by this
    // 'pop', but the atomic operation has not been executed yet; it will be
    // executed slightly later, literally on the next instruction.)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // AT: performance issue: a real barrier here
    // AT: (if the primary thread is slow, the other threads are blocked here
    // waiting for the primary thread to come and release them)
    // AT: (this is not what a customer might expect when specifying the NOWAIT
    // clause; specifying NOWAIT will not improve performance, which may be
    // confusing to a customer)
    // AT: another implementation of *barrier_gather*nowait() (or some other
    // design) might be faster and more in line with the sense of NOWAIT
    // AT: TO DO: run the EPCC test and compare times

// this barrier should be invisible to a customer and to the threading profile
// tool (it's neither a terminating barrier nor customer's code, it's
// used for an internal purpose)
#if OMPT_SUPPORT
    // JP: can this barrier potentially lead to task scheduling?
    // JP: as long as there is a barrier in the implementation, OMPT should and
    // will provide the barrier events, so we set up the necessary frame/return
    // addresses.
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
    __kmp_threads[global_tid]->th.th_ident = loc;
#endif
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ?
(0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // All workers except the primary thread should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait()).
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}

/*!
@ingroup SYNCHRONIZATION
@param loc source location information
@param global_tid global thread id.
@param lck pointer to the unique lock data structure

Finish the execution of a reduce nowait.
*/
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required (on Intel
    // platforms only)

    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == atomic_reduce_block) {

    // Neither the primary thread nor the other workers should get here
    // (code gen does not generate this call in case 2: atomic reduce block).
    // It would actually be better to remove this else-if branch entirely;
    // after removal this value would fall through to the 'else' and assert.

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only the primary thread gets here
    // OMPT: tree reduction is annotated in the barrier code

  } else {

    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));

  return;
}

/* 2.a.ii.
Reduce Block with a terminating barrier */38423843/*!3844@ingroup SYNCHRONIZATION3845@param loc source location information3846@param global_tid global thread number3847@param num_vars number of items (variables) to be reduced3848@param reduce_size size of data in bytes to be reduced3849@param reduce_data pointer to data to be reduced3850@param reduce_func callback function providing reduction operation on two3851operands and returning result of reduction in lhs_data3852@param lck pointer to the unique lock data structure3853@result 1 for the primary thread, 0 for all other team threads, 2 for all team3854threads if atomic reduction needed38553856A blocking reduce that includes an implicit barrier.3857*/3858kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,3859size_t reduce_size, void *reduce_data,3860void (*reduce_func)(void *lhs_data, void *rhs_data),3861kmp_critical_name *lck) {3862KMP_COUNT_BLOCK(REDUCE_wait);3863int retval = 0;3864PACKED_REDUCTION_METHOD_T packed_reduction_method;3865kmp_info_t *th;3866kmp_team_t *team;3867int teams_swapped = 0, task_state;38683869KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));3870__kmp_assert_valid_gtid(global_tid);38713872// why do we need this initialization here at all?3873// Reduction clause can not be a stand-alone directive.38743875// do not call __kmp_serial_initialize(), it will be called by3876// __kmp_parallel_initialize() if needed3877// possible detection of false-positive race by the threadchecker ???3878if (!TCR_4(__kmp_init_parallel))3879__kmp_parallel_initialize();38803881__kmp_resume_if_soft_paused();38823883// check correctness of reduce block nesting3884#if KMP_USE_DYNAMIC_LOCK3885if (__kmp_env_consistency_check)3886__kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);3887#else3888if (__kmp_env_consistency_check)3889__kmp_push_sync(global_tid, ct_reduce, loc, NULL);3890#endif38913892th = __kmp_thread_from_gtid(global_tid);3893teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);38943895packed_reduction_method = __kmp_determine_reduction_method(3896loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);3897__KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);38983899OMPT_REDUCTION_DECL(th, global_tid);39003901if (packed_reduction_method == critical_reduce_block) {39023903OMPT_REDUCTION_BEGIN;3904__kmp_enter_critical_section_reduce_block(loc, global_tid, lck);3905retval = 1;39063907} else if (packed_reduction_method == empty_reduce_block) {39083909OMPT_REDUCTION_BEGIN;3910// usage: if team size == 1, no synchronization is required ( Intel3911// platforms only )3912retval = 1;39133914} else if (packed_reduction_method == atomic_reduce_block) {39153916retval = 2;39173918} else if (TEST_REDUCTION_METHOD(packed_reduction_method,3919tree_reduce_block)) {39203921// case tree_reduce_block:3922// this barrier should be visible to a customer and to the threading profile3923// tool (it's a terminating barrier on constructs if NOWAIT not specified)3924#if OMPT_SUPPORT3925ompt_frame_t *ompt_frame;3926if (ompt_enabled.enabled) {3927__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);3928if (ompt_frame->enter_frame.ptr == NULL)3929ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);3930}3931OMPT_STORE_RETURN_ADDRESS(global_tid);3932#endif3933#if USE_ITT_NOTIFY3934__kmp_threads[global_tid]->th.th_ident =3935loc; // needed for correct notification of frames3936#endif3937retval 
=3938__kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),3939global_tid, TRUE, reduce_size, reduce_data, reduce_func);3940retval = (retval != 0) ? (0) : (1);3941#if OMPT_SUPPORT && OMPT_OPTIONAL3942if (ompt_enabled.enabled) {3943ompt_frame->enter_frame = ompt_data_none;3944}3945#endif39463947// all other workers except primary thread should do this pop here3948// (none of other workers except primary will enter __kmpc_end_reduce())3949if (__kmp_env_consistency_check) {3950if (retval == 0) { // 0: all other workers; 1: primary thread3951__kmp_pop_sync(global_tid, ct_reduce, loc);3952}3953}39543955} else {39563957// should never reach this block3958KMP_ASSERT(0); // "unexpected method"3959}3960if (teams_swapped) {3961__kmp_restore_swapped_teams(th, team, task_state);3962}39633964KA_TRACE(10,3965("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",3966global_tid, packed_reduction_method, retval));3967return retval;3968}39693970/*!3971@ingroup SYNCHRONIZATION3972@param loc source location information3973@param global_tid global thread id.3974@param lck pointer to the unique lock data structure39753976Finish the execution of a blocking reduce.3977The <tt>lck</tt> pointer must be the same as that used in the corresponding3978start function.3979*/3980void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,3981kmp_critical_name *lck) {39823983PACKED_REDUCTION_METHOD_T packed_reduction_method;3984kmp_info_t *th;3985kmp_team_t *team;3986int teams_swapped = 0, task_state;39873988KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));3989__kmp_assert_valid_gtid(global_tid);39903991th = __kmp_thread_from_gtid(global_tid);3992teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);39933994packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);39953996// this barrier should be visible to a customer and to the threading profile3997// tool (it's a terminating barrier on constructs if NOWAIT not specified)3998OMPT_REDUCTION_DECL(th, global_tid);39994000if (packed_reduction_method == critical_reduce_block) {4001__kmp_end_critical_section_reduce_block(loc, global_tid, lck);40024003OMPT_REDUCTION_END;40044005// TODO: implicit barrier: should be exposed4006#if OMPT_SUPPORT4007ompt_frame_t *ompt_frame;4008if (ompt_enabled.enabled) {4009__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);4010if (ompt_frame->enter_frame.ptr == NULL)4011ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);4012}4013OMPT_STORE_RETURN_ADDRESS(global_tid);4014#endif4015#if USE_ITT_NOTIFY4016__kmp_threads[global_tid]->th.th_ident = loc;4017#endif4018__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);4019#if OMPT_SUPPORT && OMPT_OPTIONAL4020if (ompt_enabled.enabled) {4021ompt_frame->enter_frame = ompt_data_none;4022}4023#endif40244025} else if (packed_reduction_method == empty_reduce_block) {40264027OMPT_REDUCTION_END;40284029// usage: if team size==1, no synchronization is required (Intel platforms only)40304031// TODO: implicit barrier: should be exposed4032#if OMPT_SUPPORT4033ompt_frame_t *ompt_frame;4034if (ompt_enabled.enabled) {4035__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);4036if (ompt_frame->enter_frame.ptr == NULL)4037ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);4038}4039OMPT_STORE_RETURN_ADDRESS(global_tid);4040#endif4041#if USE_ITT_NOTIFY4042__kmp_threads[global_tid]->th.th_ident = loc;4043#endif4044__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);4045#if 
OMPT_SUPPORT && OMPT_OPTIONAL4046if (ompt_enabled.enabled) {4047ompt_frame->enter_frame = ompt_data_none;4048}4049#endif40504051} else if (packed_reduction_method == atomic_reduce_block) {40524053#if OMPT_SUPPORT4054ompt_frame_t *ompt_frame;4055if (ompt_enabled.enabled) {4056__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);4057if (ompt_frame->enter_frame.ptr == NULL)4058ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);4059}4060OMPT_STORE_RETURN_ADDRESS(global_tid);4061#endif4062// TODO: implicit barrier: should be exposed4063#if USE_ITT_NOTIFY4064__kmp_threads[global_tid]->th.th_ident = loc;4065#endif4066__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);4067#if OMPT_SUPPORT && OMPT_OPTIONAL4068if (ompt_enabled.enabled) {4069ompt_frame->enter_frame = ompt_data_none;4070}4071#endif40724073} else if (TEST_REDUCTION_METHOD(packed_reduction_method,4074tree_reduce_block)) {40754076// only primary thread executes here (primary releases all other workers)4077__kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),4078global_tid);40794080} else {40814082// should never reach this block4083KMP_ASSERT(0); // "unexpected method"4084}4085if (teams_swapped) {4086__kmp_restore_swapped_teams(th, team, task_state);4087}40884089if (__kmp_env_consistency_check)4090__kmp_pop_sync(global_tid, ct_reduce, loc);40914092KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",4093global_tid, packed_reduction_method));40944095return;4096}40974098#undef __KMP_GET_REDUCTION_METHOD4099#undef __KMP_SET_REDUCTION_METHOD41004101/* end of interface to fast scalable reduce routines */41024103kmp_uint64 __kmpc_get_taskid() {41044105kmp_int32 gtid;4106kmp_info_t *thread;41074108gtid = __kmp_get_gtid();4109if (gtid < 0) {4110return 0;4111}4112thread = __kmp_thread_from_gtid(gtid);4113return thread->th.th_current_task->td_task_id;41144115} // __kmpc_get_taskid41164117kmp_uint64 __kmpc_get_parent_taskid() {41184119kmp_int32 gtid;4120kmp_info_t *thread;4121kmp_taskdata_t *parent_task;41224123gtid = __kmp_get_gtid();4124if (gtid < 0) {4125return 0;4126}4127thread = __kmp_thread_from_gtid(gtid);4128parent_task = thread->th.th_current_task->td_parent;4129return (parent_task == NULL ? 0 : parent_task->td_task_id);41304131} // __kmpc_get_parent_taskid41324133/*!4134@ingroup WORK_SHARING4135@param loc source location information.4136@param gtid global thread number.4137@param num_dims number of associated doacross loops.4138@param dims info on loops bounds.41394140Initialize doacross loop information.4141Expect compiler send us inclusive bounds,4142e.g. 
for(i=2;i<9;i+=2) lo=2, up=8, st=2.4143*/4144void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,4145const struct kmp_dim *dims) {4146__kmp_assert_valid_gtid(gtid);4147int j, idx;4148kmp_int64 last, trace_count;4149kmp_info_t *th = __kmp_threads[gtid];4150kmp_team_t *team = th->th.th_team;4151kmp_uint32 *flags;4152kmp_disp_t *pr_buf = th->th.th_dispatch;4153dispatch_shared_info_t *sh_buf;41544155KA_TRACE(415620,4157("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",4158gtid, num_dims, !team->t.t_serialized));4159KMP_DEBUG_ASSERT(dims != NULL);4160KMP_DEBUG_ASSERT(num_dims > 0);41614162if (team->t.t_serialized) {4163KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));4164return; // no dependencies if team is serialized4165}4166KMP_DEBUG_ASSERT(team->t.t_nproc > 1);4167idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for4168// the next loop4169sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];41704171// Save bounds info into allocated private buffer4172KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);4173pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(4174th, sizeof(kmp_int64) * (4 * num_dims + 1));4175KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);4176pr_buf->th_doacross_info[0] =4177(kmp_int64)num_dims; // first element is number of dimensions4178// Save also address of num_done in order to access it later without knowing4179// the buffer index4180pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;4181pr_buf->th_doacross_info[2] = dims[0].lo;4182pr_buf->th_doacross_info[3] = dims[0].up;4183pr_buf->th_doacross_info[4] = dims[0].st;4184last = 5;4185for (j = 1; j < num_dims; ++j) {4186kmp_int644187range_length; // To keep ranges of all dimensions but the first dims[0]4188if (dims[j].st == 1) { // most common case4189// AC: should we care of ranges bigger than LLONG_MAX? (not for now)4190range_length = dims[j].up - dims[j].lo + 1;4191} else {4192if (dims[j].st > 0) {4193KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);4194range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;4195} else { // negative increment4196KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);4197range_length =4198(kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;4199}4200}4201pr_buf->th_doacross_info[last++] = range_length;4202pr_buf->th_doacross_info[last++] = dims[j].lo;4203pr_buf->th_doacross_info[last++] = dims[j].up;4204pr_buf->th_doacross_info[last++] = dims[j].st;4205}42064207// Compute total trip count.4208// Start with range of dims[0] which we don't need to keep in the buffer.4209if (dims[0].st == 1) { // most common case4210trace_count = dims[0].up - dims[0].lo + 1;4211} else if (dims[0].st > 0) {4212KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);4213trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;4214} else { // negative increment4215KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);4216trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;4217}4218for (j = 1; j < num_dims; ++j) {4219trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges4220}4221KMP_DEBUG_ASSERT(trace_count > 0);42224223// Check if shared buffer is not occupied by other loop (idx -4224// __kmp_dispatch_num_buffers)4225if (idx != sh_buf->doacross_buf_idx) {4226// Shared buffer is occupied, wait for it to be free4227__kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,4228__kmp_eq_4, NULL);4229}4230#if KMP_32_BIT_ARCH4231// Check if we are the first thread. 
After the CAS the first thread gets 0,4232// others get 1 if initialization is in progress, allocated pointer otherwise.4233// Treat pointer as volatile integer (value 0 or 1) until memory is allocated.4234flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(4235(volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);4236#else4237flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(4238(volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);4239#endif4240if (flags == NULL) {4241// we are the first thread, allocate the array of flags4242size_t size =4243(size_t)trace_count / 8 + 8; // in bytes, use single bit per iteration4244flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);4245KMP_MB();4246sh_buf->doacross_flags = flags;4247} else if (flags == (kmp_uint32 *)1) {4248#if KMP_32_BIT_ARCH4249// initialization is still in progress, need to wait4250while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)4251#else4252while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)4253#endif4254KMP_YIELD(TRUE);4255KMP_MB();4256} else {4257KMP_MB();4258}4259KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value4260pr_buf->th_doacross_flags =4261sh_buf->doacross_flags; // save private copy in order to not4262// touch shared buffer on each iteration4263KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));4264}42654266void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {4267__kmp_assert_valid_gtid(gtid);4268kmp_int64 shft;4269size_t num_dims, i;4270kmp_uint32 flag;4271kmp_int64 iter_number; // iteration number of "collapsed" loop nest4272kmp_info_t *th = __kmp_threads[gtid];4273kmp_team_t *team = th->th.th_team;4274kmp_disp_t *pr_buf;4275kmp_int64 lo, up, st;42764277KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));4278if (team->t.t_serialized) {4279KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));4280return; // no dependencies if team is serialized4281}42824283// calculate sequential iteration number and check out-of-bounds condition4284pr_buf = th->th.th_dispatch;4285KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);4286num_dims = (size_t)pr_buf->th_doacross_info[0];4287lo = pr_buf->th_doacross_info[2];4288up = pr_buf->th_doacross_info[3];4289st = pr_buf->th_doacross_info[4];4290#if OMPT_SUPPORT && OMPT_OPTIONAL4291SimpleVLA<ompt_dependence_t> deps(num_dims);4292#endif4293if (st == 1) { // most common case4294if (vec[0] < lo || vec[0] > up) {4295KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "4296"bounds [%lld,%lld]\n",4297gtid, vec[0], lo, up));4298return;4299}4300iter_number = vec[0] - lo;4301} else if (st > 0) {4302if (vec[0] < lo || vec[0] > up) {4303KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "4304"bounds [%lld,%lld]\n",4305gtid, vec[0], lo, up));4306return;4307}4308iter_number = (kmp_uint64)(vec[0] - lo) / st;4309} else { // negative increment4310if (vec[0] > lo || vec[0] < up) {4311KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "4312"bounds [%lld,%lld]\n",4313gtid, vec[0], lo, up));4314return;4315}4316iter_number = (kmp_uint64)(lo - vec[0]) / (-st);4317}4318#if OMPT_SUPPORT && OMPT_OPTIONAL4319deps[0].variable.value = iter_number;4320deps[0].dependence_type = ompt_dependence_type_sink;4321#endif4322for (i = 1; i < num_dims; ++i) {4323kmp_int64 iter, ln;4324size_t j = i * 4;4325ln = pr_buf->th_doacross_info[j + 1];4326lo = pr_buf->th_doacross_info[j + 2];4327up = pr_buf->th_doacross_info[j + 3];4328st = pr_buf->th_doacross_info[j + 
4];4329if (st == 1) {4330if (vec[i] < lo || vec[i] > up) {4331KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "4332"bounds [%lld,%lld]\n",4333gtid, vec[i], lo, up));4334return;4335}4336iter = vec[i] - lo;4337} else if (st > 0) {4338if (vec[i] < lo || vec[i] > up) {4339KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "4340"bounds [%lld,%lld]\n",4341gtid, vec[i], lo, up));4342return;4343}4344iter = (kmp_uint64)(vec[i] - lo) / st;4345} else { // st < 04346if (vec[i] > lo || vec[i] < up) {4347KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "4348"bounds [%lld,%lld]\n",4349gtid, vec[i], lo, up));4350return;4351}4352iter = (kmp_uint64)(lo - vec[i]) / (-st);4353}4354iter_number = iter + ln * iter_number;4355#if OMPT_SUPPORT && OMPT_OPTIONAL4356deps[i].variable.value = iter;4357deps[i].dependence_type = ompt_dependence_type_sink;4358#endif4359}4360shft = iter_number % 32; // use 32-bit granularity4361iter_number >>= 5; // divided by 324362flag = 1 << shft;4363while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {4364KMP_YIELD(TRUE);4365}4366KMP_MB();4367#if OMPT_SUPPORT && OMPT_OPTIONAL4368if (ompt_enabled.ompt_callback_dependences) {4369ompt_callbacks.ompt_callback(ompt_callback_dependences)(4370&(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);4371}4372#endif4373KA_TRACE(20,4374("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",4375gtid, (iter_number << 5) + shft));4376}43774378void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {4379__kmp_assert_valid_gtid(gtid);4380kmp_int64 shft;4381size_t num_dims, i;4382kmp_uint32 flag;4383kmp_int64 iter_number; // iteration number of "collapsed" loop nest4384kmp_info_t *th = __kmp_threads[gtid];4385kmp_team_t *team = th->th.th_team;4386kmp_disp_t *pr_buf;4387kmp_int64 lo, st;43884389KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));4390if (team->t.t_serialized) {4391KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));4392return; // no dependencies if team is serialized4393}43944395// calculate sequential iteration number (same as in "wait" but no4396// out-of-bounds checks)4397pr_buf = th->th.th_dispatch;4398KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);4399num_dims = (size_t)pr_buf->th_doacross_info[0];4400lo = pr_buf->th_doacross_info[2];4401st = pr_buf->th_doacross_info[4];4402#if OMPT_SUPPORT && OMPT_OPTIONAL4403SimpleVLA<ompt_dependence_t> deps(num_dims);4404#endif4405if (st == 1) { // most common case4406iter_number = vec[0] - lo;4407} else if (st > 0) {4408iter_number = (kmp_uint64)(vec[0] - lo) / st;4409} else { // negative increment4410iter_number = (kmp_uint64)(lo - vec[0]) / (-st);4411}4412#if OMPT_SUPPORT && OMPT_OPTIONAL4413deps[0].variable.value = iter_number;4414deps[0].dependence_type = ompt_dependence_type_source;4415#endif4416for (i = 1; i < num_dims; ++i) {4417kmp_int64 iter, ln;4418size_t j = i * 4;4419ln = pr_buf->th_doacross_info[j + 1];4420lo = pr_buf->th_doacross_info[j + 2];4421st = pr_buf->th_doacross_info[j + 4];4422if (st == 1) {4423iter = vec[i] - lo;4424} else if (st > 0) {4425iter = (kmp_uint64)(vec[i] - lo) / st;4426} else { // st < 04427iter = (kmp_uint64)(lo - vec[i]) / (-st);4428}4429iter_number = iter + ln * iter_number;4430#if OMPT_SUPPORT && OMPT_OPTIONAL4431deps[i].variable.value = iter;4432deps[i].dependence_type = ompt_dependence_type_source;4433#endif4434}4435#if OMPT_SUPPORT && OMPT_OPTIONAL4436if (ompt_enabled.ompt_callback_dependences) 
{4437ompt_callbacks.ompt_callback(ompt_callback_dependences)(4438&(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);4439}4440#endif4441shft = iter_number % 32; // use 32-bit granularity4442iter_number >>= 5; // divided by 324443flag = 1 << shft;4444KMP_MB();4445if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)4446KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);4447KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,4448(iter_number << 5) + shft));4449}44504451void __kmpc_doacross_fini(ident_t *loc, int gtid) {4452__kmp_assert_valid_gtid(gtid);4453kmp_int32 num_done;4454kmp_info_t *th = __kmp_threads[gtid];4455kmp_team_t *team = th->th.th_team;4456kmp_disp_t *pr_buf = th->th.th_dispatch;44574458KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));4459if (team->t.t_serialized) {4460KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));4461return; // nothing to do4462}4463num_done =4464KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;4465if (num_done == th->th.th_team_nproc) {4466// we are the last thread, need to free shared resources4467int idx = pr_buf->th_doacross_buf_idx - 1;4468dispatch_shared_info_t *sh_buf =4469&team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];4470KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==4471(kmp_int64)&sh_buf->doacross_num_done);4472KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);4473KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);4474__kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));4475sh_buf->doacross_flags = NULL;4476sh_buf->doacross_num_done = 0;4477sh_buf->doacross_buf_idx +=4478__kmp_dispatch_num_buffers; // free buffer for future re-use4479}4480// free private resources (need to keep buffer index forever)4481pr_buf->th_doacross_flags = NULL;4482__kmp_thread_free(th, (void *)pr_buf->th_doacross_info);4483pr_buf->th_doacross_info = NULL;4484KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));4485}44864487/* OpenMP 5.1 Memory Management routines */4488void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {4489return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);4490}44914492void *omp_aligned_alloc(size_t align, size_t size,4493omp_allocator_handle_t allocator) {4494return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);4495}44964497void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {4498return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);4499}45004501void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,4502omp_allocator_handle_t allocator) {4503return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);4504}45054506void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,4507omp_allocator_handle_t free_allocator) {4508return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,4509free_allocator);4510}45114512void omp_free(void *ptr, omp_allocator_handle_t allocator) {4513___kmpc_free(__kmp_entry_gtid(), ptr, allocator);4514}4515/* end of OpenMP 5.1 Memory Management routines */45164517int __kmpc_get_target_offload(void) {4518if (!__kmp_init_serial) {4519__kmp_serial_initialize();4520}4521return __kmp_target_offload;4522}45234524int __kmpc_pause_resource(kmp_pause_status_t level) {4525if (!__kmp_init_serial) {4526return 1; // Can't pause if runtime is not initialized4527}4528return __kmp_pause_resource(level);4529}45304531void __kmpc_error(ident_t *loc, int severity, const char *message) 
{4532if (!__kmp_init_serial)4533__kmp_serial_initialize();45344535KMP_ASSERT(severity == severity_warning || severity == severity_fatal);45364537#if OMPT_SUPPORT4538if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {4539ompt_callbacks.ompt_callback(ompt_callback_error)(4540(ompt_severity_t)severity, message, KMP_STRLEN(message),4541OMPT_GET_RETURN_ADDRESS(0));4542}4543#endif // OMPT_SUPPORT45444545char *src_loc;4546if (loc && loc->psource) {4547kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);4548src_loc =4549__kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);4550__kmp_str_loc_free(&str_loc);4551} else {4552src_loc = __kmp_str_format("unknown");4553}45544555if (severity == severity_warning)4556KMP_WARNING(UserDirectedWarning, src_loc, message);4557else4558KMP_FATAL(UserDirectedError, src_loc, message);45594560__kmp_str_free(&src_loc);4561}45624563// Mark begin of scope directive.4564void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {4565// reserved is for extension of scope directive and not used.4566#if OMPT_SUPPORT && OMPT_OPTIONAL4567if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {4568kmp_team_t *team = __kmp_threads[gtid]->th.th_team;4569int tid = __kmp_tid_from_gtid(gtid);4570ompt_callbacks.ompt_callback(ompt_callback_work)(4571ompt_work_scope, ompt_scope_begin,4572&(team->t.ompt_team_info.parallel_data),4573&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,4574OMPT_GET_RETURN_ADDRESS(0));4575}4576#endif // OMPT_SUPPORT && OMPT_OPTIONAL4577}45784579// Mark end of scope directive4580void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {4581// reserved is for extension of scope directive and not used.4582#if OMPT_SUPPORT && OMPT_OPTIONAL4583if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {4584kmp_team_t *team = __kmp_threads[gtid]->th.th_team;4585int tid = __kmp_tid_from_gtid(gtid);4586ompt_callbacks.ompt_callback(ompt_callback_work)(4587ompt_work_scope, ompt_scope_end,4588&(team->t.ompt_team_info.parallel_data),4589&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,4590OMPT_GET_RETURN_ADDRESS(0));4591}4592#endif // OMPT_SUPPORT && OMPT_OPTIONAL4593}45944595#ifdef KMP_USE_VERSION_SYMBOLS4596// For GOMP compatibility there are two versions of each omp_* API.4597// One is the plain C symbol and one is the Fortran symbol with an appended4598// underscore. When we implement a specific ompc_* version of an omp_*4599// function, we want the plain GOMP versioned symbol to alias the ompc_* version4600// instead of the Fortran versions in kmp_ftn_entry.h4601extern "C" {4602// Have to undef these from omp.h so they aren't translated into4603// their ompc counterparts in the KMP_VERSION_OMPC_SYMBOL macros below4604#ifdef omp_set_affinity_format4605#undef omp_set_affinity_format4606#endif4607#ifdef omp_get_affinity_format4608#undef omp_get_affinity_format4609#endif4610#ifdef omp_display_affinity4611#undef omp_display_affinity4612#endif4613#ifdef omp_capture_affinity4614#undef omp_capture_affinity4615#endif4616KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,4617"OMP_5.0");4618KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,4619"OMP_5.0");4620KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,4621"OMP_5.0");4622KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,4623"OMP_5.0");4624} // extern "C"4625#endif462646274628
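// -----------------------------------------------------------------------------
// Illustrative sketch (not part of the runtime; compiled out with '#if 0'):
// roughly how a compiler might lower
//     #pragma omp for nowait reduction(+ : sum)
// onto the __kmpc_reduce_nowait()/__kmpc_end_reduce_nowait() entry points
// above. Names such as example_reduce_func and example_reduce_nowait_lowering
// are hypothetical; the exact generated code depends on the compiler.
#if 0
static void example_reduce_func(void *lhs_data, void *rhs_data) {
  // Combine the right-hand private copy into the left-hand one.
  *(int *)lhs_data += *(int *)rhs_data;
}

static void example_reduce_nowait_lowering(ident_t *loc, kmp_int32 gtid,
                                           int *shared_sum, int private_sum) {
  static kmp_critical_name crit; // zero-initialized critical-name storage
  switch (__kmpc_reduce_nowait(loc, gtid, /*num_vars=*/1, sizeof(int),
                               &private_sum, example_reduce_func, &crit)) {
  case 1:
    // Critical/empty method (every thread) or tree method (primary thread
    // only): combine into the shared copy, then close the reduce region.
    *shared_sum += private_sum;
    __kmpc_end_reduce_nowait(loc, gtid, &crit);
    break;
  case 2:
    // Atomic method: every thread updates the shared copy atomically; a real
    // compiler would typically emit a matching __kmpc_atomic_* call here.
    break;
  default:
    // 0: this thread's contribution was already combined by the tree barrier.
    break;
  }
}
#endif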
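// Illustrative sketch (not part of the runtime; compiled out with '#if 0'):
// roughly how a one-dimensional doacross loop such as
//     #pragma omp for ordered(1)
//     for (i = lo; i <= up; i += st) {
//       #pragma omp ordered depend(sink : i - st)
//       ...
//       #pragma omp ordered depend(source)
//     }
// maps onto the __kmpc_doacross_* entry points above. In real generated code
// each thread runs only its assigned iterations; the function name is
// hypothetical.
#if 0
static void example_doacross_lowering(ident_t *loc, int gtid, kmp_int64 lo,
                                      kmp_int64 up, kmp_int64 st) {
  struct kmp_dim dim;
  dim.lo = lo; // inclusive lower bound
  dim.up = up; // inclusive upper bound
  dim.st = st; // loop stride
  __kmpc_doacross_init(loc, gtid, /*num_dims=*/1, &dim);
  for (kmp_int64 i = lo; i <= up; i += st) {
    kmp_int64 vec = i - st; // depend(sink : i - st)
    if (vec >= lo)
      __kmpc_doacross_wait(loc, gtid, &vec); // wait on the previous iteration
    // ... loop body ...
    vec = i;
    __kmpc_doacross_post(loc, gtid, &vec); // depend(source)
  }
  __kmpc_doacross_fini(loc, gtid);
}
#endif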
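// Illustrative sketch (not part of the runtime; compiled out with '#if 0'):
// basic use of the OpenMP 5.1 memory-management entry points defined above,
// with the predefined omp_default_mem_alloc allocator from omp.h. The function
// name is hypothetical.
#if 0
static void example_memory_management(void) {
  // 64-byte aligned allocation from the default memory space.
  double *a = (double *)omp_aligned_alloc(64, 1024 * sizeof(double),
                                          omp_default_mem_alloc);
  // Zero-initialized allocation.
  int *counts = (int *)omp_calloc(256, sizeof(int), omp_default_mem_alloc);
  // Grow the first allocation, keeping it with the same allocator.
  a = (double *)omp_realloc(a, 2048 * sizeof(double), omp_default_mem_alloc,
                            omp_default_mem_alloc);
  omp_free(counts, omp_default_mem_alloc);
  omp_free(a, omp_default_mem_alloc);
}
#endif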
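// Illustrative sketch (not part of the runtime; compiled out with '#if 0'):
// roughly how
//     #pragma omp error at(execution) severity(warning) message("check input")
// could be lowered onto __kmpc_error(). The ident_t argument normally carries
// the source location emitted by the compiler; as the implementation above
// shows, a NULL location is reported as "unknown". The function name is
// hypothetical.
#if 0
static void example_error_directive(ident_t *loc) {
  __kmpc_error(loc, severity_warning, "check input");
}
#endif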