Path: blob/main/contrib/llvm-project/openmp/runtime/src/kmp_gsupport.cpp
35258 views
/*1* kmp_gsupport.cpp2*/34//===----------------------------------------------------------------------===//5//6// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.7// See https://llvm.org/LICENSE.txt for license information.8// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception9//10//===----------------------------------------------------------------------===//1112#include "kmp.h"13#include "kmp_atomic.h"14#include "kmp_utils.h"1516#if OMPT_SUPPORT17#include "ompt-specific.h"18#endif1920enum {21KMP_GOMP_TASK_UNTIED_FLAG = 1,22KMP_GOMP_TASK_FINAL_FLAG = 2,23KMP_GOMP_TASK_DEPENDS_FLAG = 824};2526enum {27KMP_GOMP_DEPOBJ_IN = 1,28KMP_GOMP_DEPOBJ_OUT = 2,29KMP_GOMP_DEPOBJ_INOUT = 3,30KMP_GOMP_DEPOBJ_MTXINOUTSET = 431};3233// This class helps convert gomp dependency info into34// kmp_depend_info_t structures35class kmp_gomp_depends_info_t {36void **depend;37kmp_int32 num_deps;38size_t num_out, num_mutexinout, num_in, num_depobj;39size_t offset;4041public:42kmp_gomp_depends_info_t(void **depend) : depend(depend) {43size_t ndeps = (kmp_intptr_t)depend[0];44// GOMP taskdep structure:45// if depend[0] != 0:46// depend = [ ndeps | nout | &out | ... | &out | &in | ... | &in ]47//48// if depend[0] == 0:49// depend = [ 0 | ndeps | nout | nmtx | nin | &out | ... | &out | &mtx |50// ... | &mtx | &in | ... | &in | &depobj | ... | &depobj ]51if (ndeps) {52num_out = (kmp_intptr_t)depend[1];53num_in = ndeps - num_out;54num_mutexinout = num_depobj = 0;55offset = 2;56} else {57ndeps = (kmp_intptr_t)depend[1];58num_out = (kmp_intptr_t)depend[2];59num_mutexinout = (kmp_intptr_t)depend[3];60num_in = (kmp_intptr_t)depend[4];61num_depobj = ndeps - num_out - num_mutexinout - num_in;62KMP_ASSERT(num_depobj <= ndeps);63offset = 5;64}65num_deps = static_cast<kmp_int32>(ndeps);66}67kmp_int32 get_num_deps() const { return num_deps; }68kmp_depend_info_t get_kmp_depend(size_t index) const {69kmp_depend_info_t retval;70memset(&retval, '\0', sizeof(retval));71KMP_ASSERT(index < (size_t)num_deps);72retval.len = 0;73// Because inout and out are logically equivalent,74// use inout and in dependency flags. GOMP does not provide a75// way to distinguish if user specified out vs. inout.76if (index < num_out) {77retval.flags.in = 1;78retval.flags.out = 1;79retval.base_addr = (kmp_intptr_t)depend[offset + index];80} else if (index >= num_out && index < (num_out + num_mutexinout)) {81retval.flags.mtx = 1;82retval.base_addr = (kmp_intptr_t)depend[offset + index];83} else if (index >= (num_out + num_mutexinout) &&84index < (num_out + num_mutexinout + num_in)) {85retval.flags.in = 1;86retval.base_addr = (kmp_intptr_t)depend[offset + index];87} else {88// depobj is a two element array (size of elements are size of pointer)89// depobj[0] = base_addr90// depobj[1] = type (in, out, inout, mutexinoutset, etc.)91kmp_intptr_t *depobj = (kmp_intptr_t *)depend[offset + index];92retval.base_addr = depobj[0];93switch (depobj[1]) {94case KMP_GOMP_DEPOBJ_IN:95retval.flags.in = 1;96break;97case KMP_GOMP_DEPOBJ_OUT:98retval.flags.out = 1;99break;100case KMP_GOMP_DEPOBJ_INOUT:101retval.flags.in = 1;102retval.flags.out = 1;103break;104case KMP_GOMP_DEPOBJ_MTXINOUTSET:105retval.flags.mtx = 1;106break;107default:108KMP_FATAL(GompFeatureNotSupported, "Unknown depobj type");109}110}111return retval;112}113};114115#ifdef __cplusplus116extern "C" {117#endif // __cplusplus118119#define MKLOC(loc, routine) \120static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};121122#include "kmp_ftn_os.h"123124void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER)(void) {125int gtid = __kmp_entry_gtid();126MKLOC(loc, "GOMP_barrier");127KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid));128#if OMPT_SUPPORT && OMPT_OPTIONAL129ompt_frame_t *ompt_frame;130if (ompt_enabled.enabled) {131__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);132ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);133}134OMPT_STORE_RETURN_ADDRESS(gtid);135#endif136__kmpc_barrier(&loc, gtid);137#if OMPT_SUPPORT && OMPT_OPTIONAL138if (ompt_enabled.enabled) {139ompt_frame->enter_frame = ompt_data_none;140}141#endif142}143144// Mutual exclusion145146// The symbol that icc/ifort generates for unnamed critical sections147// - .gomp_critical_user_ - is defined using .comm in any objects reference it.148// We can't reference it directly here in C code, as the symbol contains a ".".149//150// The RTL contains an assembly language definition of .gomp_critical_user_151// with another symbol __kmp_unnamed_critical_addr initialized with it's152// address.153extern kmp_critical_name *__kmp_unnamed_critical_addr;154155void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_START)(void) {156int gtid = __kmp_entry_gtid();157MKLOC(loc, "GOMP_critical_start");158KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid));159#if OMPT_SUPPORT && OMPT_OPTIONAL160OMPT_STORE_RETURN_ADDRESS(gtid);161#endif162__kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr);163}164165void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_END)(void) {166int gtid = __kmp_get_gtid();167MKLOC(loc, "GOMP_critical_end");168KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid));169#if OMPT_SUPPORT && OMPT_OPTIONAL170OMPT_STORE_RETURN_ADDRESS(gtid);171#endif172__kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr);173}174175void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_NAME_START)(void **pptr) {176int gtid = __kmp_entry_gtid();177MKLOC(loc, "GOMP_critical_name_start");178KA_TRACE(20, ("GOMP_critical_name_start: T#%d\n", gtid));179__kmpc_critical(&loc, gtid, (kmp_critical_name *)pptr);180}181182void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CRITICAL_NAME_END)(void **pptr) {183int gtid = __kmp_get_gtid();184MKLOC(loc, "GOMP_critical_name_end");185KA_TRACE(20, ("GOMP_critical_name_end: T#%d\n", gtid));186__kmpc_end_critical(&loc, gtid, (kmp_critical_name *)pptr);187}188189// The Gnu codegen tries to use locked operations to perform atomic updates190// inline. If it can't, then it calls GOMP_atomic_start() before performing191// the update and GOMP_atomic_end() afterward, regardless of the data type.192void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ATOMIC_START)(void) {193int gtid = __kmp_entry_gtid();194KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid));195196#if OMPT_SUPPORT197__ompt_thread_assign_wait_id(0);198#endif199200__kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);201}202203void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ATOMIC_END)(void) {204int gtid = __kmp_get_gtid();205KA_TRACE(20, ("GOMP_atomic_end: T#%d\n", gtid));206__kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);207}208209int KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_START)(void) {210int gtid = __kmp_entry_gtid();211MKLOC(loc, "GOMP_single_start");212KA_TRACE(20, ("GOMP_single_start: T#%d\n", gtid));213214if (!TCR_4(__kmp_init_parallel))215__kmp_parallel_initialize();216__kmp_resume_if_soft_paused();217218// 3rd parameter == FALSE prevents kmp_enter_single from pushing a219// workshare when USE_CHECKS is defined. We need to avoid the push,220// as there is no corresponding GOMP_single_end() call.221kmp_int32 rc = __kmp_enter_single(gtid, &loc, FALSE);222223#if OMPT_SUPPORT && OMPT_OPTIONAL224kmp_info_t *this_thr = __kmp_threads[gtid];225kmp_team_t *team = this_thr->th.th_team;226int tid = __kmp_tid_from_gtid(gtid);227228if (ompt_enabled.enabled) {229if (rc) {230if (ompt_enabled.ompt_callback_work) {231ompt_callbacks.ompt_callback(ompt_callback_work)(232ompt_work_single_executor, ompt_scope_begin,233&(team->t.ompt_team_info.parallel_data),234&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),2351, OMPT_GET_RETURN_ADDRESS(0));236}237} else {238if (ompt_enabled.ompt_callback_work) {239ompt_callbacks.ompt_callback(ompt_callback_work)(240ompt_work_single_other, ompt_scope_begin,241&(team->t.ompt_team_info.parallel_data),242&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),2431, OMPT_GET_RETURN_ADDRESS(0));244ompt_callbacks.ompt_callback(ompt_callback_work)(245ompt_work_single_other, ompt_scope_end,246&(team->t.ompt_team_info.parallel_data),247&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),2481, OMPT_GET_RETURN_ADDRESS(0));249}250}251}252#endif253254return rc;255}256257void *KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) {258void *retval;259int gtid = __kmp_entry_gtid();260MKLOC(loc, "GOMP_single_copy_start");261KA_TRACE(20, ("GOMP_single_copy_start: T#%d\n", gtid));262263if (!TCR_4(__kmp_init_parallel))264__kmp_parallel_initialize();265__kmp_resume_if_soft_paused();266267// If this is the first thread to enter, return NULL. The generated code will268// then call GOMP_single_copy_end() for this thread only, with the269// copyprivate data pointer as an argument.270if (__kmp_enter_single(gtid, &loc, FALSE))271return NULL;272273// Wait for the first thread to set the copyprivate data pointer,274// and for all other threads to reach this point.275276#if OMPT_SUPPORT && OMPT_OPTIONAL277ompt_frame_t *ompt_frame;278if (ompt_enabled.enabled) {279__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);280ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);281}282OMPT_STORE_RETURN_ADDRESS(gtid);283#endif284__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);285286// Retrieve the value of the copyprivate data point, and wait for all287// threads to do likewise, then return.288retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data;289{290#if OMPT_SUPPORT && OMPT_OPTIONAL291OMPT_STORE_RETURN_ADDRESS(gtid);292#endif293__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);294}295#if OMPT_SUPPORT && OMPT_OPTIONAL296if (ompt_enabled.enabled) {297ompt_frame->enter_frame = ompt_data_none;298}299#endif300return retval;301}302303void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) {304int gtid = __kmp_get_gtid();305KA_TRACE(20, ("GOMP_single_copy_end: T#%d\n", gtid));306307// Set the copyprivate data pointer fo the team, then hit the barrier so that308// the other threads will continue on and read it. Hit another barrier before309// continuing, so that the know that the copyprivate data pointer has been310// propagated to all threads before trying to reuse the t_copypriv_data field.311__kmp_team_from_gtid(gtid)->t.t_copypriv_data = data;312#if OMPT_SUPPORT && OMPT_OPTIONAL313ompt_frame_t *ompt_frame;314if (ompt_enabled.enabled) {315__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);316ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);317}318OMPT_STORE_RETURN_ADDRESS(gtid);319#endif320__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);321{322#if OMPT_SUPPORT && OMPT_OPTIONAL323OMPT_STORE_RETURN_ADDRESS(gtid);324#endif325__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);326}327#if OMPT_SUPPORT && OMPT_OPTIONAL328if (ompt_enabled.enabled) {329ompt_frame->enter_frame = ompt_data_none;330}331#endif332}333334void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_START)(void) {335int gtid = __kmp_entry_gtid();336MKLOC(loc, "GOMP_ordered_start");337KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid));338#if OMPT_SUPPORT && OMPT_OPTIONAL339OMPT_STORE_RETURN_ADDRESS(gtid);340#endif341__kmpc_ordered(&loc, gtid);342}343344void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ORDERED_END)(void) {345int gtid = __kmp_get_gtid();346MKLOC(loc, "GOMP_ordered_end");347KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid));348#if OMPT_SUPPORT && OMPT_OPTIONAL349OMPT_STORE_RETURN_ADDRESS(gtid);350#endif351__kmpc_end_ordered(&loc, gtid);352}353354// Dispatch macro defs355//356// They come in two flavors: 64-bit unsigned, and either 32-bit signed357// (IA-32 architecture) or 64-bit signed (Intel(R) 64).358359#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_WASM || \360KMP_ARCH_PPC || KMP_ARCH_AARCH64_32361#define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4362#define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4363#define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4364#else365#define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_8366#define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_8367#define KMP_DISPATCH_NEXT __kmpc_dispatch_next_8368#endif /* KMP_ARCH_X86 */369370#define KMP_DISPATCH_INIT_ULL __kmp_aux_dispatch_init_8u371#define KMP_DISPATCH_FINI_CHUNK_ULL __kmp_aux_dispatch_fini_chunk_8u372#define KMP_DISPATCH_NEXT_ULL __kmpc_dispatch_next_8u373374// The parallel construct375376#ifndef KMP_DEBUG377static378#endif /* KMP_DEBUG */379void380__kmp_GOMP_microtask_wrapper(int *gtid, int *npr, void (*task)(void *),381void *data) {382#if OMPT_SUPPORT383kmp_info_t *thr;384ompt_frame_t *ompt_frame;385ompt_state_t enclosing_state;386387if (ompt_enabled.enabled) {388// get pointer to thread data structure389thr = __kmp_threads[*gtid];390391// save enclosing task state; set current state for task392enclosing_state = thr->th.ompt_thread_info.state;393thr->th.ompt_thread_info.state = ompt_state_work_parallel;394395// set task frame396__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);397ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);398}399#endif400401task(data);402403#if OMPT_SUPPORT404if (ompt_enabled.enabled) {405// clear task frame406ompt_frame->exit_frame = ompt_data_none;407408// restore enclosing state409thr->th.ompt_thread_info.state = enclosing_state;410}411#endif412}413414#ifndef KMP_DEBUG415static416#endif /* KMP_DEBUG */417void418__kmp_GOMP_parallel_microtask_wrapper(int *gtid, int *npr,419void (*task)(void *), void *data,420unsigned num_threads, ident_t *loc,421enum sched_type schedule, long start,422long end, long incr,423long chunk_size) {424// Initialize the loop worksharing construct.425426KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size,427schedule != kmp_sch_static);428429#if OMPT_SUPPORT430kmp_info_t *thr;431ompt_frame_t *ompt_frame;432ompt_state_t enclosing_state;433434if (ompt_enabled.enabled) {435thr = __kmp_threads[*gtid];436// save enclosing task state; set current state for task437enclosing_state = thr->th.ompt_thread_info.state;438thr->th.ompt_thread_info.state = ompt_state_work_parallel;439440// set task frame441__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);442ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);443}444#endif445446// Now invoke the microtask.447task(data);448449#if OMPT_SUPPORT450if (ompt_enabled.enabled) {451// clear task frame452ompt_frame->exit_frame = ompt_data_none;453454// reset enclosing state455thr->th.ompt_thread_info.state = enclosing_state;456}457#endif458}459460static void __kmp_GOMP_fork_call(ident_t *loc, int gtid, unsigned num_threads,461unsigned flags, void (*unwrapped_task)(void *),462microtask_t wrapper, int argc, ...) {463int rc;464kmp_info_t *thr = __kmp_threads[gtid];465kmp_team_t *team = thr->th.th_team;466int tid = __kmp_tid_from_gtid(gtid);467468va_list ap;469va_start(ap, argc);470471if (num_threads != 0)472__kmp_push_num_threads(loc, gtid, num_threads);473if (flags != 0)474__kmp_push_proc_bind(loc, gtid, (kmp_proc_bind_t)flags);475rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, wrapper,476__kmp_invoke_task_func, kmp_va_addr_of(ap));477478va_end(ap);479480if (rc) {481__kmp_run_before_invoked_task(gtid, tid, thr, team);482}483484#if OMPT_SUPPORT485int ompt_team_size;486if (ompt_enabled.enabled) {487ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);488ompt_task_info_t *task_info = __ompt_get_task_info_object(0);489490// implicit task callback491if (ompt_enabled.ompt_callback_implicit_task) {492ompt_team_size = __kmp_team_from_gtid(gtid)->t.t_nproc;493ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(494ompt_scope_begin, &(team_info->parallel_data),495&(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid),496ompt_task_implicit); // TODO: Can this be ompt_task_initial?497task_info->thread_num = __kmp_tid_from_gtid(gtid);498}499thr->th.ompt_thread_info.state = ompt_state_work_parallel;500}501#endif502}503504void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *),505void *data,506unsigned num_threads) {507int gtid = __kmp_entry_gtid();508509#if OMPT_SUPPORT510ompt_frame_t *parent_frame, *frame;511512if (ompt_enabled.enabled) {513__ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);514parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);515}516OMPT_STORE_RETURN_ADDRESS(gtid);517#endif518519MKLOC(loc, "GOMP_parallel_start");520KA_TRACE(20, ("GOMP_parallel_start: T#%d\n", gtid));521__kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task,522(microtask_t)__kmp_GOMP_microtask_wrapper, 2, task,523data);524#if OMPT_SUPPORT525if (ompt_enabled.enabled) {526__ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL);527frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);528}529#endif530#if OMPD_SUPPORT531if (ompd_state & OMPD_ENABLE_BP)532ompd_bp_parallel_begin();533#endif534}535536void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) {537int gtid = __kmp_get_gtid();538kmp_info_t *thr;539540thr = __kmp_threads[gtid];541542MKLOC(loc, "GOMP_parallel_end");543KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid));544545if (!thr->th.th_team->t.t_serialized) {546__kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr,547thr->th.th_team);548}549#if OMPT_SUPPORT550if (ompt_enabled.enabled) {551// Implicit task is finished here, in the barrier we might schedule552// deferred tasks,553// these don't see the implicit task on the stack554OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = ompt_data_none;555}556#endif557558__kmp_join_call(&loc, gtid559#if OMPT_SUPPORT560,561fork_context_gnu562#endif563);564#if OMPD_SUPPORT565if (ompd_state & OMPD_ENABLE_BP)566ompd_bp_parallel_end();567#endif568}569570// Loop worksharing constructs571572// The Gnu codegen passes in an exclusive upper bound for the overall range,573// but the libguide dispatch code expects an inclusive upper bound, hence the574// "end - incr" 5th argument to KMP_DISPATCH_INIT (and the " ub - str" 11th575// argument to __kmp_GOMP_fork_call).576//577// Conversely, KMP_DISPATCH_NEXT returns and inclusive upper bound in *p_ub,578// but the Gnu codegen expects an exclusive upper bound, so the adjustment579// "*p_ub += stride" compensates for the discrepancy.580//581// Correction: the gnu codegen always adjusts the upper bound by +-1, not the582// stride value. We adjust the dispatch parameters accordingly (by +-1), but583// we still adjust p_ub by the actual stride value.584//585// The "runtime" versions do not take a chunk_sz parameter.586//587// The profile lib cannot support construct checking of unordered loops that588// are predetermined by the compiler to be statically scheduled, as the gcc589// codegen will not always emit calls to GOMP_loop_static_next() to get the590// next iteration. Instead, it emits inline code to call omp_get_thread_num()591// num and calculate the iteration space using the result. It doesn't do this592// with ordered static loop, so they can be checked.593594#if OMPT_SUPPORT595#define IF_OMPT_SUPPORT(code) code596#else597#define IF_OMPT_SUPPORT(code)598#endif599600#define LOOP_START(func, schedule) \601int func(long lb, long ub, long str, long chunk_sz, long *p_lb, \602long *p_ub) { \603int status; \604long stride; \605int gtid = __kmp_entry_gtid(); \606MKLOC(loc, KMP_STR(func)); \607KA_TRACE( \60820, \609(KMP_STR( \610func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \611gtid, lb, ub, str, chunk_sz)); \612\613if ((str > 0) ? (lb < ub) : (lb > ub)) { \614{ \615IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \616KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \617(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \618(schedule) != kmp_sch_static); \619} \620{ \621IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \622status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \623(kmp_int *)p_ub, (kmp_int *)&stride); \624} \625if (status) { \626KMP_DEBUG_ASSERT(stride == str); \627*p_ub += (str > 0) ? 1 : -1; \628} \629} else { \630status = 0; \631} \632\633KA_TRACE( \63420, \635(KMP_STR( \636func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \637gtid, *p_lb, *p_ub, status)); \638return status; \639}640641#define LOOP_RUNTIME_START(func, schedule) \642int func(long lb, long ub, long str, long *p_lb, long *p_ub) { \643int status; \644long stride; \645long chunk_sz = 0; \646int gtid = __kmp_entry_gtid(); \647MKLOC(loc, KMP_STR(func)); \648KA_TRACE( \64920, \650(KMP_STR(func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz %d\n", \651gtid, lb, ub, str, chunk_sz)); \652\653if ((str > 0) ? (lb < ub) : (lb > ub)) { \654{ \655IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \656KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \657(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \658TRUE); \659} \660{ \661IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \662status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \663(kmp_int *)p_ub, (kmp_int *)&stride); \664} \665if (status) { \666KMP_DEBUG_ASSERT(stride == str); \667*p_ub += (str > 0) ? 1 : -1; \668} \669} else { \670status = 0; \671} \672\673KA_TRACE( \67420, \675(KMP_STR( \676func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \677gtid, *p_lb, *p_ub, status)); \678return status; \679}680681#define KMP_DOACROSS_FINI(status, gtid) \682if (!status && __kmp_threads[gtid]->th.th_dispatch->th_doacross_flags) { \683__kmpc_doacross_fini(NULL, gtid); \684}685686#define LOOP_NEXT(func, fini_code) \687int func(long *p_lb, long *p_ub) { \688int status; \689long stride; \690int gtid = __kmp_get_gtid(); \691MKLOC(loc, KMP_STR(func)); \692KA_TRACE(20, (KMP_STR(func) ": T#%d\n", gtid)); \693\694IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \695fini_code status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \696(kmp_int *)p_ub, (kmp_int *)&stride); \697if (status) { \698*p_ub += (stride > 0) ? 1 : -1; \699} \700KMP_DOACROSS_FINI(status, gtid) \701\702KA_TRACE( \70320, \704(KMP_STR(func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, stride 0x%lx, " \705"returning %d\n", \706gtid, *p_lb, *p_ub, stride, status)); \707return status; \708}709710LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static)711LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {})712LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START),713kmp_sch_dynamic_chunked)714LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START),715kmp_sch_dynamic_chunked)716LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {})717LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT), {})718LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_START),719kmp_sch_guided_chunked)720LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START),721kmp_sch_guided_chunked)722LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {})723LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT), {})724LOOP_RUNTIME_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START),725kmp_sch_runtime)726LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {})727LOOP_RUNTIME_START(728KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START),729kmp_sch_runtime)730LOOP_RUNTIME_START(731KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START),732kmp_sch_runtime)733LOOP_NEXT(734KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_NEXT), {})735LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_NEXT), {})736737LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START),738kmp_ord_static)739LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT),740{ KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })741LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START),742kmp_ord_dynamic_chunked)743LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT),744{ KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })745LOOP_START(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START),746kmp_ord_guided_chunked)747LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT),748{ KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })749LOOP_RUNTIME_START(750KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START),751kmp_ord_runtime)752LOOP_NEXT(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT),753{ KMP_DISPATCH_FINI_CHUNK(&loc, gtid); })754755#define LOOP_DOACROSS_START(func, schedule) \756bool func(unsigned ncounts, long *counts, long chunk_sz, long *p_lb, \757long *p_ub) { \758int status; \759long stride, lb, ub, str; \760int gtid = __kmp_entry_gtid(); \761struct kmp_dim *dims = \762(struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts); \763MKLOC(loc, KMP_STR(func)); \764for (unsigned i = 0; i < ncounts; ++i) { \765dims[i].lo = 0; \766dims[i].up = counts[i] - 1; \767dims[i].st = 1; \768} \769__kmpc_doacross_init(&loc, gtid, (int)ncounts, dims); \770lb = 0; \771ub = counts[0]; \772str = 1; \773KA_TRACE(20, (KMP_STR(func) ": T#%d, ncounts %u, lb 0x%lx, ub 0x%lx, str " \774"0x%lx, chunk_sz " \775"0x%lx\n", \776gtid, ncounts, lb, ub, str, chunk_sz)); \777\778if ((str > 0) ? (lb < ub) : (lb > ub)) { \779KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \780(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \781(schedule) != kmp_sch_static); \782status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \783(kmp_int *)p_ub, (kmp_int *)&stride); \784if (status) { \785KMP_DEBUG_ASSERT(stride == str); \786*p_ub += (str > 0) ? 1 : -1; \787} \788} else { \789status = 0; \790} \791KMP_DOACROSS_FINI(status, gtid); \792\793KA_TRACE( \79420, \795(KMP_STR( \796func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \797gtid, *p_lb, *p_ub, status)); \798__kmp_free(dims); \799return status; \800}801802#define LOOP_DOACROSS_RUNTIME_START(func, schedule) \803int func(unsigned ncounts, long *counts, long *p_lb, long *p_ub) { \804int status; \805long stride, lb, ub, str; \806long chunk_sz = 0; \807int gtid = __kmp_entry_gtid(); \808struct kmp_dim *dims = \809(struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts); \810MKLOC(loc, KMP_STR(func)); \811for (unsigned i = 0; i < ncounts; ++i) { \812dims[i].lo = 0; \813dims[i].up = counts[i] - 1; \814dims[i].st = 1; \815} \816__kmpc_doacross_init(&loc, gtid, (int)ncounts, dims); \817lb = 0; \818ub = counts[0]; \819str = 1; \820KA_TRACE( \82120, \822(KMP_STR(func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz %d\n", \823gtid, lb, ub, str, chunk_sz)); \824\825if ((str > 0) ? (lb < ub) : (lb > ub)) { \826KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \827(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \828status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \829(kmp_int *)p_ub, (kmp_int *)&stride); \830if (status) { \831KMP_DEBUG_ASSERT(stride == str); \832*p_ub += (str > 0) ? 1 : -1; \833} \834} else { \835status = 0; \836} \837KMP_DOACROSS_FINI(status, gtid); \838\839KA_TRACE( \84020, \841(KMP_STR( \842func) " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \843gtid, *p_lb, *p_ub, status)); \844__kmp_free(dims); \845return status; \846}847848LOOP_DOACROSS_START(849KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START),850kmp_sch_static)851LOOP_DOACROSS_START(852KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START),853kmp_sch_dynamic_chunked)854LOOP_DOACROSS_START(855KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START),856kmp_sch_guided_chunked)857LOOP_DOACROSS_RUNTIME_START(858KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START),859kmp_sch_runtime)860861void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END)(void) {862int gtid = __kmp_get_gtid();863KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid))864865#if OMPT_SUPPORT && OMPT_OPTIONAL866ompt_frame_t *ompt_frame;867if (ompt_enabled.enabled) {868__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);869ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);870OMPT_STORE_RETURN_ADDRESS(gtid);871}872#endif873__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);874#if OMPT_SUPPORT && OMPT_OPTIONAL875if (ompt_enabled.enabled) {876ompt_frame->enter_frame = ompt_data_none;877}878#endif879880KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid))881}882883void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END_NOWAIT)(void) {884KA_TRACE(20, ("GOMP_loop_end_nowait: T#%d\n", __kmp_get_gtid()))885}886887// Unsigned long long loop worksharing constructs888//889// These are new with gcc 4.4890891#define LOOP_START_ULL(func, schedule) \892int func(int up, unsigned long long lb, unsigned long long ub, \893unsigned long long str, unsigned long long chunk_sz, \894unsigned long long *p_lb, unsigned long long *p_ub) { \895int status; \896long long str2 = up ? ((long long)str) : -((long long)str); \897long long stride; \898int gtid = __kmp_entry_gtid(); \899MKLOC(loc, KMP_STR(func)); \900\901KA_TRACE(20, (KMP_STR(func) ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str " \902"0x%llx, chunk_sz 0x%llx\n", \903gtid, up, lb, ub, str, chunk_sz)); \904\905if ((str > 0) ? (lb < ub) : (lb > ub)) { \906KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \907(str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, \908(schedule) != kmp_sch_static); \909status = \910KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \911(kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \912if (status) { \913KMP_DEBUG_ASSERT(stride == str2); \914*p_ub += (str > 0) ? 1 : -1; \915} \916} else { \917status = 0; \918} \919\920KA_TRACE( \92120, \922(KMP_STR( \923func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \924gtid, *p_lb, *p_ub, status)); \925return status; \926}927928#define LOOP_RUNTIME_START_ULL(func, schedule) \929int func(int up, unsigned long long lb, unsigned long long ub, \930unsigned long long str, unsigned long long *p_lb, \931unsigned long long *p_ub) { \932int status; \933long long str2 = up ? ((long long)str) : -((long long)str); \934unsigned long long stride; \935unsigned long long chunk_sz = 0; \936int gtid = __kmp_entry_gtid(); \937MKLOC(loc, KMP_STR(func)); \938\939KA_TRACE(20, (KMP_STR(func) ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str " \940"0x%llx, chunk_sz 0x%llx\n", \941gtid, up, lb, ub, str, chunk_sz)); \942\943if ((str > 0) ? (lb < ub) : (lb > ub)) { \944KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \945(str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, \946TRUE); \947status = \948KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \949(kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \950if (status) { \951KMP_DEBUG_ASSERT((long long)stride == str2); \952*p_ub += (str > 0) ? 1 : -1; \953} \954} else { \955status = 0; \956} \957\958KA_TRACE( \95920, \960(KMP_STR( \961func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \962gtid, *p_lb, *p_ub, status)); \963return status; \964}965966#define LOOP_NEXT_ULL(func, fini_code) \967int func(unsigned long long *p_lb, unsigned long long *p_ub) { \968int status; \969long long stride; \970int gtid = __kmp_get_gtid(); \971MKLOC(loc, KMP_STR(func)); \972KA_TRACE(20, (KMP_STR(func) ": T#%d\n", gtid)); \973\974fini_code status = \975KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \976(kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \977if (status) { \978*p_ub += (stride > 0) ? 1 : -1; \979} \980\981KA_TRACE( \98220, \983(KMP_STR( \984func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, stride 0x%llx, " \985"returning %d\n", \986gtid, *p_lb, *p_ub, stride, status)); \987return status; \988}989990LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START),991kmp_sch_static)992LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT), {})993LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START),994kmp_sch_dynamic_chunked)995LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {})996LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START),997kmp_sch_guided_chunked)998LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {})999LOOP_START_ULL(1000KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START),1001kmp_sch_dynamic_chunked)1002LOOP_NEXT_ULL(1003KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT), {})1004LOOP_START_ULL(1005KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START),1006kmp_sch_guided_chunked)1007LOOP_NEXT_ULL(1008KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT), {})1009LOOP_RUNTIME_START_ULL(1010KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime)1011LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {})1012LOOP_RUNTIME_START_ULL(1013KMP_EXPAND_NAME(1014KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START),1015kmp_sch_runtime)1016LOOP_RUNTIME_START_ULL(1017KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START),1018kmp_sch_runtime)1019LOOP_NEXT_ULL(1020KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_NEXT),1021{})1022LOOP_NEXT_ULL(1023KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_NEXT), {})10241025LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START),1026kmp_ord_static)1027LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT),1028{ KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })1029LOOP_START_ULL(1030KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START),1031kmp_ord_dynamic_chunked)1032LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT),1033{ KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })1034LOOP_START_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START),1035kmp_ord_guided_chunked)1036LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT),1037{ KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })1038LOOP_RUNTIME_START_ULL(1039KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START),1040kmp_ord_runtime)1041LOOP_NEXT_ULL(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT),1042{ KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); })10431044#define LOOP_DOACROSS_START_ULL(func, schedule) \1045int func(unsigned ncounts, unsigned long long *counts, \1046unsigned long long chunk_sz, unsigned long long *p_lb, \1047unsigned long long *p_ub) { \1048int status; \1049long long stride, str, lb, ub; \1050int gtid = __kmp_entry_gtid(); \1051struct kmp_dim *dims = \1052(struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts); \1053MKLOC(loc, KMP_STR(func)); \1054for (unsigned i = 0; i < ncounts; ++i) { \1055dims[i].lo = 0; \1056dims[i].up = counts[i] - 1; \1057dims[i].st = 1; \1058} \1059__kmpc_doacross_init(&loc, gtid, (int)ncounts, dims); \1060lb = 0; \1061ub = counts[0]; \1062str = 1; \1063\1064KA_TRACE(20, (KMP_STR(func) ": T#%d, lb 0x%llx, ub 0x%llx, str " \1065"0x%llx, chunk_sz 0x%llx\n", \1066gtid, lb, ub, str, chunk_sz)); \1067\1068if ((str > 0) ? (lb < ub) : (lb > ub)) { \1069KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \1070(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \1071(schedule) != kmp_sch_static); \1072status = \1073KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \1074(kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \1075if (status) { \1076KMP_DEBUG_ASSERT(stride == str); \1077*p_ub += (str > 0) ? 1 : -1; \1078} \1079} else { \1080status = 0; \1081} \1082KMP_DOACROSS_FINI(status, gtid); \1083\1084KA_TRACE( \108520, \1086(KMP_STR( \1087func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \1088gtid, *p_lb, *p_ub, status)); \1089__kmp_free(dims); \1090return status; \1091}10921093#define LOOP_DOACROSS_RUNTIME_START_ULL(func, schedule) \1094int func(unsigned ncounts, unsigned long long *counts, \1095unsigned long long *p_lb, unsigned long long *p_ub) { \1096int status; \1097unsigned long long stride, str, lb, ub; \1098unsigned long long chunk_sz = 0; \1099int gtid = __kmp_entry_gtid(); \1100struct kmp_dim *dims = \1101(struct kmp_dim *)__kmp_allocate(sizeof(struct kmp_dim) * ncounts); \1102MKLOC(loc, KMP_STR(func)); \1103for (unsigned i = 0; i < ncounts; ++i) { \1104dims[i].lo = 0; \1105dims[i].up = counts[i] - 1; \1106dims[i].st = 1; \1107} \1108__kmpc_doacross_init(&loc, gtid, (int)ncounts, dims); \1109lb = 0; \1110ub = counts[0]; \1111str = 1; \1112KA_TRACE(20, (KMP_STR(func) ": T#%d, lb 0x%llx, ub 0x%llx, str " \1113"0x%llx, chunk_sz 0x%llx\n", \1114gtid, lb, ub, str, chunk_sz)); \1115\1116if ((str > 0) ? (lb < ub) : (lb > ub)) { \1117KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \1118(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \1119TRUE); \1120status = \1121KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \1122(kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \1123if (status) { \1124KMP_DEBUG_ASSERT(stride == str); \1125*p_ub += (str > 0) ? 1 : -1; \1126} \1127} else { \1128status = 0; \1129} \1130KMP_DOACROSS_FINI(status, gtid); \1131\1132KA_TRACE( \113320, \1134(KMP_STR( \1135func) " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \1136gtid, *p_lb, *p_ub, status)); \1137__kmp_free(dims); \1138return status; \1139}11401141LOOP_DOACROSS_START_ULL(1142KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START),1143kmp_sch_static)1144LOOP_DOACROSS_START_ULL(1145KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START),1146kmp_sch_dynamic_chunked)1147LOOP_DOACROSS_START_ULL(1148KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START),1149kmp_sch_guided_chunked)1150LOOP_DOACROSS_RUNTIME_START_ULL(1151KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START),1152kmp_sch_runtime)11531154// Combined parallel / loop worksharing constructs1155//1156// There are no ull versions (yet).11571158#define PARALLEL_LOOP_START(func, schedule, ompt_pre, ompt_post) \1159void func(void (*task)(void *), void *data, unsigned num_threads, long lb, \1160long ub, long str, long chunk_sz) { \1161int gtid = __kmp_entry_gtid(); \1162MKLOC(loc, KMP_STR(func)); \1163KA_TRACE( \116420, \1165(KMP_STR( \1166func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \1167gtid, lb, ub, str, chunk_sz)); \1168\1169ompt_pre(); \1170\1171__kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task, \1172(microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \11739, task, data, num_threads, &loc, (schedule), lb, \1174(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \1175IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid)); \1176\1177KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \1178(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \1179(schedule) != kmp_sch_static); \1180\1181ompt_post(); \1182\1183KA_TRACE(20, (KMP_STR(func) " exit: T#%d\n", gtid)); \1184}11851186#if OMPT_SUPPORT && OMPT_OPTIONAL11871188#define OMPT_LOOP_PRE() \1189ompt_frame_t *parent_frame; \1190if (ompt_enabled.enabled) { \1191__ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); \1192parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); \1193OMPT_STORE_RETURN_ADDRESS(gtid); \1194}11951196#define OMPT_LOOP_POST() \1197if (ompt_enabled.enabled) { \1198parent_frame->enter_frame = ompt_data_none; \1199}12001201#else12021203#define OMPT_LOOP_PRE()12041205#define OMPT_LOOP_POST()12061207#endif12081209PARALLEL_LOOP_START(1210KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START),1211kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST)1212PARALLEL_LOOP_START(1213KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START),1214kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)1215PARALLEL_LOOP_START(1216KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START),1217kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)1218PARALLEL_LOOP_START(1219KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START),1220kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)12211222// Tasking constructs12231224void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data,1225void (*copy_func)(void *, void *),1226long arg_size, long arg_align,1227bool if_cond, unsigned gomp_flags,1228void **depend) {1229MKLOC(loc, "GOMP_task");1230int gtid = __kmp_entry_gtid();1231kmp_int32 flags = 0;1232kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;12331234KA_TRACE(20, ("GOMP_task: T#%d\n", gtid));12351236// The low-order bit is the "untied" flag1237if (!(gomp_flags & KMP_GOMP_TASK_UNTIED_FLAG)) {1238input_flags->tiedness = TASK_TIED;1239}1240// The second low-order bit is the "final" flag1241if (gomp_flags & KMP_GOMP_TASK_FINAL_FLAG) {1242input_flags->final = 1;1243}1244input_flags->native = 1;1245// __kmp_task_alloc() sets up all other flags12461247if (!if_cond) {1248arg_size = 0;1249}12501251kmp_task_t *task = __kmp_task_alloc(1252&loc, gtid, input_flags, sizeof(kmp_task_t),1253arg_size ? arg_size + arg_align - 1 : 0, (kmp_routine_entry_t)func);12541255if (arg_size > 0) {1256if (arg_align > 0) {1257task->shareds = (void *)((((size_t)task->shareds) + arg_align - 1) /1258arg_align * arg_align);1259}1260// else error??12611262if (copy_func) {1263(*copy_func)(task->shareds, data);1264} else {1265KMP_MEMCPY(task->shareds, data, arg_size);1266}1267}12681269#if OMPT_SUPPORT1270kmp_taskdata_t *current_task;1271if (ompt_enabled.enabled) {1272current_task = __kmp_threads[gtid]->th.th_current_task;1273current_task->ompt_task_info.frame.enter_frame.ptr =1274OMPT_GET_FRAME_ADDRESS(0);1275}1276OMPT_STORE_RETURN_ADDRESS(gtid);1277#endif12781279if (if_cond) {1280if (gomp_flags & KMP_GOMP_TASK_DEPENDS_FLAG) {1281KMP_ASSERT(depend);1282kmp_gomp_depends_info_t gomp_depends(depend);1283kmp_int32 ndeps = gomp_depends.get_num_deps();1284SimpleVLA<kmp_depend_info_t> dep_list(ndeps);1285for (kmp_int32 i = 0; i < ndeps; i++)1286dep_list[i] = gomp_depends.get_kmp_depend(i);1287kmp_int32 ndeps_cnv;1288__kmp_type_convert(ndeps, &ndeps_cnv);1289__kmpc_omp_task_with_deps(&loc, gtid, task, ndeps_cnv, dep_list, 0, NULL);1290} else {1291__kmpc_omp_task(&loc, gtid, task);1292}1293} else {1294#if OMPT_SUPPORT1295ompt_thread_info_t oldInfo;1296kmp_info_t *thread;1297kmp_taskdata_t *taskdata;1298if (ompt_enabled.enabled) {1299// Store the threads states and restore them after the task1300thread = __kmp_threads[gtid];1301taskdata = KMP_TASK_TO_TASKDATA(task);1302oldInfo = thread->th.ompt_thread_info;1303thread->th.ompt_thread_info.wait_id = 0;1304thread->th.ompt_thread_info.state = ompt_state_work_parallel;1305taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);1306}1307OMPT_STORE_RETURN_ADDRESS(gtid);1308#endif1309if (gomp_flags & KMP_GOMP_TASK_DEPENDS_FLAG) {1310KMP_ASSERT(depend);1311kmp_gomp_depends_info_t gomp_depends(depend);1312kmp_int32 ndeps = gomp_depends.get_num_deps();1313SimpleVLA<kmp_depend_info_t> dep_list(ndeps);1314for (kmp_int32 i = 0; i < ndeps; i++)1315dep_list[i] = gomp_depends.get_kmp_depend(i);1316__kmpc_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL);1317}13181319__kmpc_omp_task_begin_if0(&loc, gtid, task);1320func(data);1321__kmpc_omp_task_complete_if0(&loc, gtid, task);13221323#if OMPT_SUPPORT1324if (ompt_enabled.enabled) {1325thread->th.ompt_thread_info = oldInfo;1326taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;1327}1328#endif1329}1330#if OMPT_SUPPORT1331if (ompt_enabled.enabled) {1332current_task->ompt_task_info.frame.enter_frame = ompt_data_none;1333}1334#endif13351336KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid));1337}13381339void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKWAIT)(void) {1340MKLOC(loc, "GOMP_taskwait");1341int gtid = __kmp_entry_gtid();13421343#if OMPT_SUPPORT1344OMPT_STORE_RETURN_ADDRESS(gtid);1345#endif13461347KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid));13481349__kmpc_omp_taskwait(&loc, gtid);13501351KA_TRACE(20, ("GOMP_taskwait exit: T#%d\n", gtid));1352}13531354// Sections worksharing constructs1355//1356// For the sections construct, we initialize a dynamically scheduled loop1357// worksharing construct with lb 1 and stride 1, and use the iteration #'s1358// that its returns as sections ids.1359//1360// There are no special entry points for ordered sections, so we always use1361// the dynamically scheduled workshare, even if the sections aren't ordered.13621363unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_START)(unsigned count) {1364int status;1365kmp_int lb, ub, stride;1366int gtid = __kmp_entry_gtid();1367MKLOC(loc, "GOMP_sections_start");1368KA_TRACE(20, ("GOMP_sections_start: T#%d\n", gtid));13691370KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);13711372status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride);1373if (status) {1374KMP_DEBUG_ASSERT(stride == 1);1375KMP_DEBUG_ASSERT(lb > 0);1376KMP_ASSERT(lb == ub);1377} else {1378lb = 0;1379}13801381KA_TRACE(20, ("GOMP_sections_start exit: T#%d returning %u\n", gtid,1382(unsigned)lb));1383return (unsigned)lb;1384}13851386unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_NEXT)(void) {1387int status;1388kmp_int lb, ub, stride;1389int gtid = __kmp_get_gtid();1390MKLOC(loc, "GOMP_sections_next");1391KA_TRACE(20, ("GOMP_sections_next: T#%d\n", gtid));13921393#if OMPT_SUPPORT1394OMPT_STORE_RETURN_ADDRESS(gtid);1395#endif13961397status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride);1398if (status) {1399KMP_DEBUG_ASSERT(stride == 1);1400KMP_DEBUG_ASSERT(lb > 0);1401KMP_ASSERT(lb == ub);1402} else {1403lb = 0;1404}14051406KA_TRACE(140720, ("GOMP_sections_next exit: T#%d returning %u\n", gtid, (unsigned)lb));1408return (unsigned)lb;1409}14101411void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(1412void (*task)(void *), void *data, unsigned num_threads, unsigned count) {1413int gtid = __kmp_entry_gtid();14141415#if OMPT_SUPPORT1416ompt_frame_t *parent_frame;14171418if (ompt_enabled.enabled) {1419__ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);1420parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);1421}1422OMPT_STORE_RETURN_ADDRESS(gtid);1423#endif14241425MKLOC(loc, "GOMP_parallel_sections_start");1426KA_TRACE(20, ("GOMP_parallel_sections_start: T#%d\n", gtid));14271428__kmp_GOMP_fork_call(&loc, gtid, num_threads, 0u, task,1429(microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9,1430task, data, num_threads, &loc, kmp_nm_dynamic_chunked,1431(kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);14321433#if OMPT_SUPPORT1434if (ompt_enabled.enabled) {1435parent_frame->enter_frame = ompt_data_none;1436}1437#endif14381439KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);14401441KA_TRACE(20, ("GOMP_parallel_sections_start exit: T#%d\n", gtid));1442}14431444void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END)(void) {1445int gtid = __kmp_get_gtid();1446KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid))14471448#if OMPT_SUPPORT1449ompt_frame_t *ompt_frame;1450if (ompt_enabled.enabled) {1451__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);1452ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);1453}1454OMPT_STORE_RETURN_ADDRESS(gtid);1455#endif1456__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);1457#if OMPT_SUPPORT1458if (ompt_enabled.enabled) {1459ompt_frame->enter_frame = ompt_data_none;1460}1461#endif14621463KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid))1464}14651466void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT)(void) {1467KA_TRACE(20, ("GOMP_sections_end_nowait: T#%d\n", __kmp_get_gtid()))1468}14691470// libgomp has an empty function for GOMP_taskyield as of 2013-10-101471void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKYIELD)(void) {1472KA_TRACE(20, ("GOMP_taskyield: T#%d\n", __kmp_get_gtid()))1473return;1474}14751476void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *),1477void *data,1478unsigned num_threads,1479unsigned int flags) {1480int gtid = __kmp_entry_gtid();1481MKLOC(loc, "GOMP_parallel");1482KA_TRACE(20, ("GOMP_parallel: T#%d\n", gtid));14831484#if OMPT_SUPPORT1485ompt_task_info_t *parent_task_info, *task_info;1486if (ompt_enabled.enabled) {1487parent_task_info = __ompt_get_task_info_object(0);1488parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);1489}1490OMPT_STORE_RETURN_ADDRESS(gtid);1491#endif1492__kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,1493(microtask_t)__kmp_GOMP_microtask_wrapper, 2, task,1494data);1495#if OMPT_SUPPORT1496if (ompt_enabled.enabled) {1497task_info = __ompt_get_task_info_object(0);1498task_info->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);1499}1500#endif1501task(data);1502{1503#if OMPT_SUPPORT1504OMPT_STORE_RETURN_ADDRESS(gtid);1505#endif1506KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();1507}1508#if OMPT_SUPPORT1509if (ompt_enabled.enabled) {1510task_info->frame.exit_frame = ompt_data_none;1511parent_task_info->frame.enter_frame = ompt_data_none;1512}1513#endif1514}15151516void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *),1517void *data,1518unsigned num_threads,1519unsigned count,1520unsigned flags) {1521int gtid = __kmp_entry_gtid();1522MKLOC(loc, "GOMP_parallel_sections");1523KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid));15241525#if OMPT_SUPPORT1526ompt_frame_t *task_frame;1527kmp_info_t *thr;1528if (ompt_enabled.enabled) {1529thr = __kmp_threads[gtid];1530task_frame = &(thr->th.th_current_task->ompt_task_info.frame);1531task_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);1532}1533OMPT_STORE_RETURN_ADDRESS(gtid);1534#endif15351536__kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,1537(microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9,1538task, data, num_threads, &loc, kmp_nm_dynamic_chunked,1539(kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);15401541{1542#if OMPT_SUPPORT1543OMPT_STORE_RETURN_ADDRESS(gtid);1544#endif15451546KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);1547}15481549#if OMPT_SUPPORT1550ompt_frame_t *child_frame;1551if (ompt_enabled.enabled) {1552child_frame = &(thr->th.th_current_task->ompt_task_info.frame);1553child_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);1554}1555#endif15561557task(data);15581559#if OMPT_SUPPORT1560if (ompt_enabled.enabled) {1561child_frame->exit_frame = ompt_data_none;1562}1563#endif15641565KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();1566KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid));15671568#if OMPT_SUPPORT1569if (ompt_enabled.enabled) {1570task_frame->enter_frame = ompt_data_none;1571}1572#endif1573}15741575#define PARALLEL_LOOP(func, schedule, ompt_pre, ompt_post) \1576void func(void (*task)(void *), void *data, unsigned num_threads, long lb, \1577long ub, long str, long chunk_sz, unsigned flags) { \1578int gtid = __kmp_entry_gtid(); \1579MKLOC(loc, KMP_STR(func)); \1580KA_TRACE( \158120, \1582(KMP_STR( \1583func) ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \1584gtid, lb, ub, str, chunk_sz)); \1585\1586ompt_pre(); \1587IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \1588__kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task, \1589(microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \15909, task, data, num_threads, &loc, (schedule), lb, \1591(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \1592\1593{ \1594IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \1595KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \1596(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \1597(schedule) != kmp_sch_static); \1598} \1599task(data); \1600KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(); \1601ompt_post(); \1602\1603KA_TRACE(20, (KMP_STR(func) " exit: T#%d\n", gtid)); \1604}16051606PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC),1607kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST)1608PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC),1609kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)1610PARALLEL_LOOP(1611KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED),1612kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)1613PARALLEL_LOOP(1614KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC),1615kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)1616PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED),1617kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST)1618PARALLEL_LOOP(KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME),1619kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)1620PARALLEL_LOOP(1621KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME),1622kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)1623PARALLEL_LOOP(1624KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_RUNTIME),1625kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST)16261627void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_START)(void) {1628int gtid = __kmp_entry_gtid();1629MKLOC(loc, "GOMP_taskgroup_start");1630KA_TRACE(20, ("GOMP_taskgroup_start: T#%d\n", gtid));16311632#if OMPT_SUPPORT1633OMPT_STORE_RETURN_ADDRESS(gtid);1634#endif16351636__kmpc_taskgroup(&loc, gtid);16371638return;1639}16401641void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_END)(void) {1642int gtid = __kmp_get_gtid();1643MKLOC(loc, "GOMP_taskgroup_end");1644KA_TRACE(20, ("GOMP_taskgroup_end: T#%d\n", gtid));16451646#if OMPT_SUPPORT1647OMPT_STORE_RETURN_ADDRESS(gtid);1648#endif16491650__kmpc_end_taskgroup(&loc, gtid);16511652return;1653}16541655static kmp_int32 __kmp_gomp_to_omp_cancellation_kind(int gomp_kind) {1656kmp_int32 cncl_kind = 0;1657switch (gomp_kind) {1658case 1:1659cncl_kind = cancel_parallel;1660break;1661case 2:1662cncl_kind = cancel_loop;1663break;1664case 4:1665cncl_kind = cancel_sections;1666break;1667case 8:1668cncl_kind = cancel_taskgroup;1669break;1670}1671return cncl_kind;1672}16731674// Return true if cancellation should take place, false otherwise1675bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CANCELLATION_POINT)(int which) {1676int gtid = __kmp_get_gtid();1677MKLOC(loc, "GOMP_cancellation_point");1678KA_TRACE(20, ("GOMP_cancellation_point: T#%d which:%d\n", gtid, which));1679kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which);1680return __kmpc_cancellationpoint(&loc, gtid, cncl_kind);1681}16821683// Return true if cancellation should take place, false otherwise1684bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_CANCEL)(int which, bool do_cancel) {1685int gtid = __kmp_get_gtid();1686MKLOC(loc, "GOMP_cancel");1687KA_TRACE(20, ("GOMP_cancel: T#%d which:%d do_cancel:%d\n", gtid, which,1688(int)do_cancel));1689kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which);16901691if (do_cancel == FALSE) {1692return __kmpc_cancellationpoint(&loc, gtid, cncl_kind);1693} else {1694return __kmpc_cancel(&loc, gtid, cncl_kind);1695}1696}16971698// Return true if cancellation should take place, false otherwise1699bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER_CANCEL)(void) {1700int gtid = __kmp_get_gtid();1701KA_TRACE(20, ("GOMP_barrier_cancel: T#%d\n", gtid));1702return __kmp_barrier_gomp_cancel(gtid);1703}17041705// Return true if cancellation should take place, false otherwise1706bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL)(void) {1707int gtid = __kmp_get_gtid();1708KA_TRACE(20, ("GOMP_sections_end_cancel: T#%d\n", gtid));1709return __kmp_barrier_gomp_cancel(gtid);1710}17111712// Return true if cancellation should take place, false otherwise1713bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END_CANCEL)(void) {1714int gtid = __kmp_get_gtid();1715KA_TRACE(20, ("GOMP_loop_end_cancel: T#%d\n", gtid));1716return __kmp_barrier_gomp_cancel(gtid);1717}17181719// All target functions are empty as of 2014-05-291720void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET)(int device, void (*fn)(void *),1721const void *openmp_target,1722size_t mapnum, void **hostaddrs,1723size_t *sizes,1724unsigned char *kinds) {1725return;1726}17271728void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_DATA)(1729int device, const void *openmp_target, size_t mapnum, void **hostaddrs,1730size_t *sizes, unsigned char *kinds) {1731return;1732}17331734void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_END_DATA)(void) { return; }17351736void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TARGET_UPDATE)(1737int device, const void *openmp_target, size_t mapnum, void **hostaddrs,1738size_t *sizes, unsigned char *kinds) {1739return;1740}17411742void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TEAMS)(unsigned int num_teams,1743unsigned int thread_limit) {1744return;1745}17461747// Task duplication function which copies src to dest (both are1748// preallocated task structures)1749static void __kmp_gomp_task_dup(kmp_task_t *dest, kmp_task_t *src,1750kmp_int32 last_private) {1751kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(src);1752if (taskdata->td_copy_func) {1753(taskdata->td_copy_func)(dest->shareds, src->shareds);1754}1755}17561757void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER)(1758uintptr_t *);17591760#ifdef __cplusplus1761} // extern "C"1762#endif17631764template <typename T>1765void __GOMP_taskloop(void (*func)(void *), void *data,1766void (*copy_func)(void *, void *), long arg_size,1767long arg_align, unsigned gomp_flags,1768unsigned long num_tasks, int priority, T start, T end,1769T step) {1770typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32);1771MKLOC(loc, "GOMP_taskloop");1772int sched;1773T *loop_bounds;1774int gtid = __kmp_entry_gtid();1775kmp_int32 flags = 0;1776int if_val = gomp_flags & (1u << 10);1777int nogroup = gomp_flags & (1u << 11);1778int up = gomp_flags & (1u << 8);1779int reductions = gomp_flags & (1u << 12);1780p_task_dup_t task_dup = NULL;1781kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags;1782#ifdef KMP_DEBUG1783{1784char *buff;1785buff = __kmp_str_format(1786"GOMP_taskloop: T#%%d: func:%%p data:%%p copy_func:%%p "1787"arg_size:%%ld arg_align:%%ld gomp_flags:0x%%x num_tasks:%%lu "1788"priority:%%d start:%%%s end:%%%s step:%%%s\n",1789traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec);1790KA_TRACE(20, (buff, gtid, func, data, copy_func, arg_size, arg_align,1791gomp_flags, num_tasks, priority, start, end, step));1792__kmp_str_free(&buff);1793}1794#endif1795KMP_ASSERT((size_t)arg_size >= 2 * sizeof(T));1796KMP_ASSERT(arg_align > 0);1797// The low-order bit is the "untied" flag1798if (!(gomp_flags & 1)) {1799input_flags->tiedness = TASK_TIED;1800}1801// The second low-order bit is the "final" flag1802if (gomp_flags & 2) {1803input_flags->final = 1;1804}1805// Negative step flag1806if (!up) {1807// If step is flagged as negative, but isn't properly sign extended1808// Then manually sign extend it. Could be a short, int, char embedded1809// in a long. So cannot assume any cast.1810if (step > 0) {1811for (int i = sizeof(T) * CHAR_BIT - 1; i >= 0L; --i) {1812// break at the first 1 bit1813if (step & ((T)1 << i))1814break;1815step |= ((T)1 << i);1816}1817}1818}1819input_flags->native = 1;1820// Figure out if none/grainsize/num_tasks clause specified1821if (num_tasks > 0) {1822if (gomp_flags & (1u << 9))1823sched = 1; // grainsize specified1824else1825sched = 2; // num_tasks specified1826// neither grainsize nor num_tasks specified1827} else {1828sched = 0;1829}18301831// __kmp_task_alloc() sets up all other flags1832kmp_task_t *task =1833__kmp_task_alloc(&loc, gtid, input_flags, sizeof(kmp_task_t),1834arg_size + arg_align - 1, (kmp_routine_entry_t)func);1835kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);1836taskdata->td_copy_func = copy_func;1837taskdata->td_size_loop_bounds = sizeof(T);18381839// re-align shareds if needed and setup firstprivate copy constructors1840// through the task_dup mechanism1841task->shareds = (void *)((((size_t)task->shareds) + arg_align - 1) /1842arg_align * arg_align);1843if (copy_func) {1844task_dup = __kmp_gomp_task_dup;1845}1846KMP_MEMCPY(task->shareds, data, arg_size);18471848loop_bounds = (T *)task->shareds;1849loop_bounds[0] = start;1850loop_bounds[1] = end + (up ? -1 : 1);18511852if (!nogroup) {1853#if OMPT_SUPPORT && OMPT_OPTIONAL1854OMPT_STORE_RETURN_ADDRESS(gtid);1855#endif1856__kmpc_taskgroup(&loc, gtid);1857if (reductions) {1858// The data pointer points to lb, ub, then reduction data1859struct data_t {1860T a, b;1861uintptr_t *d;1862};1863uintptr_t *d = ((data_t *)data)->d;1864KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER)(d);1865}1866}1867__kmpc_taskloop(&loc, gtid, task, if_val, (kmp_uint64 *)&(loop_bounds[0]),1868(kmp_uint64 *)&(loop_bounds[1]), (kmp_int64)step, 1, sched,1869(kmp_uint64)num_tasks, (void *)task_dup);1870if (!nogroup) {1871#if OMPT_SUPPORT && OMPT_OPTIONAL1872OMPT_STORE_RETURN_ADDRESS(gtid);1873#endif1874__kmpc_end_taskgroup(&loc, gtid);1875}1876}18771878// 4 byte version of GOMP_doacross_post1879// This verison needs to create a temporary array which converts 4 byte1880// integers into 8 byte integers1881template <typename T, bool need_conversion = (sizeof(long) == 4)>1882void __kmp_GOMP_doacross_post(T *count);18831884template <> void __kmp_GOMP_doacross_post<long, true>(long *count) {1885int gtid = __kmp_entry_gtid();1886kmp_info_t *th = __kmp_threads[gtid];1887MKLOC(loc, "GOMP_doacross_post");1888kmp_int64 num_dims = th->th.th_dispatch->th_doacross_info[0];1889kmp_int64 *vec = (kmp_int64 *)__kmp_thread_malloc(1890th, (size_t)(sizeof(kmp_int64) * num_dims));1891for (kmp_int64 i = 0; i < num_dims; ++i) {1892vec[i] = (kmp_int64)count[i];1893}1894__kmpc_doacross_post(&loc, gtid, vec);1895__kmp_thread_free(th, vec);1896}18971898// 8 byte versions of GOMP_doacross_post1899// This version can just pass in the count array directly instead of creating1900// a temporary array1901template <> void __kmp_GOMP_doacross_post<long, false>(long *count) {1902int gtid = __kmp_entry_gtid();1903MKLOC(loc, "GOMP_doacross_post");1904__kmpc_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));1905}19061907template <typename T> void __kmp_GOMP_doacross_wait(T first, va_list args) {1908int gtid = __kmp_entry_gtid();1909kmp_info_t *th = __kmp_threads[gtid];1910MKLOC(loc, "GOMP_doacross_wait");1911kmp_int64 num_dims = th->th.th_dispatch->th_doacross_info[0];1912kmp_int64 *vec = (kmp_int64 *)__kmp_thread_malloc(1913th, (size_t)(sizeof(kmp_int64) * num_dims));1914vec[0] = (kmp_int64)first;1915for (kmp_int64 i = 1; i < num_dims; ++i) {1916T item = va_arg(args, T);1917vec[i] = (kmp_int64)item;1918}1919__kmpc_doacross_wait(&loc, gtid, vec);1920__kmp_thread_free(th, vec);1921return;1922}19231924#ifdef __cplusplus1925extern "C" {1926#endif // __cplusplus19271928void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKLOOP)(1929void (*func)(void *), void *data, void (*copy_func)(void *, void *),1930long arg_size, long arg_align, unsigned gomp_flags, unsigned long num_tasks,1931int priority, long start, long end, long step) {1932__GOMP_taskloop<long>(func, data, copy_func, arg_size, arg_align, gomp_flags,1933num_tasks, priority, start, end, step);1934}19351936void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKLOOP_ULL)(1937void (*func)(void *), void *data, void (*copy_func)(void *, void *),1938long arg_size, long arg_align, unsigned gomp_flags, unsigned long num_tasks,1939int priority, unsigned long long start, unsigned long long end,1940unsigned long long step) {1941__GOMP_taskloop<unsigned long long>(func, data, copy_func, arg_size,1942arg_align, gomp_flags, num_tasks,1943priority, start, end, step);1944}19451946void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_POST)(long *count) {1947__kmp_GOMP_doacross_post(count);1948}19491950void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_WAIT)(long first, ...) {1951va_list args;1952va_start(args, first);1953__kmp_GOMP_doacross_wait<long>(first, args);1954va_end(args);1955}19561957void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_POST)(1958unsigned long long *count) {1959int gtid = __kmp_entry_gtid();1960MKLOC(loc, "GOMP_doacross_ull_post");1961__kmpc_doacross_post(&loc, gtid, RCAST(kmp_int64 *, count));1962}19631964void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT)(1965unsigned long long first, ...) {1966va_list args;1967va_start(args, first);1968__kmp_GOMP_doacross_wait<unsigned long long>(first, args);1969va_end(args);1970}19711972// fn: the function each primary thread of new team will call1973// data: argument to fn1974// num_teams, thread_limit: max bounds on respective ICV1975// flags: unused1976void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TEAMS_REG)(void (*fn)(void *),1977void *data,1978unsigned num_teams,1979unsigned thread_limit,1980unsigned flags) {1981MKLOC(loc, "GOMP_teams_reg");1982int gtid = __kmp_entry_gtid();1983KA_TRACE(20, ("GOMP_teams_reg: T#%d num_teams=%u thread_limit=%u flag=%u\n",1984gtid, num_teams, thread_limit, flags));1985__kmpc_push_num_teams(&loc, gtid, num_teams, thread_limit);1986__kmpc_fork_teams(&loc, 2, (microtask_t)__kmp_GOMP_microtask_wrapper, fn,1987data);1988KA_TRACE(20, ("GOMP_teams_reg exit: T#%d\n", gtid));1989}19901991void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKWAIT_DEPEND)(void **depend) {1992MKLOC(loc, "GOMP_taskwait_depend");1993int gtid = __kmp_entry_gtid();1994KA_TRACE(20, ("GOMP_taskwait_depend: T#%d\n", gtid));1995kmp_gomp_depends_info_t gomp_depends(depend);1996kmp_int32 ndeps = gomp_depends.get_num_deps();1997SimpleVLA<kmp_depend_info_t> dep_list(ndeps);1998for (kmp_int32 i = 0; i < ndeps; i++)1999dep_list[i] = gomp_depends.get_kmp_depend(i);2000#if OMPT_SUPPORT2001OMPT_STORE_RETURN_ADDRESS(gtid);2002#endif2003__kmpc_omp_wait_deps(&loc, gtid, ndeps, dep_list, 0, NULL);2004KA_TRACE(20, ("GOMP_taskwait_depend exit: T#%d\n", gtid));2005}20062007static inline void2008__kmp_GOMP_taskgroup_reduction_register(uintptr_t *data, kmp_taskgroup_t *tg,2009int nthreads,2010uintptr_t *allocated = nullptr) {2011KMP_ASSERT(data);2012KMP_ASSERT(nthreads > 0);2013// Have private copy pointers point to previously allocated2014// reduction data or allocate new data here2015if (allocated) {2016data[2] = allocated[2];2017data[6] = allocated[6];2018} else {2019data[2] = (uintptr_t)__kmp_allocate(nthreads * data[1]);2020data[6] = data[2] + (nthreads * data[1]);2021}2022if (tg)2023tg->gomp_data = data;2024}20252026void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER)(2027uintptr_t *data) {2028int gtid = __kmp_entry_gtid();2029KA_TRACE(20, ("GOMP_taskgroup_reduction_register: T#%d\n", gtid));2030kmp_info_t *thread = __kmp_threads[gtid];2031kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;2032int nthreads = thread->th.th_team_nproc;2033__kmp_GOMP_taskgroup_reduction_register(data, tg, nthreads);2034}20352036void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER)(2037uintptr_t *data) {2038KA_TRACE(20,2039("GOMP_taskgroup_reduction_unregister: T#%d\n", __kmp_get_gtid()));2040KMP_ASSERT(data && data[2]);2041__kmp_free((void *)data[2]);2042}20432044// Search through reduction data and set ptrs[] elements2045// to proper privatized copy address2046void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK_REDUCTION_REMAP)(size_t cnt,2047size_t cntorig,2048void **ptrs) {2049int gtid = __kmp_entry_gtid();2050KA_TRACE(20, ("GOMP_task_reduction_remap: T#%d\n", gtid));2051kmp_info_t *thread = __kmp_threads[gtid];2052kmp_int32 tid = __kmp_get_tid();2053for (size_t i = 0; i < cnt; ++i) {2054uintptr_t address = (uintptr_t)ptrs[i];2055void *propagated_address = NULL;2056void *mapped_address = NULL;2057// Check taskgroups reduce data2058kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup;2059while (tg) {2060uintptr_t *gomp_data = tg->gomp_data;2061if (!gomp_data) {2062tg = tg->parent;2063continue;2064}2065// Check the shared addresses list2066size_t num_vars = (size_t)gomp_data[0];2067uintptr_t per_thread_size = gomp_data[1];2068uintptr_t reduce_data = gomp_data[2];2069uintptr_t end_reduce_data = gomp_data[6];2070for (size_t j = 0; j < num_vars; ++j) {2071uintptr_t *entry = gomp_data + 7 + 3 * j;2072if (entry[0] == address) {2073uintptr_t offset = entry[1];2074mapped_address =2075(void *)(reduce_data + tid * per_thread_size + offset);2076if (i < cntorig)2077propagated_address = (void *)entry[0];2078break;2079}2080}2081if (mapped_address)2082break;2083// Check if address is within privatized copies range2084if (!mapped_address && address >= reduce_data &&2085address < end_reduce_data) {2086uintptr_t offset = (address - reduce_data) % per_thread_size;2087mapped_address = (void *)(reduce_data + tid * per_thread_size + offset);2088if (i < cntorig) {2089for (size_t j = 0; j < num_vars; ++j) {2090uintptr_t *entry = gomp_data + 7 + 3 * j;2091if (entry[1] == offset) {2092propagated_address = (void *)entry[0];2093break;2094}2095}2096}2097}2098if (mapped_address)2099break;2100tg = tg->parent;2101}2102KMP_ASSERT(mapped_address);2103ptrs[i] = mapped_address;2104if (i < cntorig) {2105KMP_ASSERT(propagated_address);2106ptrs[cnt + i] = propagated_address;2107}2108}2109}21102111static void __kmp_GOMP_init_reductions(int gtid, uintptr_t *data, int is_ws) {2112kmp_info_t *thr = __kmp_threads[gtid];2113kmp_team_t *team = thr->th.th_team;2114// First start a taskgroup2115__kmpc_taskgroup(NULL, gtid);2116// Then setup reduction data2117void *reduce_data = KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[is_ws]);2118if (reduce_data == NULL &&2119__kmp_atomic_compare_store(&team->t.t_tg_reduce_data[is_ws], reduce_data,2120(void *)1)) {2121// Single thread enters this block to initialize common reduction data2122KMP_DEBUG_ASSERT(reduce_data == NULL);2123__kmp_GOMP_taskgroup_reduction_register(data, NULL, thr->th.th_team_nproc);2124KMP_ATOMIC_ST_REL(&team->t.t_tg_fini_counter[is_ws], 0);2125KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[is_ws], (void *)data);2126} else {2127// Wait for task reduction initialization2128while ((reduce_data = KMP_ATOMIC_LD_ACQ(2129&team->t.t_tg_reduce_data[is_ws])) == (void *)1) {2130KMP_CPU_PAUSE();2131}2132KMP_DEBUG_ASSERT(reduce_data > (void *)1); // should be valid pointer here2133}2134// For worksharing constructs, each thread has its own reduction structure.2135// Have each reduction structure point to same privatized copies of vars.2136// For parallel, each thread points to same reduction structure and privatized2137// copies of vars2138if (is_ws) {2139__kmp_GOMP_taskgroup_reduction_register(2140data, NULL, thr->th.th_team_nproc,2141(uintptr_t *)KMP_ATOMIC_LD_ACQ(&team->t.t_tg_reduce_data[is_ws]));2142}2143kmp_taskgroup_t *tg = thr->th.th_current_task->td_taskgroup;2144tg->gomp_data = data;2145}21462147static unsigned2148__kmp_GOMP_par_reductions_microtask_wrapper(int *gtid, int *npr,2149void (*task)(void *), void *data) {2150kmp_info_t *thr = __kmp_threads[*gtid];2151kmp_team_t *team = thr->th.th_team;2152uintptr_t *reduce_data = *(uintptr_t **)data;2153__kmp_GOMP_init_reductions(*gtid, reduce_data, 0);21542155#if OMPT_SUPPORT2156ompt_frame_t *ompt_frame;2157ompt_state_t enclosing_state;21582159if (ompt_enabled.enabled) {2160// save enclosing task state; set current state for task2161enclosing_state = thr->th.ompt_thread_info.state;2162thr->th.ompt_thread_info.state = ompt_state_work_parallel;21632164// set task frame2165__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);2166ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);2167}2168#endif21692170task(data);21712172#if OMPT_SUPPORT2173if (ompt_enabled.enabled) {2174// clear task frame2175ompt_frame->exit_frame = ompt_data_none;21762177// restore enclosing state2178thr->th.ompt_thread_info.state = enclosing_state;2179}2180#endif2181__kmpc_end_taskgroup(NULL, *gtid);2182// if last thread out, then reset the team's reduce data2183// the GOMP_taskgroup_reduction_unregister() function will deallocate2184// private copies after reduction calculations take place.2185int count = KMP_ATOMIC_INC(&team->t.t_tg_fini_counter[0]);2186if (count == thr->th.th_team_nproc - 1) {2187KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[0], NULL);2188KMP_ATOMIC_ST_REL(&team->t.t_tg_fini_counter[0], 0);2189}2190return (unsigned)thr->th.th_team_nproc;2191}21922193unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_REDUCTIONS)(2194void (*task)(void *), void *data, unsigned num_threads,2195unsigned int flags) {2196MKLOC(loc, "GOMP_parallel_reductions");2197int gtid = __kmp_entry_gtid();2198KA_TRACE(20, ("GOMP_parallel_reductions: T#%d\n", gtid));2199__kmp_GOMP_fork_call(&loc, gtid, num_threads, flags, task,2200(microtask_t)__kmp_GOMP_par_reductions_microtask_wrapper,22012, task, data);2202unsigned retval =2203__kmp_GOMP_par_reductions_microtask_wrapper(>id, NULL, task, data);2204KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();2205KA_TRACE(20, ("GOMP_parallel_reductions exit: T#%d\n", gtid));2206return retval;2207}22082209bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_START)(2210long start, long end, long incr, long sched, long chunk_size, long *istart,2211long *iend, uintptr_t *reductions, void **mem) {2212int status = 0;2213int gtid = __kmp_entry_gtid();2214KA_TRACE(20, ("GOMP_loop_start: T#%d, reductions: %p\n", gtid, reductions));2215if (reductions)2216__kmp_GOMP_init_reductions(gtid, reductions, 1);2217if (mem)2218KMP_FATAL(GompFeatureNotSupported, "scan");2219if (istart == NULL)2220return true;2221const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);2222long monotonic = sched & MONOTONIC_FLAG;2223sched &= ~MONOTONIC_FLAG;2224if (sched == 0) {2225if (monotonic)2226status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_RUNTIME_START)(2227start, end, incr, istart, iend);2228else2229status = KMP_EXPAND_NAME(2230KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START)(2231start, end, incr, istart, iend);2232} else if (sched == 1) {2233status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_STATIC_START)(2234start, end, incr, chunk_size, istart, iend);2235} else if (sched == 2) {2236if (monotonic)2237status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START)(2238start, end, incr, chunk_size, istart, iend);2239else2240status =2241KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START)(2242start, end, incr, chunk_size, istart, iend);2243} else if (sched == 3) {2244if (monotonic)2245status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_GUIDED_START)(2246start, end, incr, chunk_size, istart, iend);2247else2248status =2249KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START)(2250start, end, incr, chunk_size, istart, iend);2251} else if (sched == 4) {2252status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START)(2253start, end, incr, istart, iend);2254} else {2255KMP_ASSERT(0);2256}2257return status;2258}22592260bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_START)(2261bool up, unsigned long long start, unsigned long long end,2262unsigned long long incr, long sched, unsigned long long chunk_size,2263unsigned long long *istart, unsigned long long *iend, uintptr_t *reductions,2264void **mem) {2265int status = 0;2266int gtid = __kmp_entry_gtid();2267KA_TRACE(20,2268("GOMP_loop_ull_start: T#%d, reductions: %p\n", gtid, reductions));2269if (reductions)2270__kmp_GOMP_init_reductions(gtid, reductions, 1);2271if (mem)2272KMP_FATAL(GompFeatureNotSupported, "scan");2273if (istart == NULL)2274return true;2275const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);2276long monotonic = sched & MONOTONIC_FLAG;2277sched &= ~MONOTONIC_FLAG;2278if (sched == 0) {2279if (monotonic)2280status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START)(2281up, start, end, incr, istart, iend);2282else2283status = KMP_EXPAND_NAME(2284KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START)(2285up, start, end, incr, istart, iend);2286} else if (sched == 1) {2287status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START)(2288up, start, end, incr, chunk_size, istart, iend);2289} else if (sched == 2) {2290if (monotonic)2291status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START)(2292up, start, end, incr, chunk_size, istart, iend);2293else2294status = KMP_EXPAND_NAME(2295KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START)(2296up, start, end, incr, chunk_size, istart, iend);2297} else if (sched == 3) {2298if (monotonic)2299status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START)(2300up, start, end, incr, chunk_size, istart, iend);2301else2302status =2303KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START)(2304up, start, end, incr, chunk_size, istart, iend);2305} else if (sched == 4) {2306status =2307KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START)(2308up, start, end, incr, istart, iend);2309} else {2310KMP_ASSERT(0);2311}2312return status;2313}23142315bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_START)(2316unsigned ncounts, long *counts, long sched, long chunk_size, long *istart,2317long *iend, uintptr_t *reductions, void **mem) {2318int status = 0;2319int gtid = __kmp_entry_gtid();2320KA_TRACE(20, ("GOMP_loop_doacross_start: T#%d, reductions: %p\n", gtid,2321reductions));2322if (reductions)2323__kmp_GOMP_init_reductions(gtid, reductions, 1);2324if (mem)2325KMP_FATAL(GompFeatureNotSupported, "scan");2326if (istart == NULL)2327return true;2328// Ignore any monotonic flag2329const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);2330sched &= ~MONOTONIC_FLAG;2331if (sched == 0) {2332status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START)(2333ncounts, counts, istart, iend);2334} else if (sched == 1) {2335status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START)(2336ncounts, counts, chunk_size, istart, iend);2337} else if (sched == 2) {2338status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START)(2339ncounts, counts, chunk_size, istart, iend);2340} else if (sched == 3) {2341status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START)(2342ncounts, counts, chunk_size, istart, iend);2343} else {2344KMP_ASSERT(0);2345}2346return status;2347}23482349bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_START)(2350unsigned ncounts, unsigned long long *counts, long sched,2351unsigned long long chunk_size, unsigned long long *istart,2352unsigned long long *iend, uintptr_t *reductions, void **mem) {2353int status = 0;2354int gtid = __kmp_entry_gtid();2355KA_TRACE(20, ("GOMP_loop_ull_doacross_start: T#%d, reductions: %p\n", gtid,2356reductions));2357if (reductions)2358__kmp_GOMP_init_reductions(gtid, reductions, 1);2359if (mem)2360KMP_FATAL(GompFeatureNotSupported, "scan");2361if (istart == NULL)2362return true;2363// Ignore any monotonic flag2364const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);2365sched &= ~MONOTONIC_FLAG;2366if (sched == 0) {2367status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START)(2368ncounts, counts, istart, iend);2369} else if (sched == 1) {2370status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START)(2371ncounts, counts, chunk_size, istart, iend);2372} else if (sched == 2) {2373status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START)(2374ncounts, counts, chunk_size, istart, iend);2375} else if (sched == 3) {2376status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START)(2377ncounts, counts, chunk_size, istart, iend);2378} else {2379KMP_ASSERT(0);2380}2381return status;2382}23832384bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_START)(2385long start, long end, long incr, long sched, long chunk_size, long *istart,2386long *iend, uintptr_t *reductions, void **mem) {2387int status = 0;2388int gtid = __kmp_entry_gtid();2389KA_TRACE(20, ("GOMP_loop_ordered_start: T#%d, reductions: %p\n", gtid,2390reductions));2391if (reductions)2392__kmp_GOMP_init_reductions(gtid, reductions, 1);2393if (mem)2394KMP_FATAL(GompFeatureNotSupported, "scan");2395if (istart == NULL)2396return true;2397// Ignore any monotonic flag2398const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);2399sched &= ~MONOTONIC_FLAG;2400if (sched == 0) {2401status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START)(2402start, end, incr, istart, iend);2403} else if (sched == 1) {2404status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START)(2405start, end, incr, chunk_size, istart, iend);2406} else if (sched == 2) {2407status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START)(2408start, end, incr, chunk_size, istart, iend);2409} else if (sched == 3) {2410status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START)(2411start, end, incr, chunk_size, istart, iend);2412} else {2413KMP_ASSERT(0);2414}2415return status;2416}24172418bool KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_START)(2419bool up, unsigned long long start, unsigned long long end,2420unsigned long long incr, long sched, unsigned long long chunk_size,2421unsigned long long *istart, unsigned long long *iend, uintptr_t *reductions,2422void **mem) {2423int status = 0;2424int gtid = __kmp_entry_gtid();2425KA_TRACE(20, ("GOMP_loop_ull_ordered_start: T#%d, reductions: %p\n", gtid,2426reductions));2427if (reductions)2428__kmp_GOMP_init_reductions(gtid, reductions, 1);2429if (mem)2430KMP_FATAL(GompFeatureNotSupported, "scan");2431if (istart == NULL)2432return true;2433// Ignore any monotonic flag2434const long MONOTONIC_FLAG = (long)(kmp_sched_monotonic);2435sched &= ~MONOTONIC_FLAG;2436if (sched == 0) {2437status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START)(2438up, start, end, incr, istart, iend);2439} else if (sched == 1) {2440status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START)(2441up, start, end, incr, chunk_size, istart, iend);2442} else if (sched == 2) {2443status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START)(2444up, start, end, incr, chunk_size, istart, iend);2445} else if (sched == 3) {2446status = KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START)(2447up, start, end, incr, chunk_size, istart, iend);2448} else {2449KMP_ASSERT(0);2450}2451return status;2452}24532454unsigned KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS2_START)(2455unsigned count, uintptr_t *reductions, void **mem) {2456int gtid = __kmp_entry_gtid();2457KA_TRACE(20,2458("GOMP_sections2_start: T#%d, reductions: %p\n", gtid, reductions));2459if (reductions)2460__kmp_GOMP_init_reductions(gtid, reductions, 1);2461if (mem)2462KMP_FATAL(GompFeatureNotSupported, "scan");2463return KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_START)(count);2464}24652466void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER)(2467bool cancelled) {2468int gtid = __kmp_get_gtid();2469MKLOC(loc, "GOMP_workshare_task_reduction_unregister");2470KA_TRACE(20, ("GOMP_workshare_task_reduction_unregister: T#%d\n", gtid));2471kmp_info_t *thr = __kmp_threads[gtid];2472kmp_team_t *team = thr->th.th_team;2473__kmpc_end_taskgroup(NULL, gtid);2474// If last thread out of workshare, then reset the team's reduce data2475// the GOMP_taskgroup_reduction_unregister() function will deallocate2476// private copies after reduction calculations take place.2477int count = KMP_ATOMIC_INC(&team->t.t_tg_fini_counter[1]);2478if (count == thr->th.th_team_nproc - 1) {2479KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER)2480((uintptr_t *)KMP_ATOMIC_LD_RLX(&team->t.t_tg_reduce_data[1]));2481KMP_ATOMIC_ST_REL(&team->t.t_tg_reduce_data[1], NULL);2482KMP_ATOMIC_ST_REL(&team->t.t_tg_fini_counter[1], 0);2483}2484if (!cancelled) {2485__kmpc_barrier(&loc, gtid);2486}2487}24882489// allocator construct2490void *KMP_EXPAND_NAME(KMP_API_NAME_GOMP_ALLOC)(size_t alignment, size_t size,2491uintptr_t allocator) {2492int gtid = __kmp_entry_gtid();2493KA_TRACE(20, ("GOMP_alloc: T#%d\n", gtid));2494#if OMPT_SUPPORT && OMPT_OPTIONAL2495OMPT_STORE_RETURN_ADDRESS(gtid);2496#endif2497return __kmp_alloc(gtid, alignment, size, (omp_allocator_handle_t)allocator);2498}24992500void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_FREE)(void *ptr, uintptr_t allocator) {2501int gtid = __kmp_entry_gtid();2502KA_TRACE(20, ("GOMP_free: T#%d\n", gtid));2503#if OMPT_SUPPORT && OMPT_OPTIONAL2504OMPT_STORE_RETURN_ADDRESS(gtid);2505#endif2506return ___kmpc_free(gtid, ptr, (omp_allocator_handle_t)allocator);2507}25082509/* The following sections of code create aliases for the GOMP_* functions, then2510create versioned symbols using the assembler directive .symver. This is only2511pertinent for ELF .so library. The KMP_VERSION_SYMBOL macro is defined in2512kmp_os.h */25132514#ifdef KMP_USE_VERSION_SYMBOLS2515// GOMP_1.0 versioned symbols2516KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ATOMIC_END, 10, "GOMP_1.0");2517KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ATOMIC_START, 10, "GOMP_1.0");2518KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_BARRIER, 10, "GOMP_1.0");2519KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_END, 10, "GOMP_1.0");2520KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_NAME_END, 10, "GOMP_1.0");2521KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_NAME_START, 10, "GOMP_1.0");2522KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CRITICAL_START, 10, "GOMP_1.0");2523KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT, 10, "GOMP_1.0");2524KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START, 10, "GOMP_1.0");2525KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END, 10, "GOMP_1.0");2526KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END_NOWAIT, 10, "GOMP_1.0");2527KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT, 10, "GOMP_1.0");2528KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_GUIDED_START, 10, "GOMP_1.0");2529KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT, 10, "GOMP_1.0");2530KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START, 10,2531"GOMP_1.0");2532KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT, 10, "GOMP_1.0");2533KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START, 10, "GOMP_1.0");2534KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT, 10, "GOMP_1.0");2535KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START, 10,2536"GOMP_1.0");2537KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT, 10, "GOMP_1.0");2538KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START, 10, "GOMP_1.0");2539KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT, 10, "GOMP_1.0");2540KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_RUNTIME_START, 10, "GOMP_1.0");2541KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT, 10, "GOMP_1.0");2542KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_STATIC_START, 10, "GOMP_1.0");2543KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ORDERED_END, 10, "GOMP_1.0");2544KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ORDERED_START, 10, "GOMP_1.0");2545KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_END, 10, "GOMP_1.0");2546KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START, 10,2547"GOMP_1.0");2548KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START, 10,2549"GOMP_1.0");2550KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START, 10,2551"GOMP_1.0");2552KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START, 10,2553"GOMP_1.0");2554KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START, 10, "GOMP_1.0");2555KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_START, 10, "GOMP_1.0");2556KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END, 10, "GOMP_1.0");2557KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT, 10, "GOMP_1.0");2558KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_NEXT, 10, "GOMP_1.0");2559KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_START, 10, "GOMP_1.0");2560KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_COPY_END, 10, "GOMP_1.0");2561KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_COPY_START, 10, "GOMP_1.0");2562KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SINGLE_START, 10, "GOMP_1.0");25632564// GOMP_2.0 versioned symbols2565KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASK, 20, "GOMP_2.0");2566KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKWAIT, 20, "GOMP_2.0");2567KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT, 20, "GOMP_2.0");2568KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START, 20, "GOMP_2.0");2569KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT, 20, "GOMP_2.0");2570KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START, 20, "GOMP_2.0");2571KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT, 20,2572"GOMP_2.0");2573KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START, 20,2574"GOMP_2.0");2575KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT, 20,2576"GOMP_2.0");2577KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START, 20,2578"GOMP_2.0");2579KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT, 20,2580"GOMP_2.0");2581KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START, 20,2582"GOMP_2.0");2583KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT, 20,2584"GOMP_2.0");2585KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START, 20,2586"GOMP_2.0");2587KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT, 20, "GOMP_2.0");2588KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START, 20, "GOMP_2.0");2589KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT, 20, "GOMP_2.0");2590KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START, 20, "GOMP_2.0");25912592// GOMP_3.0 versioned symbols2593KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKYIELD, 30, "GOMP_3.0");25942595// GOMP_4.0 versioned symbols2596KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL, 40, "GOMP_4.0");2597KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_SECTIONS, 40, "GOMP_4.0");2598KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC, 40, "GOMP_4.0");2599KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED, 40, "GOMP_4.0");2600KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME, 40, "GOMP_4.0");2601KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC, 40, "GOMP_4.0");2602KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_START, 40, "GOMP_4.0");2603KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_END, 40, "GOMP_4.0");2604KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_BARRIER_CANCEL, 40, "GOMP_4.0");2605KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CANCEL, 40, "GOMP_4.0");2606KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_CANCELLATION_POINT, 40, "GOMP_4.0");2607KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_END_CANCEL, 40, "GOMP_4.0");2608KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL, 40, "GOMP_4.0");2609KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET, 40, "GOMP_4.0");2610KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_DATA, 40, "GOMP_4.0");2611KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_END_DATA, 40, "GOMP_4.0");2612KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TARGET_UPDATE, 40, "GOMP_4.0");2613KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TEAMS, 40, "GOMP_4.0");26142615// GOMP_4.5 versioned symbols2616KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKLOOP, 45, "GOMP_4.5");2617KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKLOOP_ULL, 45, "GOMP_4.5");2618KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_POST, 45, "GOMP_4.5");2619KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_WAIT, 45, "GOMP_4.5");2620KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_STATIC_START, 45,2621"GOMP_4.5");2622KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_DYNAMIC_START, 45,2623"GOMP_4.5");2624KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_GUIDED_START, 45,2625"GOMP_4.5");2626KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_RUNTIME_START, 45,2627"GOMP_4.5");2628KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_ULL_POST, 45, "GOMP_4.5");2629KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_DOACROSS_ULL_WAIT, 45, "GOMP_4.5");2630KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_STATIC_START, 45,2631"GOMP_4.5");2632KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_DYNAMIC_START, 45,2633"GOMP_4.5");2634KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_GUIDED_START, 45,2635"GOMP_4.5");2636KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_RUNTIME_START, 45,2637"GOMP_4.5");2638KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_START, 45,2639"GOMP_4.5");2640KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_DYNAMIC_NEXT, 45,2641"GOMP_4.5");2642KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_START, 45,2643"GOMP_4.5");2644KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_GUIDED_NEXT, 45,2645"GOMP_4.5");2646KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_START, 45,2647"GOMP_4.5");2648KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_DYNAMIC_NEXT, 45,2649"GOMP_4.5");2650KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_START, 45,2651"GOMP_4.5");2652KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_GUIDED_NEXT, 45,2653"GOMP_4.5");2654KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_DYNAMIC, 45,2655"GOMP_4.5");2656KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_GUIDED, 45,2657"GOMP_4.5");26582659// GOMP_5.0 versioned symbols2660KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_NEXT, 50,2661"GOMP_5.0");2662KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_MAYBE_NONMONOTONIC_RUNTIME_START, 50,2663"GOMP_5.0");2664KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_NEXT, 50,2665"GOMP_5.0");2666KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_NONMONOTONIC_RUNTIME_START, 50,2667"GOMP_5.0");2668KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_NEXT,266950, "GOMP_5.0");2670KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_MAYBE_NONMONOTONIC_RUNTIME_START,267150, "GOMP_5.0");2672KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_NEXT, 50,2673"GOMP_5.0");2674KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_NONMONOTONIC_RUNTIME_START, 50,2675"GOMP_5.0");2676KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_NONMONOTONIC_RUNTIME, 50,2677"GOMP_5.0");2678KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_LOOP_MAYBE_NONMONOTONIC_RUNTIME,267950, "GOMP_5.0");2680KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TEAMS_REG, 50, "GOMP_5.0");2681KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKWAIT_DEPEND, 50, "GOMP_5.0");2682KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_REGISTER, 50,2683"GOMP_5.0");2684KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASKGROUP_REDUCTION_UNREGISTER, 50,2685"GOMP_5.0");2686KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_TASK_REDUCTION_REMAP, 50, "GOMP_5.0");2687KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_PARALLEL_REDUCTIONS, 50, "GOMP_5.0");2688KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_START, 50, "GOMP_5.0");2689KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_START, 50, "GOMP_5.0");2690KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_DOACROSS_START, 50, "GOMP_5.0");2691KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_DOACROSS_START, 50, "GOMP_5.0");2692KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ORDERED_START, 50, "GOMP_5.0");2693KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_START, 50, "GOMP_5.0");2694KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_SECTIONS2_START, 50, "GOMP_5.0");2695KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_WORKSHARE_TASK_REDUCTION_UNREGISTER, 50,2696"GOMP_5.0");26972698// GOMP_5.0.1 versioned symbols2699KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_ALLOC, 501, "GOMP_5.0.1");2700KMP_VERSION_SYMBOL(KMP_API_NAME_GOMP_FREE, 501, "GOMP_5.0.1");2701#endif // KMP_USE_VERSION_SYMBOLS27022703#ifdef __cplusplus2704} // extern "C"2705#endif // __cplusplus270627072708