Path: blob/main/contrib/llvm-project/openmp/runtime/src/kmp_runtime.cpp
/*
 * kmp_runtime.cpp -- KPTS runtime support library
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp.h"
#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_io.h"
#include "kmp_itt.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_str.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
#include "kmp_dispatch.h"
#include "kmp_utils.h"
#if KMP_USE_HIER_SCHED
#include "kmp_dispatch_hier.h"
#endif

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
#if OMPD_SUPPORT
#include "ompd-specific.h"
#endif

#if OMP_PROFILING_SUPPORT
#include "llvm/Support/TimeProfiler.h"
static char *ProfileTraceFile = nullptr;
#endif

/* these are temporary issues to be dealt with */
#define KMP_USE_PRCTL 0

#if KMP_OS_WINDOWS
#include <process.h>
#endif

#ifndef KMP_USE_SHM
// Windows and WASI do not need these include files as they don't use shared
// memory.
#else
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#define SHM_SIZE 1024
#endif

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] =
    KMP_VERSION_PREFIX "API version: 5.0 (201611)";

#ifdef KMP_DEBUG
char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";
#endif /* KMP_DEBUG */

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

/* ------------------------------------------------------------------------ */

#if KMP_USE_MONITOR
kmp_info_t __kmp_monitor;
#endif

/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
#if KMP_OS_WINDOWS
static int __kmp_unregister_root_other_thread(int gtid);
#endif
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;

void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads);
void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads);

static kmp_nested_nthreads_t *__kmp_override_nested_nth(kmp_info_t *thr,
                                                        int level) {
  kmp_nested_nthreads_t *new_nested_nth =
      (kmp_nested_nthreads_t *)KMP_INTERNAL_MALLOC(
          sizeof(kmp_nested_nthreads_t));
  int new_size = level + thr->th.th_set_nested_nth_sz;
  new_nested_nth->nth = (int *)KMP_INTERNAL_MALLOC(new_size * sizeof(int));
  for (int i = 0; i < level + 1; ++i)
    new_nested_nth->nth[i] = 0;
  for (int i = level + 1, j = 1; i < new_size; ++i, ++j)
    new_nested_nth->nth[i] = thr->th.th_set_nested_nth[j];
  new_nested_nth->size = new_nested_nth->used = new_size;
  return new_nested_nth;
}

/* Calculate the identifier of the current thread */
/* fast (and somewhat portable) way to get unique identifier of executing
   thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to
     a parallel region, made it return KMP_GTID_DNE to force serial_initialize
     by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee
     __kmp_init_gtid for this to work. */

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  /* ATT: The code below is a source of potential bugs due to unsynchronized
     access to __kmp_threads array. For example:
     1. Current thread loads other_threads[i] to thr and checks it, it is
        non-NULL.
     2. Current thread is suspended by OS.
     3. Another thread unregisters and finishes (debug versions of free()
        may fill memory with something like 0xEF).
     4. Current thread is resumed.
     5. Current thread reads junk from *thr.
     TODO: Fix it. --ln */

  for (i = 0; i < __kmp_threads_capacity; i++) {

    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */

    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated */
        /* stack size is if we are running on this thread. */
        // __kmp_gtid_get_specific can return negative value because this
        // function can be called by thread destructor. However, before the
        // thread destructor is called, the value of the corresponding
        // thread-specific data will be reset to NULL.
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() < 0 ||
                         __kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  /*fprintf( stderr, "=== %d\n", i );  */ /* GROO */

  /* if we havn't been assigned a gtid, then return code */
  if (i < 0)
    return i;

  // other_threads[i] can be nullptr at this point because the corresponding
  // thread could have already been destructed. It can happen when this function
  // is called in end library routine.
  if (!TCR_SYNC_PTR(other_threads[i]))
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
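
// Illustrative sketch (not part of the runtime, excluded from the build): the
// "internal algorithm" above identifies the calling thread by testing whether
// the address of a local variable lies inside a registered thread's stack,
// i.e. 0 <= stack_base - addr <= stack_size for a downward-growing stack. The
// helper below restates only that containment test, using hypothetical names.
#if 0
#include <cstddef>

struct stack_range_sketch { // conceptually mirrors ds_stackbase/ds_stacksize
  char *base; // highest address of the stack
  size_t size; // number of usable bytes below 'base'
};

static bool sketch_addr_on_stack(const stack_range_sketch &r, void *addr) {
  char *p = (char *)addr;
  // A live automatic variable sits at or below the base and no farther away
  // than the stack size, so the difference must fall within [0, size].
  return p <= r.base && (size_t)(r.base - p) <= r.size;
}
#endif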

int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}

/* caller must hold forkjoin_lock */
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
   * cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
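
// Illustrative sketch (not part of the runtime, excluded from the build): the
// check above treats each thread stack as the address range [base - size, base)
// and reports an error when an endpoint of one range falls strictly inside
// another range. A standalone restatement with hypothetical names:
#if 0
static bool sketch_stacks_overlap(char *beg_a, char *end_a, char *beg_b,
                                  char *end_b) {
  // Same predicate as above: an endpoint of range A strictly between the
  // bounds of range B means the two stacks share addresses.
  return (beg_a > beg_b && beg_a < end_b) || (end_a > beg_b && end_a < end_b);
}
#endif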

/* ------------------------------------------------------------------------ */

void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(TRUE);
  }
}

#define MAX_MESSAGE 512

void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          /* The more elaborate format is disabled for now because of the prctl
           * hanging bug. */
          do {
            last = p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}

void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}

void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

#if KMP_OS_WINDOWS
  // Let other threads know of abnormal termination and prevent deadlock
  // if abort happened during library initialization or shutdown
  __kmp_global.g.g_abort = SIGABRT;

  /* On Windows* OS by default abort() causes pop-up error box, which stalls
     nightly testing. Unfortunately, we cannot reliably suppress pop-up error
     boxes. _set_abort_behavior() works well, but this function is not
     available in VS7 (this is not problem for DLL, but it is a problem for
     static OpenMP RTL). SetErrorMode (and so, timelimit utility) does not
     help, at least in some versions of MS C RTL.

     It seems following sequence is the only way to simulate abort() and
     avoid pop-up error box. */
  raise(SIGABRT);
  _exit(3); // Just in case, if signal ignored, exit anyway.
#else
  __kmp_unregister_library();
  abort();
#endif

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);

} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // TODO: Eliminate g_abort global variable and this function.
  // In case of abort just call abort(), it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread

/* Print out the storage map for the major kmp_info_t thread data structures
   that are allocated together. */

static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

/* Print out the storage map for the major kmp_team_t team data structures
   that are allocated together. */

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);
}

static void __kmp_init_allocator() {
  __kmp_init_memkind();
  __kmp_init_target_mem();
}
static void __kmp_fini_allocator() { __kmp_fini_memkind(); }

/* ------------------------------------------------------------------------ */

#if ENABLE_LIBOMPTARGET
static void __kmp_init_omptarget() {
  __kmp_init_target_task();
}
#endif

/* ------------------------------------------------------------------------ */

#if KMP_DYNAMIC_LIB
#if KMP_OS_WINDOWS

BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock );

  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));

    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    // According to Windows* documentation for DllMain entry point:
    // for DLL_PROCESS_DETACH, lpReserved is used for telling the difference:
    //   lpReserved == NULL when FreeLibrary() is called,
    //   lpReserved != NULL when the process is terminated.
    // When FreeLibrary() is called, worker threads remain alive. So the
    // runtime's state is consistent and executing proper shutdown is OK.
    // When the process is terminated, worker threads have exited or been
    // forcefully terminated by the OS and only the shutdown thread remains.
    // This can leave the runtime in an inconsistent state.
    // Hence, only attempt proper cleanup when FreeLibrary() is called.
    // Otherwise, rely on OS to reclaim resources.
    if (lpReserved == NULL)
      __kmp_internal_end_library(__kmp_gtid_get_specific());

    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));

    /* if we want to register new siblings all the time here call
     * __kmp_get_gtid(); */
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}

#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */

/* __kmp_parallel_deo -- Wait until it's our turn. */
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
             NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* __kmp_parallel_dxo -- Signal the next task. */
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    /* TODO replace with general release procedure */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}

/* ------------------------------------------------------------------------ */
/* The BARRIER for a SINGLE process section is always explicit */

int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    /* TODO: Should this be acquire or release? */
    if (team->t.t_construct == old_this) {
      status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
                                              th->th.th_local.this_construct);
    }
#if USE_ITT_BUILD
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
        team->t.t_active_level == 1) {
      // Only report metadata by primary thread of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
#endif /* USE_ITT_BUILD */
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
#if USE_ITT_BUILD
  if (status) {
    __kmp_itt_single_start(gtid);
  }
#endif /* USE_ITT_BUILD */
  return status;
}

void __kmp_exit_single(int gtid) {
#if USE_ITT_BUILD
  __kmp_itt_single_end(gtid);
#endif /* USE_ITT_BUILD */
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
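
// Illustrative sketch (not part of the runtime, excluded from the build): the
// SINGLE construct above is won by the first thread that advances the shared
// team counter (t_construct) from the value it last observed to the next
// value via an atomic compare-and-exchange; every other thread finds the
// counter already advanced and skips the block. A generic restatement using
// std::atomic and hypothetical names:
#if 0
#include <atomic>

static bool sketch_enter_single(std::atomic<int> &team_construct,
                                int &my_construct) {
  int expected = my_construct++; // value this thread saw at the previous single
  // Exactly one thread moves the shared counter forward and takes the block.
  return team_construct.compare_exchange_strong(expected, my_construct,
                                                std::memory_order_acquire);
}
#endif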

/* determine if we can go parallel or must use a serialized parallel region and
 * how many threads we can use
 * set_nproc is the number of threads requested for the team
 * returns 0 if we should serialize or only use one thread,
 * otherwise the number of threads to use
 * The forkjoin lock is held by the caller. */
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads,
                                 int enter_teams) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);
  kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ;
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  } else {
    KMP_ASSERT(0);
  }

  // Respect KMP_ALL_THREADS/KMP_DEVICE_THREAD_LIMIT.
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT
  int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
  int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
  if (cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      max_cg_threads) {
    int tl_nthreads = max_cg_threads - cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  // See comment in __kmp_register_root() about the adjustment if
  // __kmp_threads[0] == NULL.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  // If it is not for initializing the hidden helper team, we need to take
  // __kmp_hidden_helper_threads_num out of the capacity because it is included
  // in __kmp_threads_capacity.
  if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
    capacity -= __kmp_hidden_helper_threads_num;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

#ifdef KMP_DEBUG
  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
  } else {
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                  " %d threads\n",
                  __kmp_get_gtid(), new_nthreads, set_nthreads));
  }
#endif // KMP_DEBUG

  if (this_thr->th.th_nt_strict && new_nthreads < set_nthreads) {
    __kmpc_error(this_thr->th.th_nt_loc, this_thr->th.th_nt_sev,
                 this_thr->th.th_nt_msg);
  }
  return new_nthreads;
}
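
// Illustrative sketch (not part of the runtime, excluded from the build):
// after the dynamic-mode adjustment, __kmp_reserve_threads applies the same
// capping pattern against several limits in turn (device thread limit,
// contention-group limit, free slots in the threads array): the request is
// clamped to "limit minus threads already charged against that limit", but
// never below one thread. A condensed restatement with hypothetical names:
#if 0
static int sketch_cap_request(int requested, int limit, int already_used) {
  int allowed = limit - already_used;
  if (allowed < 1)
    allowed = 1; // always leave room for a serialized, one-thread team
  return requested > allowed ? allowed : requested;
}
// e.g. n = sketch_cap_request(n, device_limit, live_threads);
//      n = sketch_cap_request(n, cg_limit, cg_threads);
#endif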

/* Allocate threads from the thread pool and assign them to the new team. We are
   assured that there are enough threads available, because we checked on that
   earlier within critical section forkjoin */
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid,
                                    int fork_teams_workers) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the primary thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      } // team->t.t_level will be increased inside parallel
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // AC: threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    } else {
      use_hot_team = 0;
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {

    /* install the primary thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {

      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
#if USE_DEBUGGER
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
#endif
        }
      }
    }

#if KMP_AFFINITY_SUPPORTED
    // Do not partition the places list for teams construct workers who
    // haven't actually been forked to do real work yet. This partitioning
    // will take place in the parallel region nested within the teams construct.
    if (!fork_teams_workers) {
      __kmp_partition_places(team);
    }
#endif

    if (team->t.t_nproc > 1 &&
        __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      team->t.b->update_num_threads(team->t.t_nproc);
      __kmp_add_threads_to_team(team, team->t.t_nproc);
    }
  }

  // Take care of primary thread's task state
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (use_hot_team) {
      KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team->t.t_parent, master_th);
      KA_TRACE(
          20,
          ("__kmp_fork_team_threads: Primary T#%d pushing task_team %p / team "
           "%p, new task_team %p / team %p\n",
           __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
           team->t.t_parent, team->t.t_task_team[master_th->th.th_task_state],
           team));

      // Store primary thread's current task state on new team
      KMP_CHECK_UPDATE(team->t.t_primary_task_state,
                       master_th->th.th_task_state);

      // Restore primary thread's task state to hot team's state
      // by using thread 1's task state
      if (team->t.t_nproc > 1) {
        KMP_DEBUG_ASSERT(team->t.t_threads[1]->th.th_task_state == 0 ||
                         team->t.t_threads[1]->th.th_task_state == 1);
        KMP_CHECK_UPDATE(master_th->th.th_task_state,
                         team->t.t_threads[1]->th.th_task_state);
      } else {
        master_th->th.th_task_state = 0;
      }
    } else {
      // Store primary thread's current task_state on new team
      KMP_CHECK_UPDATE(team->t.t_primary_task_state,
                       master_th->th.th_task_state);
      // Are not using hot team, so set task state to 0.
      master_th->th.th_task_state = 0;
    }
  }

  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    for (i = 0; i < team->t.t_nproc; i++) {
      kmp_info_t *thr = team->t.t_threads[i];
      if (thr->th.th_prev_num_threads != team->t.t_nproc ||
          thr->th.th_prev_level != team->t.t_level) {
        team->t.t_display_affinity = 1;
        break;
      }
    }
  }

  KMP_MB();
}

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the team
// We try to avoid unnecessary writes to the relevant cache line in the team
// structure, so we don't make changes unless they are needed.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get primary thread's values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    // There is no point looking at t_fp_control_saved here.
    // If it is TRUE, we still have to update the values if they are different
    // from those we now have. If it is FALSE we didn't save anything yet, but
    // our objective is the same. We have to ensure that the values in the team
    // are the same as those we have.
    // So, this code achieves what we need whether or not t_fp_control_saved is
    // true. By checking whether the value needs updating we avoid unnecessary
    // writes that would put the cache-line into a written state, causing all
    // threads in the team to have to read it again.
    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    // Although we don't use this value, other code in the runtime wants to know
    // whether it should restore them. So we must ensure it is correct.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Similarly here. Don't write to this cache-line in the team structure
    // unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite, setting the hardware registers to the updated values from
// the team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team.
    // the parallel region that we are exiting.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
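
// Illustrative sketch (not part of the runtime, excluded from the build): the
// pair above implements a publish/conditionally-restore protocol for the x87
// and MXCSR control registers: the primary thread writes its values into the
// team only when they differ (to keep the cache line quiet), and each thread
// later reloads a hardware register only if the team's saved value differs
// from its own. A restatement of the restore side with hypothetical names:
#if 0
struct fp_ctrl_sketch {
  unsigned short x87_cw;
  unsigned int mxcsr;
  bool saved;
};

static void sketch_restore(const fp_ctrl_sketch &team_fp, unsigned short my_cw,
                           unsigned int my_csr,
                           void (*load_cw)(unsigned short),
                           void (*load_csr)(unsigned int)) {
  if (!team_fp.saved)
    return; // nothing was published for this team
  if (team_fp.x87_cw != my_cw)
    load_cw(team_fp.x87_cw); // reload only the register that drifted
  if (team_fp.mxcsr != my_csr)
    load_csr(team_fp.mxcsr);
}
#endif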

static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration

/* Run a parallel region that has been serialized, so runs only in a team of the
   single primary thread. */
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  /* Skip all this code for autopar serialized loops since it results in
     unacceptable overhead */
  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
    return;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();
  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);
  KMP_MB();

  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current value
    // of proc-bind-var for this parallel region.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;

  // Reset num_threads for next parallel region
  this_thr->th.th_set_nproc = 0;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data = ompt_data_none;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size,
          ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
    }
  }
#endif // OMPT_SUPPORT

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used
         TODO increase performance by making this locks more specific */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team =
          __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                              ompt_parallel_data,
#endif
                              proc_bind, &this_thr->th.th_current_task->td_icvs,
                              0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));

      /* TODO the above breaks the requirement that if we run out of resources,
         then we can still guarantee that serialized teams are ok, since we may
         need to allocate a new one */
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    if (this_thr->th.th_team->t.t_nested_nth)
      serial_team->t.t_nested_nth = this_thr->th.th_team->t.t_nested_nth;
    else
      serial_team->t.t_nested_nth = &__kmp_nested_nth;
    // Save previous team's task state on serial team structure
    serial_team->t.t_primary_task_state = this_thr->th.th_task_state;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an
       implicit task for each serialized task represented by
       team->t.t_serialized? */
    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level
    kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
    if (this_thr->th.th_team->t.t_nested_nth)
      nested_nth = this_thr->th.th_team->t.t_nested_nth;
    if (nested_nth->used && (level + 1 < nested_nth->used)) {
      this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
    }

    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }

#if USE_DEBUGGER
    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
#endif
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;
    this_thr->th.th_task_team = NULL;
    this_thr->th.th_task_state = 0;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
    serial_team->t.t_def_allocator = this_thr->th.th_def_allocator; // save

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();

  } else {
    /* this serialized team is already being used,
     * that's fine, just add another nested level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;
    // Thread value exists in the nested nthreads array for the next nested
    // level

    kmp_nested_nthreads_t *nested_nth = &__kmp_nested_nth;
    if (serial_team->t.t_nested_nth)
      nested_nth = serial_team->t.t_nested_nth;
    if (nested_nth->used && (level + 1 < nested_nth->used)) {
      this_thr->th.th_current_task->td_icvs.nproc = nested_nth->nth[level + 1];
    }

    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    /* allocate/push task team stack */
    __kmp_push_task_team_node(this_thr, serial_team);

    KMP_MB();
  }
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);

  // Perform the display affinity functionality for
  // serialized parallel regions
  if (__kmp_display_affinity) {
    if (this_thr->th.th_prev_level != serial_team->t.t_level ||
        this_thr->th.th_prev_num_threads != 1) {
      // NULL means use the affinity-format-var ICV
      __kmp_aux_display_affinity(global_tid, NULL);
      this_thr->th.th_prev_level = serial_team->t.t_level;
      this_thr->th.th_prev_num_threads = 1;
    }
  }

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // don't use lw_taskteam after linking. content was swaped

    /* OMPT implicit task begin */
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
          ompt_task_implicit); // TODO: Can this be ompt_task_initial?
      OMPT_CUR_TASK_INFO(this_thr)->thread_num =
          __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
        OMPT_GET_FRAME_ADDRESS(0);
  }
#endif
}
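
// Illustrative sketch (not part of the runtime, excluded from the build): the
// proc_bind selection above follows a fixed precedence: if proc-bind-var is
// "false" the region is unbound regardless of any clause; otherwise an
// explicit proc_bind clause wins for this region only; otherwise the current
// proc-bind-var value is inherited. Restated with a hypothetical enum:
#if 0
enum bind_sketch { bind_default, bind_false, bind_primary, bind_close, bind_spread };

static bind_sketch sketch_resolve_proc_bind(bind_sketch clause,
                                            bind_sketch proc_bind_var) {
  if (proc_bind_var == bind_false)
    return bind_false; // binding disabled: the clause is ignored
  if (clause == bind_default)
    return proc_bind_var; // no clause: inherit the ICV
  return clause; // explicit clause overrides the ICV for this region only
}
#endif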

// Test if this fork is for a team closely nested in a teams construct
static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
                                          microtask_t microtask, int level,
                                          int teams_level, kmp_va_list ap) {
  return (master_th->th.th_teams_microtask && ap &&
          microtask != (microtask_t)__kmp_teams_master && level == teams_level);
}

// Test if this fork is for the teams construct, i.e. to form the outer league
// of teams
static inline bool __kmp_is_entering_teams(int active_level, int level,
                                           int teams_level, kmp_va_list ap) {
  return ((ap == NULL && active_level == 0) ||
          (ap && teams_level > 0 && teams_level == level));
}

// AC: This is start of parallel that is nested inside teams construct.
// The team is actual (hot), all workers are ready at the fork barrier.
// No lock needed to initialize the team a bit, then free workers.
static inline int
__kmp_fork_in_teams(ident_t *loc, int gtid, kmp_team_t *parent_team,
                    kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
                    enum fork_context_e call_context, microtask_t microtask,
                    launch_t invoker, int master_set_numthreads, int level,
#if OMPT_SUPPORT
                    ompt_data_t ompt_parallel_data, void *return_address,
#endif
                    kmp_va_list ap) {
  void **argv;
  int i;

  parent_team->t.t_ident = loc;
  __kmp_alloc_argv_entries(argc, parent_team, TRUE);
  parent_team->t.t_argc = argc;
  argv = (void **)parent_team->t.t_argv;
  for (i = argc - 1; i >= 0; --i) {
    *argv++ = va_arg(kmp_va_deref(ap), void *);
  }
  // Increment our nested depth levels, but not increase the serialization
  if (parent_team == master_th->th.th_serial_team) {
    // AC: we are in serialized parallel
    __kmpc_serialized_parallel(loc, gtid);
    KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);

    if (call_context == fork_context_gnu) {
      // AC: need to decrement t_serialized for enquiry functions to work
      // correctly, will restore at join time
      parent_team->t.t_serialized--;
      return TRUE;
    }

#if OMPD_SUPPORT
    parent_team->t.t_pkfn = microtask;
#endif

#if OMPT_SUPPORT
    void *dummy;
    void **exit_frame_p;
    ompt_data_t *implicit_task_data;
    ompt_lw_taskteam_t lw_taskteam;

    if (ompt_enabled.enabled) {
      __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                              &ompt_parallel_data, return_address);
      exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);

      __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
      // Don't use lw_taskteam after linking. Content was swapped.

      /* OMPT implicit task begin */
      implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
      if (ompt_enabled.ompt_callback_implicit_task) {
        OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
            1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }

      /* OMPT state */
      master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
    } else {
      exit_frame_p = &dummy;
    }
#endif

    // AC: need to decrement t_serialized for enquiry functions to work
    // correctly, will restore at join time
    parent_team->t.t_serialized--;

    {
      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                             ,
                             exit_frame_p
#endif
      );
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      *exit_frame_p = NULL;
      OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, implicit_task_data, 1,
            OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
      }
      ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
      __ompt_lw_taskteam_unlink(master_th);
      if (ompt_enabled.ompt_callback_parallel_end) {
        ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
            &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
            OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
      }
      master_th->th.ompt_thread_info.state = ompt_state_overhead;
    }
#endif
    return TRUE;
  }

  parent_team->t.t_pkfn = microtask;
  parent_team->t.t_invoke = invoker;
  KMP_ATOMIC_INC(&root->r.r_in_parallel);
  parent_team->t.t_active_level++;
  parent_team->t.t_level++;
  parent_team->t.t_def_allocator = master_th->th.th_def_allocator; // save

  // If the threads allocated to the team are less than the thread limit, update
  // the thread limit here. th_teams_size.nth is specific to this team nested
  // in a teams construct, the team is fully created, and we're about to do
  // the actual fork. Best to do this here so that the subsequent uses below
  // and in the join have the correct value.
  master_th->th.th_teams_size.nth = parent_team->t.t_nproc;

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
                            return_address);
    __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1, true);
  }
#endif

  /* Change number of threads in the team if requested */
  if (master_set_numthreads) { // The parallel has num_threads clause
    if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
      // AC: only can reduce number of threads dynamically, can't increase
      kmp_info_t **other_threads = parent_team->t.t_threads;
      // NOTE: if using distributed barrier, we need to run this code block
      // even when the team size appears not to have changed from the max.
      int old_proc = master_th->th.th_teams_size.nth;
      if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
        __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
        __kmp_add_threads_to_team(parent_team, master_set_numthreads);
      }
      parent_team->t.t_nproc = master_set_numthreads;
      for (i = 0; i < master_set_numthreads; ++i) {
        other_threads[i]->th.th_team_nproc = master_set_numthreads;
      }
    }
    // Keep extra threads hot in the team for possible next parallels
    master_th->th.th_set_nproc = 0;
  }

#if USE_DEBUGGER
  if (__kmp_debugging) { // Let debugger override number of threads.
    int nth = __kmp_omp_num_threads(loc);
    if (nth > 0) { // 0 means debugger doesn't want to change num threads
      master_set_numthreads = nth;
    }
  }
#endif

  // Figure out the proc_bind policy for the nested parallel within teams
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  // proc_bind_default means don't update
  kmp_proc_bind_t proc_bind_icv = proc_bind_default;
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    // No proc_bind clause specified; use current proc-bind-var
    if (proc_bind == proc_bind_default) {
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    /* else: The proc_bind policy was specified explicitly on parallel clause.
       This overrides proc-bind-var for this parallel region, but does not
       change proc-bind-var. */
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }
  KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
  // Need to change the bind-var ICV to correct value for each implicit task
  if (proc_bind_icv != proc_bind_default &&
      master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
    kmp_info_t **other_threads = parent_team->t.t_threads;
    for (i = 0; i < master_th->th.th_team_nproc; ++i) {
      other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
    }
  }
  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;

#if USE_ITT_BUILD && USE_ITT_NOTIFY
  if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
       KMP_ITT_DEBUG) &&
      __kmp_forkjoin_frames_mode == 3 &&
      parent_team->t.t_active_level == 1 // only report frames at level 1
      && master_th->th.th_teams_size.nteams == 1) {
    kmp_uint64 tmp_time = __itt_get_timestamp();
    master_th->th.th_frame_time = tmp_time;
    parent_team->t.t_region_time = tmp_time;
  }
  if (__itt_stack_caller_create_ptr) {
    KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
    // create new stack stitching id before entering fork barrier
    parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
  }
#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */
#if KMP_AFFINITY_SUPPORTED
  __kmp_partition_places(parent_team);
#endif

  KF_TRACE(10, ("__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));
  __kmp_internal_fork(loc, gtid, parent_team);
  KF_TRACE(10, ("__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, parent_team, master_th, gtid));

  if (call_context == fork_context_gnu)
    return TRUE;

  /* Invoke microtask for PRIMARY thread */
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));

  if (!parent_team->t.t_invoke(gtid)) {
    KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");
  }
  KA_TRACE(20, ("__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
                parent_team->t.t_id, parent_team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_in_teams: parallel exit T#%d\n", gtid));

  return TRUE;
}
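
// Illustrative sketch (not part of the runtime, excluded from the build): both
// the teams path above and the serialized path below capture the outlined
// parallel region's arguments by walking the caller's va_list into a plain
// array of void pointers, which is later handed to the microtask invoker. A
// minimal standalone restatement with hypothetical names:
#if 0
#include <cstdarg>

// Copy 'argc' void* arguments from a variadic call into 'argv'.
static void sketch_capture_args(int argc, void **argv, ...) {
  va_list ap;
  va_start(ap, argv);
  for (int i = 0; i < argc; ++i)
    argv[i] = va_arg(ap, void *);
  va_end(ap);
}
#endif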
*/1714#if KMP_OS_LINUX && \1715(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)1716SimpleVLA<void *> args(argc);1717#else1718void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));1719#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \1720KMP_ARCH_AARCH64) */17211722KA_TRACE(172320, ("__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));17241725__kmpc_serialized_parallel(loc, gtid);17261727#if OMPD_SUPPORT1728master_th->th.th_serial_team->t.t_pkfn = microtask;1729#endif17301731if (call_context == fork_context_intel) {1732/* TODO this sucks, use the compiler itself to pass args! :) */1733master_th->th.th_serial_team->t.t_ident = loc;1734if (!ap) {1735// revert change made in __kmpc_serialized_parallel()1736master_th->th.th_serial_team->t.t_level--;1737// Get args from parent team for teams construct17381739#if OMPT_SUPPORT1740void *dummy;1741void **exit_frame_p;1742ompt_task_info_t *task_info;1743ompt_lw_taskteam_t lw_taskteam;17441745if (ompt_enabled.enabled) {1746__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,1747ompt_parallel_data, *return_address);17481749__ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);1750// don't use lw_taskteam after linking. content was swaped1751task_info = OMPT_CUR_TASK_INFO(master_th);1752exit_frame_p = &(task_info->frame.exit_frame.ptr);1753if (ompt_enabled.ompt_callback_implicit_task) {1754OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);1755ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(1756ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),1757&(task_info->task_data), 1,1758OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);1759}17601761/* OMPT state */1762master_th->th.ompt_thread_info.state = ompt_state_work_parallel;1763} else {1764exit_frame_p = &dummy;1765}1766#endif17671768{1769KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);1770KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);1771__kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv1772#if OMPT_SUPPORT1773,1774exit_frame_p1775#endif1776);1777}17781779#if OMPT_SUPPORT1780if (ompt_enabled.enabled) {1781*exit_frame_p = NULL;1782if (ompt_enabled.ompt_callback_implicit_task) {1783ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(1784ompt_scope_end, NULL, &(task_info->task_data), 1,1785OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);1786}1787*ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);1788__ompt_lw_taskteam_unlink(master_th);1789if (ompt_enabled.ompt_callback_parallel_end) {1790ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(1791ompt_parallel_data, *parent_task_data,1792OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);1793}1794master_th->th.ompt_thread_info.state = ompt_state_overhead;1795}1796#endif1797} else if (microtask == (microtask_t)__kmp_teams_master) {1798KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);1799team = master_th->th.th_team;1800// team->t.t_pkfn = microtask;1801team->t.t_invoke = invoker;1802__kmp_alloc_argv_entries(argc, team, TRUE);1803team->t.t_argc = argc;1804argv = (void **)team->t.t_argv;1805for (i = argc - 1; i >= 0; --i)1806*argv++ = va_arg(kmp_va_deref(ap), void *);1807// AC: revert change made in __kmpc_serialized_parallel()1808// because initial code in teams should have level=01809team->t.t_level--;1810// AC: call special invoker for outer "parallel" of teams construct1811invoker(gtid);1812#if OMPT_SUPPORT1813if (ompt_enabled.enabled) {1814ompt_task_info_t *task_info = 
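/*
  Illustrative example (user-side sketch, an assumption about typical usage;
  not code from this runtime) for the serialized fork path handled by
  __kmp_serial_fork_call() here: a nested parallel region that is not allowed
  to be active, for instance because max-active-levels is already reached, is
  serialized and its body runs directly on the encountering thread through
  __kmp_invoke_microtask().

      #include <omp.h>
      #include <stdio.h>

      int main(void) {
        omp_set_max_active_levels(1); // allow only one active level
        #pragma omp parallel num_threads(2)
        {
          // Exceeds max-active-levels, so the runtime serializes it:
          // each outer thread executes the inner body with a team of 1.
          #pragma omp parallel num_threads(4)
          printf("outer thread %d, inner team size %d\n",
                 omp_get_ancestor_thread_num(1), omp_get_num_threads());
        }
        return 0;
      }
*/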
OMPT_CUR_TASK_INFO(master_th);1815if (ompt_enabled.ompt_callback_implicit_task) {1816ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(1817ompt_scope_end, NULL, &(task_info->task_data), 0,1818OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);1819}1820if (ompt_enabled.ompt_callback_parallel_end) {1821ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(1822ompt_parallel_data, *parent_task_data,1823OMPT_INVOKER(call_context) | ompt_parallel_league,1824*return_address);1825}1826master_th->th.ompt_thread_info.state = ompt_state_overhead;1827}1828#endif1829} else {1830argv = args;1831for (i = argc - 1; i >= 0; --i)1832*argv++ = va_arg(kmp_va_deref(ap), void *);1833KMP_MB();18341835#if OMPT_SUPPORT1836void *dummy;1837void **exit_frame_p;1838ompt_task_info_t *task_info;1839ompt_lw_taskteam_t lw_taskteam;1840ompt_data_t *implicit_task_data;18411842if (ompt_enabled.enabled) {1843__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,1844ompt_parallel_data, *return_address);1845__ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);1846// don't use lw_taskteam after linking. content was swaped1847task_info = OMPT_CUR_TASK_INFO(master_th);1848exit_frame_p = &(task_info->frame.exit_frame.ptr);18491850/* OMPT implicit task begin */1851implicit_task_data = OMPT_CUR_TASK_DATA(master_th);1852if (ompt_enabled.ompt_callback_implicit_task) {1853ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(1854ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),1855implicit_task_data, 1, __kmp_tid_from_gtid(gtid),1856ompt_task_implicit);1857OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);1858}18591860/* OMPT state */1861master_th->th.ompt_thread_info.state = ompt_state_work_parallel;1862} else {1863exit_frame_p = &dummy;1864}1865#endif18661867{1868KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);1869KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);1870__kmp_invoke_microtask(microtask, gtid, 0, argc, args1871#if OMPT_SUPPORT1872,1873exit_frame_p1874#endif1875);1876}18771878#if OMPT_SUPPORT1879if (ompt_enabled.enabled) {1880*exit_frame_p = NULL;1881if (ompt_enabled.ompt_callback_implicit_task) {1882ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(1883ompt_scope_end, NULL, &(task_info->task_data), 1,1884OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);1885}18861887*ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);1888__ompt_lw_taskteam_unlink(master_th);1889if (ompt_enabled.ompt_callback_parallel_end) {1890ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(1891ompt_parallel_data, *parent_task_data,1892OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);1893}1894master_th->th.ompt_thread_info.state = ompt_state_overhead;1895}1896#endif1897}1898} else if (call_context == fork_context_gnu) {1899#if OMPT_SUPPORT1900if (ompt_enabled.enabled) {1901ompt_lw_taskteam_t lwt;1902__ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,1903*return_address);19041905lwt.ompt_task_info.frame.exit_frame = ompt_data_none;1906__ompt_lw_taskteam_link(&lwt, master_th, 1);1907}1908// don't use lw_taskteam after linking. 
content was swaped1909#endif19101911// we were called from GNU native code1912KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));1913return FALSE;1914} else {1915KMP_ASSERT2(call_context < fork_context_last,1916"__kmp_serial_fork_call: unknown fork_context parameter");1917}19181919KA_TRACE(20, ("__kmp_serial_fork_call: T#%d serial exit\n", gtid));1920KMP_MB();1921return FALSE;1922}19231924/* most of the work for a fork */1925/* return true if we really went parallel, false if serialized */1926int __kmp_fork_call(ident_t *loc, int gtid,1927enum fork_context_e call_context, // Intel, GNU, ...1928kmp_int32 argc, microtask_t microtask, launch_t invoker,1929kmp_va_list ap) {1930void **argv;1931int i;1932int master_tid;1933int master_this_cons;1934kmp_team_t *team;1935kmp_team_t *parent_team;1936kmp_info_t *master_th;1937kmp_root_t *root;1938int nthreads;1939int master_active;1940int master_set_numthreads;1941int task_thread_limit = 0;1942int level;1943int active_level;1944int teams_level;1945#if KMP_NESTED_HOT_TEAMS1946kmp_hot_team_ptr_t **p_hot_teams;1947#endif1948{ // KMP_TIME_BLOCK1949KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);1950KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);19511952KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));1953if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {1954/* Some systems prefer the stack for the root thread(s) to start with */1955/* some gap from the parent stack to prevent false sharing. */1956void *dummy = KMP_ALLOCA(__kmp_stkpadding);1957/* These 2 lines below are so this does not get optimized out */1958if (__kmp_stkpadding > KMP_MAX_STKPADDING)1959__kmp_stkpadding += (short)((kmp_int64)dummy);1960}19611962/* initialize if needed */1963KMP_DEBUG_ASSERT(1964__kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown1965if (!TCR_4(__kmp_init_parallel))1966__kmp_parallel_initialize();1967__kmp_resume_if_soft_paused();19681969/* setup current data */1970// AC: potentially unsafe, not in sync with library shutdown,1971// __kmp_threads can be freed1972master_th = __kmp_threads[gtid];19731974parent_team = master_th->th.th_team;1975master_tid = master_th->th.th_info.ds.ds_tid;1976master_this_cons = master_th->th.th_local.this_construct;1977root = master_th->th.th_root;1978master_active = root->r.r_active;1979master_set_numthreads = master_th->th.th_set_nproc;1980task_thread_limit =1981master_th->th.th_current_task->td_icvs.task_thread_limit;19821983#if OMPT_SUPPORT1984ompt_data_t ompt_parallel_data = ompt_data_none;1985ompt_data_t *parent_task_data;1986ompt_frame_t *ompt_frame;1987void *return_address = NULL;19881989if (ompt_enabled.enabled) {1990__ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,1991NULL, NULL);1992return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);1993}1994#endif19951996// Assign affinity to root thread if it hasn't happened yet1997__kmp_assign_root_init_mask();19981999// Nested level will be an index in the nested nthreads array2000level = parent_team->t.t_level;2001// used to launch non-serial teams even if nested is not allowed2002active_level = parent_team->t.t_active_level;2003// needed to check nesting inside the teams2004teams_level = master_th->th.th_teams_level;2005#if KMP_NESTED_HOT_TEAMS2006p_hot_teams = &master_th->th.th_hot_teams;2007if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {2008*p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(2009sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);2010(*p_hot_teams)[0].hot_team = root->r.r_hot_team;2011// it is 
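/*
  Illustrative sketch (a simplification and an assumption about the compiler
  lowering; not code from this file): the body of a parallel construct is
  outlined by the compiler into a helper function, and the fork path in
  __kmp_fork_call() receives that outlined function as the `microtask`
  argument together with the region's shared arguments, then decides whether
  to really go parallel or to serialize.

      #include <omp.h>
      #include <stdio.h>

      int main(void) {
        int x = 42;
        // The outlined body of this region is what __kmp_fork_call()
        // eventually invokes as the team's microtask.
        #pragma omp parallel shared(x)
        printf("thread %d sees x=%d\n", omp_get_thread_num(), x);
        return 0;
      }
*/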
either actual or not needed (when active_level > 0)2012(*p_hot_teams)[0].hot_team_nth = 1;2013}2014#endif20152016#if OMPT_SUPPORT2017if (ompt_enabled.enabled) {2018if (ompt_enabled.ompt_callback_parallel_begin) {2019int team_size = master_set_numthreads2020? master_set_numthreads2021: get__nproc_2(parent_team, master_tid);2022int flags = OMPT_INVOKER(call_context) |2023((microtask == (microtask_t)__kmp_teams_master)2024? ompt_parallel_league2025: ompt_parallel_team);2026ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(2027parent_task_data, ompt_frame, &ompt_parallel_data, team_size, flags,2028return_address);2029}2030master_th->th.ompt_thread_info.state = ompt_state_overhead;2031}2032#endif20332034master_th->th.th_ident = loc;20352036// Parallel closely nested in teams construct:2037if (__kmp_is_fork_in_teams(master_th, microtask, level, teams_level, ap)) {2038return __kmp_fork_in_teams(loc, gtid, parent_team, argc, master_th, root,2039call_context, microtask, invoker,2040master_set_numthreads, level,2041#if OMPT_SUPPORT2042ompt_parallel_data, return_address,2043#endif2044ap);2045} // End parallel closely nested in teams construct20462047// Need this to happen before we determine the number of threads, not while2048// we are allocating the team2049//__kmp_push_current_task_to_thread(master_th, parent_team, 0);20502051KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(parent_team, master_th);20522053// Determine the number of threads2054int enter_teams =2055__kmp_is_entering_teams(active_level, level, teams_level, ap);2056if ((!enter_teams &&2057(parent_team->t.t_active_level >=2058master_th->th.th_current_task->td_icvs.max_active_levels)) ||2059(__kmp_library == library_serial)) {2060KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team\n", gtid));2061nthreads = 1;2062} else {2063nthreads = master_set_numthreads2064? master_set_numthreads2065// TODO: get nproc directly from current task2066: get__nproc_2(parent_team, master_tid);2067// Use the thread_limit set for the current target task if exists, else go2068// with the deduced nthreads2069nthreads = task_thread_limit > 0 && task_thread_limit < nthreads2070? task_thread_limit2071: nthreads;2072// Check if we need to take forkjoin lock? (no need for serialized2073// parallel out of teams construct).2074if (nthreads > 1) {2075/* determine how many new threads we can use */2076__kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);2077/* AC: If we execute teams from parallel region (on host), then teams2078should be created but each can only have 1 thread if nesting is2079disabled. If teams called from serial region, then teams and their2080threads should be created regardless of the nesting setting. 
*/2081nthreads = __kmp_reserve_threads(root, parent_team, master_tid,2082nthreads, enter_teams);2083if (nthreads == 1) {2084// Free lock for single thread execution here; for multi-thread2085// execution it will be freed later after team of threads created2086// and initialized2087__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);2088}2089}2090}2091KMP_DEBUG_ASSERT(nthreads > 0);20922093// If we temporarily changed the set number of threads then restore it now2094master_th->th.th_set_nproc = 0;20952096if (nthreads == 1) {2097return __kmp_serial_fork_call(loc, gtid, call_context, argc, microtask,2098invoker, master_th, parent_team,2099#if OMPT_SUPPORT2100&ompt_parallel_data, &return_address,2101&parent_task_data,2102#endif2103ap);2104} // if (nthreads == 1)21052106// GEH: only modify the executing flag in the case when not serialized2107// serialized case is handled in kmpc_serialized_parallel2108KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "2109"curtask=%p, curtask_max_aclevel=%d\n",2110parent_team->t.t_active_level, master_th,2111master_th->th.th_current_task,2112master_th->th.th_current_task->td_icvs.max_active_levels));2113// TODO: GEH - cannot do this assertion because root thread not set up as2114// executing2115// KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 );2116master_th->th.th_current_task->td_flags.executing = 0;21172118if (!master_th->th.th_teams_microtask || level > teams_level) {2119/* Increment our nested depth level */2120KMP_ATOMIC_INC(&root->r.r_in_parallel);2121}21222123// See if we need to make a copy of the ICVs.2124int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;2125kmp_nested_nthreads_t *nested_nth = NULL;2126if (!master_th->th.th_set_nested_nth &&2127(level + 1 < parent_team->t.t_nested_nth->used) &&2128(parent_team->t.t_nested_nth->nth[level + 1] != nthreads_icv)) {2129nthreads_icv = parent_team->t.t_nested_nth->nth[level + 1];2130} else if (master_th->th.th_set_nested_nth) {2131nested_nth = __kmp_override_nested_nth(master_th, level);2132if ((level + 1 < nested_nth->used) &&2133(nested_nth->nth[level + 1] != nthreads_icv))2134nthreads_icv = nested_nth->nth[level + 1];2135else2136nthreads_icv = 0; // don't update2137} else {2138nthreads_icv = 0; // don't update2139}21402141// Figure out the proc_bind_policy for the new team.2142kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;2143// proc_bind_default means don't update2144kmp_proc_bind_t proc_bind_icv = proc_bind_default;2145if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {2146proc_bind = proc_bind_false;2147} else {2148// No proc_bind clause specified; use current proc-bind-var for this2149// parallel region2150if (proc_bind == proc_bind_default) {2151proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;2152}2153// Have teams construct take proc_bind value from KMP_TEAMS_PROC_BIND2154if (master_th->th.th_teams_microtask &&2155microtask == (microtask_t)__kmp_teams_master) {2156proc_bind = __kmp_teams_proc_bind;2157}2158/* else: The proc_bind policy was specified explicitly on parallel clause.2159This overrides proc-bind-var for this parallel region, but does not2160change proc-bind-var. 
*/2161// Figure the value of proc-bind-var for the child threads.2162if ((level + 1 < __kmp_nested_proc_bind.used) &&2163(__kmp_nested_proc_bind.bind_types[level + 1] !=2164master_th->th.th_current_task->td_icvs.proc_bind)) {2165// Do not modify the proc bind icv for the two teams construct forks2166// They just let the proc bind icv pass through2167if (!master_th->th.th_teams_microtask ||2168!(microtask == (microtask_t)__kmp_teams_master || ap == NULL))2169proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];2170}2171}21722173// Reset for next parallel region2174master_th->th.th_set_proc_bind = proc_bind_default;21752176if ((nthreads_icv > 0) || (proc_bind_icv != proc_bind_default)) {2177kmp_internal_control_t new_icvs;2178copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);2179new_icvs.next = NULL;2180if (nthreads_icv > 0) {2181new_icvs.nproc = nthreads_icv;2182}2183if (proc_bind_icv != proc_bind_default) {2184new_icvs.proc_bind = proc_bind_icv;2185}21862187/* allocate a new parallel team */2188KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));2189team = __kmp_allocate_team(root, nthreads, nthreads,2190#if OMPT_SUPPORT2191ompt_parallel_data,2192#endif2193proc_bind, &new_icvs,2194argc USE_NESTED_HOT_ARG(master_th));2195if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)2196copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs, &new_icvs);2197} else {2198/* allocate a new parallel team */2199KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));2200team = __kmp_allocate_team(root, nthreads, nthreads,2201#if OMPT_SUPPORT2202ompt_parallel_data,2203#endif2204proc_bind,2205&master_th->th.th_current_task->td_icvs,2206argc USE_NESTED_HOT_ARG(master_th));2207if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar)2208copy_icvs((kmp_internal_control_t *)team->t.b->team_icvs,2209&master_th->th.th_current_task->td_icvs);2210}2211KF_TRACE(221210, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));22132214/* setup the new team */2215KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);2216KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);2217KMP_CHECK_UPDATE(team->t.t_ident, loc);2218KMP_CHECK_UPDATE(team->t.t_parent, parent_team);2219KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);2220#if OMPT_SUPPORT2221KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,2222return_address);2223#endif2224KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe2225// TODO: parent_team->t.t_level == INT_MAX ???2226if (!master_th->th.th_teams_microtask || level > teams_level) {2227int new_level = parent_team->t.t_level + 1;2228KMP_CHECK_UPDATE(team->t.t_level, new_level);2229new_level = parent_team->t.t_active_level + 1;2230KMP_CHECK_UPDATE(team->t.t_active_level, new_level);2231} else {2232// AC: Do not increase parallel level at start of the teams construct2233int new_level = parent_team->t.t_level;2234KMP_CHECK_UPDATE(team->t.t_level, new_level);2235new_level = parent_team->t.t_active_level;2236KMP_CHECK_UPDATE(team->t.t_active_level, new_level);2237}2238kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);2239// set primary thread's schedule as new run-time schedule2240KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);22412242KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);2243KMP_CHECK_UPDATE(team->t.t_def_allocator, master_th->th.th_def_allocator);22442245// Check if hot team has potentially outdated list, and if so, free it2246if 
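/*
  Illustrative example (user-side sketch with an assumed environment; not
  code from this runtime) for the proc-bind handling above: a proc_bind
  clause overrides proc-bind-var for this one region, while the bind-var
  each implicit task passes on to nested regions is taken from the next
  entry of the OMP_PROC_BIND list (__kmp_nested_proc_bind.bind_types[level + 1]).

      // Assumed environment: OMP_PROC_BIND=spread,close  OMP_PLACES=cores
      #include <omp.h>
      #include <stdio.h>

      int main(void) {
        #pragma omp parallel proc_bind(master) num_threads(2)
        {
          // proc_bind(master) applies to this region only; the bind-var
          // inherited for nested regions comes from the list ("close").
          #pragma omp single
          printf("bind-var for nested regions: %d\n",
                 (int)omp_get_proc_bind());
        }
        return 0;
      }
*/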
(team->t.t_nested_nth &&2247team->t.t_nested_nth != parent_team->t.t_nested_nth) {2248KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);2249KMP_INTERNAL_FREE(team->t.t_nested_nth);2250team->t.t_nested_nth = NULL;2251}2252team->t.t_nested_nth = parent_team->t.t_nested_nth;2253if (master_th->th.th_set_nested_nth) {2254if (!nested_nth)2255nested_nth = __kmp_override_nested_nth(master_th, level);2256team->t.t_nested_nth = nested_nth;2257KMP_INTERNAL_FREE(master_th->th.th_set_nested_nth);2258master_th->th.th_set_nested_nth = NULL;2259master_th->th.th_set_nested_nth_sz = 0;2260master_th->th.th_nt_strict = false;2261}22622263// Update the floating point rounding in the team if required.2264propagateFPControl(team);2265#if OMPD_SUPPORT2266if (ompd_state & OMPD_ENABLE_BP)2267ompd_bp_parallel_begin();2268#endif22692270KA_TRACE(227120,2272("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",2273gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,2274team->t.t_nproc));2275KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||2276(team->t.t_master_tid == 0 &&2277(team->t.t_parent == root->r.r_root_team ||2278team->t.t_parent->t.t_serialized)));2279KMP_MB();22802281/* now, setup the arguments */2282argv = (void **)team->t.t_argv;2283if (ap) {2284for (i = argc - 1; i >= 0; --i) {2285void *new_argv = va_arg(kmp_va_deref(ap), void *);2286KMP_CHECK_UPDATE(*argv, new_argv);2287argv++;2288}2289} else {2290for (i = 0; i < argc; ++i) {2291// Get args from parent team for teams construct2292KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);2293}2294}22952296/* now actually fork the threads */2297KMP_CHECK_UPDATE(team->t.t_master_active, master_active);2298if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong2299root->r.r_active = TRUE;23002301__kmp_fork_team_threads(root, team, master_th, gtid, !ap);2302__kmp_setup_icv_copy(team, nthreads,2303&master_th->th.th_current_task->td_icvs, loc);23042305#if OMPT_SUPPORT2306master_th->th.ompt_thread_info.state = ompt_state_work_parallel;2307#endif23082309__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);23102311#if USE_ITT_BUILD2312if (team->t.t_active_level == 1 // only report frames at level 12313&& !master_th->th.th_teams_microtask) { // not in teams construct2314#if USE_ITT_NOTIFY2315if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&2316(__kmp_forkjoin_frames_mode == 3 ||2317__kmp_forkjoin_frames_mode == 1)) {2318kmp_uint64 tmp_time = 0;2319if (__itt_get_timestamp_ptr)2320tmp_time = __itt_get_timestamp();2321// Internal fork - report frame begin2322master_th->th.th_frame_time = tmp_time;2323if (__kmp_forkjoin_frames_mode == 3)2324team->t.t_region_time = tmp_time;2325} else2326// only one notification scheme (either "submit" or "forking/joined", not both)2327#endif /* USE_ITT_NOTIFY */2328if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&2329__kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {2330// Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.2331__kmp_itt_region_forking(gtid, team->t.t_nproc, 0);2332}2333}2334#endif /* USE_ITT_BUILD */23352336/* now go on and do the work */2337KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);2338KMP_MB();2339KF_TRACE(10,2340("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",2341root, team, master_th, gtid));23422343#if USE_ITT_BUILD2344if (__itt_stack_caller_create_ptr) {2345// create new stack stitching id before entering fork barrier2346if (!enter_teams) {2347KMP_DEBUG_ASSERT(team->t.t_stack_id == NULL);2348team->t.t_stack_id = 
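/*
  Illustrative example (user-side sketch with an assumed environment; not
  code from this runtime) for the nested nthreads-var list consulted above
  (parent_team->t.t_nested_nth): when OMP_NUM_THREADS is given as a list,
  each nesting level uses the corresponding entry unless a num_threads
  clause or omp_set_num_threads() overrides it.

      // Assumed environment: OMP_NUM_THREADS=4,2  OMP_MAX_ACTIVE_LEVELS=2
      #include <omp.h>
      #include <stdio.h>

      int main(void) {
        #pragma omp parallel // level 1: 4 threads (first list entry)
        {
          #pragma omp parallel // level 2: 2 threads (second list entry)
          {
            #pragma omp single
            printf("inner team size: %d\n", omp_get_num_threads());
          }
        }
        return 0;
      }
*/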
__kmp_itt_stack_caller_create();2349} else if (parent_team->t.t_serialized) {2350// keep stack stitching id in the serialized parent_team;2351// current team will be used for parallel inside the teams;2352// if parent_team is active, then it already keeps stack stitching id2353// for the league of teams2354KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);2355parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();2356}2357}2358#endif /* USE_ITT_BUILD */23592360// AC: skip __kmp_internal_fork at teams construct, let only primary2361// threads execute2362if (ap) {2363__kmp_internal_fork(loc, gtid, team);2364KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "2365"master_th=%p, gtid=%d\n",2366root, team, master_th, gtid));2367}23682369if (call_context == fork_context_gnu) {2370KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));2371return TRUE;2372}23732374/* Invoke microtask for PRIMARY thread */2375KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,2376team->t.t_id, team->t.t_pkfn));2377} // END of timer KMP_fork_call block23782379#if KMP_STATS_ENABLED2380// If beginning a teams construct, then change thread state2381stats_state_e previous_state = KMP_GET_THREAD_STATE();2382if (!ap) {2383KMP_SET_THREAD_STATE(stats_state_e::TEAMS_REGION);2384}2385#endif23862387if (!team->t.t_invoke(gtid)) {2388KMP_ASSERT2(0, "cannot invoke microtask for PRIMARY thread");2389}23902391#if KMP_STATS_ENABLED2392// If was beginning of a teams construct, then reset thread state2393if (!ap) {2394KMP_SET_THREAD_STATE(previous_state);2395}2396#endif23972398KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,2399team->t.t_id, team->t.t_pkfn));2400KMP_MB(); /* Flush all pending memory write invalidates. */24012402KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));2403#if OMPT_SUPPORT2404if (ompt_enabled.enabled) {2405master_th->th.ompt_thread_info.state = ompt_state_overhead;2406}2407#endif24082409return TRUE;2410}24112412#if OMPT_SUPPORT2413static inline void __kmp_join_restore_state(kmp_info_t *thread,2414kmp_team_t *team) {2415// restore state outside the region2416thread->th.ompt_thread_info.state =2417((team->t.t_serialized) ? 
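/*
  Illustrative example (user-side sketch; not code from this runtime) for
  the teams handling in the fork path above: a teams construct creates a
  league whose outer "parallel" is driven through __kmp_teams_master, and a
  parallel region inside teams forms the per-team thread team at the next
  level.

      #include <omp.h>
      #include <stdio.h>

      int main(void) {
        #pragma omp teams num_teams(2) thread_limit(4)
        {
          #pragma omp parallel
          if (omp_get_thread_num() == 0)
            printf("team %d of %d, team size %d\n", omp_get_team_num(),
                   omp_get_num_teams(), omp_get_num_threads());
        }
        return 0;
      }
*/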
ompt_state_work_serial2418: ompt_state_work_parallel);2419}24202421static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,2422kmp_team_t *team, ompt_data_t *parallel_data,2423int flags, void *codeptr) {2424ompt_task_info_t *task_info = __ompt_get_task_info_object(0);2425if (ompt_enabled.ompt_callback_parallel_end) {2426ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(2427parallel_data, &(task_info->task_data), flags, codeptr);2428}24292430task_info->frame.enter_frame = ompt_data_none;2431__kmp_join_restore_state(thread, team);2432}2433#endif24342435void __kmp_join_call(ident_t *loc, int gtid2436#if OMPT_SUPPORT2437,2438enum fork_context_e fork_context2439#endif2440,2441int exit_teams) {2442KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);2443kmp_team_t *team;2444kmp_team_t *parent_team;2445kmp_info_t *master_th;2446kmp_root_t *root;2447int master_active;24482449KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));24502451/* setup current data */2452master_th = __kmp_threads[gtid];2453root = master_th->th.th_root;2454team = master_th->th.th_team;2455parent_team = team->t.t_parent;24562457master_th->th.th_ident = loc;24582459#if OMPT_SUPPORT2460void *team_microtask = (void *)team->t.t_pkfn;2461// For GOMP interface with serialized parallel, need the2462// __kmpc_end_serialized_parallel to call hooks for OMPT end-implicit-task2463// and end-parallel events.2464if (ompt_enabled.enabled &&2465!(team->t.t_serialized && fork_context == fork_context_gnu)) {2466master_th->th.ompt_thread_info.state = ompt_state_overhead;2467}2468#endif24692470#if KMP_DEBUG2471if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {2472KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "2473"th_task_team = %p\n",2474__kmp_gtid_from_thread(master_th), team,2475team->t.t_task_team[master_th->th.th_task_state],2476master_th->th.th_task_team));2477KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, master_th);2478}2479#endif24802481if (team->t.t_serialized) {2482if (master_th->th.th_teams_microtask) {2483// We are in teams construct2484int level = team->t.t_level;2485int tlevel = master_th->th.th_teams_level;2486if (level == tlevel) {2487// AC: we haven't incremented it earlier at start of teams construct,2488// so do it here - at the end of teams construct2489team->t.t_level++;2490} else if (level == tlevel + 1) {2491// AC: we are exiting parallel inside teams, need to increment2492// serialization in order to restore it in the next call to2493// __kmpc_end_serialized_parallel2494team->t.t_serialized++;2495}2496}2497__kmpc_end_serialized_parallel(loc, gtid);24982499#if OMPT_SUPPORT2500if (ompt_enabled.enabled) {2501if (fork_context == fork_context_gnu) {2502__ompt_lw_taskteam_unlink(master_th);2503}2504__kmp_join_restore_state(master_th, parent_team);2505}2506#endif25072508return;2509}25102511master_active = team->t.t_master_active;25122513if (!exit_teams) {2514// AC: No barrier for internal teams at exit from teams construct.2515// But there is barrier for external team (league).2516__kmp_internal_join(loc, gtid, team);2517#if USE_ITT_BUILD2518if (__itt_stack_caller_create_ptr) {2519KMP_DEBUG_ASSERT(team->t.t_stack_id != NULL);2520// destroy the stack stitching id after join barrier2521__kmp_itt_stack_caller_destroy((__itt_caller)team->t.t_stack_id);2522team->t.t_stack_id = NULL;2523}2524#endif2525} else {2526master_th->th.th_task_state =25270; // AC: no tasking in teams (out of any parallel)2528#if USE_ITT_BUILD2529if (__itt_stack_caller_create_ptr && parent_team->t.t_serialized) 
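/*
  Minimal OMPT tool sketch (an assumption about typical tool-side code; not
  part of this runtime) showing where the ompt_callback_parallel_end
  dispatch in __kmp_join_ompt() above ends up: a tool registers the callback
  from its initializer.

      #include <omp-tools.h>
      #include <stdio.h>

      static void on_parallel_end(ompt_data_t *parallel_data,
                                  ompt_data_t *encountering_task_data,
                                  int flags, const void *codeptr_ra) {
        printf("parallel_end, flags=%d\n", flags);
      }

      static int tool_init(ompt_function_lookup_t lookup,
                           int initial_device_num, ompt_data_t *tool_data) {
        ompt_set_callback_t set_callback =
            (ompt_set_callback_t)lookup("ompt_set_callback");
        set_callback(ompt_callback_parallel_end,
                     (ompt_callback_t)&on_parallel_end);
        return 1; // non-zero keeps the tool active
      }

      static void tool_fini(ompt_data_t *tool_data) {}

      ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
                                                const char *runtime_version) {
        static ompt_start_tool_result_t result = {&tool_init, &tool_fini, {0}};
        return &result;
      }
*/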
{2530KMP_DEBUG_ASSERT(parent_team->t.t_stack_id != NULL);2531// destroy the stack stitching id on exit from the teams construct2532// if parent_team is active, then the id will be destroyed later on2533// by master of the league of teams2534__kmp_itt_stack_caller_destroy((__itt_caller)parent_team->t.t_stack_id);2535parent_team->t.t_stack_id = NULL;2536}2537#endif2538}25392540KMP_MB();25412542#if OMPT_SUPPORT2543ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);2544void *codeptr = team->t.ompt_team_info.master_return_address;2545#endif25462547#if USE_ITT_BUILD2548// Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.2549if (team->t.t_active_level == 1 &&2550(!master_th->th.th_teams_microtask || /* not in teams construct */2551master_th->th.th_teams_size.nteams == 1)) {2552master_th->th.th_ident = loc;2553// only one notification scheme (either "submit" or "forking/joined", not2554// both)2555if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&2556__kmp_forkjoin_frames_mode == 3)2557__kmp_itt_frame_submit(gtid, team->t.t_region_time,2558master_th->th.th_frame_time, 0, loc,2559master_th->th.th_team_nproc, 1);2560else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&2561!__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)2562__kmp_itt_region_joined(gtid);2563} // active_level == 12564#endif /* USE_ITT_BUILD */25652566#if KMP_AFFINITY_SUPPORTED2567if (!exit_teams) {2568// Restore master thread's partition.2569master_th->th.th_first_place = team->t.t_first_place;2570master_th->th.th_last_place = team->t.t_last_place;2571}2572#endif // KMP_AFFINITY_SUPPORTED25732574if (master_th->th.th_teams_microtask && !exit_teams &&2575team->t.t_pkfn != (microtask_t)__kmp_teams_master &&2576team->t.t_level == master_th->th.th_teams_level + 1) {2577// AC: We need to leave the team structure intact at the end of parallel2578// inside the teams construct, so that at the next parallel same (hot) team2579// works, only adjust nesting levels2580#if OMPT_SUPPORT2581ompt_data_t ompt_parallel_data = ompt_data_none;2582if (ompt_enabled.enabled) {2583ompt_task_info_t *task_info = __ompt_get_task_info_object(0);2584if (ompt_enabled.ompt_callback_implicit_task) {2585int ompt_team_size = team->t.t_nproc;2586ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(2587ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,2588OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);2589}2590task_info->frame.exit_frame = ompt_data_none;2591task_info->task_data = ompt_data_none;2592ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);2593__ompt_lw_taskteam_unlink(master_th);2594}2595#endif2596/* Decrement our nested depth level */2597team->t.t_level--;2598team->t.t_active_level--;2599KMP_ATOMIC_DEC(&root->r.r_in_parallel);26002601// Restore number of threads in the team if needed. 
This code relies on2602// the proper adjustment of th_teams_size.nth after the fork in2603// __kmp_teams_master on each teams primary thread in the case that2604// __kmp_reserve_threads reduced it.2605if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {2606int old_num = master_th->th.th_team_nproc;2607int new_num = master_th->th.th_teams_size.nth;2608kmp_info_t **other_threads = team->t.t_threads;2609team->t.t_nproc = new_num;2610for (int i = 0; i < old_num; ++i) {2611other_threads[i]->th.th_team_nproc = new_num;2612}2613// Adjust states of non-used threads of the team2614for (int i = old_num; i < new_num; ++i) {2615// Re-initialize thread's barrier data.2616KMP_DEBUG_ASSERT(other_threads[i]);2617kmp_balign_t *balign = other_threads[i]->th.th_bar;2618for (int b = 0; b < bs_last_barrier; ++b) {2619balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;2620KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);2621#if USE_DEBUGGER2622balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;2623#endif2624}2625if (__kmp_tasking_mode != tskm_immediate_exec) {2626// Synchronize thread's task state2627other_threads[i]->th.th_task_state = master_th->th.th_task_state;2628}2629}2630}26312632#if OMPT_SUPPORT2633if (ompt_enabled.enabled) {2634__kmp_join_ompt(gtid, master_th, parent_team, &ompt_parallel_data,2635OMPT_INVOKER(fork_context) | ompt_parallel_team, codeptr);2636}2637#endif26382639return;2640}26412642/* do cleanup and restore the parent team */2643master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;2644master_th->th.th_local.this_construct = team->t.t_master_this_cons;26452646master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];26472648/* jc: The following lock has instructions with REL and ACQ semantics,2649separating the parallel user code called in this parallel region2650from the serial user code called after this function returns. */2651__kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);26522653if (!master_th->th.th_teams_microtask ||2654team->t.t_level > master_th->th.th_teams_level) {2655/* Decrement our nested depth level */2656KMP_ATOMIC_DEC(&root->r.r_in_parallel);2657}2658KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);26592660#if OMPT_SUPPORT2661if (ompt_enabled.enabled) {2662ompt_task_info_t *task_info = __ompt_get_task_info_object(0);2663if (ompt_enabled.ompt_callback_implicit_task) {2664int flags = (team_microtask == (void *)__kmp_teams_master)2665? ompt_task_initial2666: ompt_task_implicit;2667int ompt_team_size = (flags == ompt_task_initial) ? 0 : team->t.t_nproc;2668ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(2669ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,2670OMPT_CUR_TASK_INFO(master_th)->thread_num, flags);2671}2672task_info->frame.exit_frame = ompt_data_none;2673task_info->task_data = ompt_data_none;2674}2675#endif26762677KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,2678master_th, team));2679__kmp_pop_current_task_from_thread(master_th);26802681master_th->th.th_def_allocator = team->t.t_def_allocator;26822683#if OMPD_SUPPORT2684if (ompd_state & OMPD_ENABLE_BP)2685ompd_bp_parallel_end();2686#endif2687updateHWFPControl(team);26882689if (root->r.r_active != master_active)2690root->r.r_active = master_active;26912692__kmp_free_team(root, team USE_NESTED_HOT_ARG(2693master_th)); // this will free worker threads26942695/* this race was fun to find. 
make sure the following is in the critical2696region otherwise assertions may fail occasionally since the old team may be2697reallocated and the hierarchy appears inconsistent. it is actually safe to2698run and won't cause any bugs, but will cause those assertion failures. it's2699only one deref&assign so might as well put this in the critical region */2700master_th->th.th_team = parent_team;2701master_th->th.th_team_nproc = parent_team->t.t_nproc;2702master_th->th.th_team_master = parent_team->t.t_threads[0];2703master_th->th.th_team_serialized = parent_team->t.t_serialized;27042705/* restore serialized team, if need be */2706if (parent_team->t.t_serialized &&2707parent_team != master_th->th.th_serial_team &&2708parent_team != root->r.r_root_team) {2709__kmp_free_team(root,2710master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));2711master_th->th.th_serial_team = parent_team;2712}27132714if (__kmp_tasking_mode != tskm_immediate_exec) {2715// Restore primary thread's task state from team structure2716KMP_DEBUG_ASSERT(team->t.t_primary_task_state == 0 ||2717team->t.t_primary_task_state == 1);2718master_th->th.th_task_state = (kmp_uint8)team->t.t_primary_task_state;27192720// Copy the task team from the parent team to the primary thread2721master_th->th.th_task_team =2722parent_team->t.t_task_team[master_th->th.th_task_state];2723KA_TRACE(20,2724("__kmp_join_call: Primary T#%d restoring task_team %p, team %p\n",2725__kmp_gtid_from_thread(master_th), master_th->th.th_task_team,2726parent_team));2727}27282729// TODO: GEH - cannot do this assertion because root thread not set up as2730// executing2731// KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 );2732master_th->th.th_current_task->td_flags.executing = 1;27332734__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);27352736#if KMP_AFFINITY_SUPPORTED2737if (master_th->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {2738__kmp_reset_root_init_mask(gtid);2739}2740#endif2741#if OMPT_SUPPORT2742int flags =2743OMPT_INVOKER(fork_context) |2744((team_microtask == (void *)__kmp_teams_master) ? ompt_parallel_league2745: ompt_parallel_team);2746if (ompt_enabled.enabled) {2747__kmp_join_ompt(gtid, master_th, parent_team, parallel_data, flags,2748codeptr);2749}2750#endif27512752KMP_MB();2753KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));2754}27552756/* Check whether we should push an internal control record onto the2757serial team stack. If so, do it. 
*/
void __kmp_save_internal_controls(kmp_info_t *thread) {

  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;

    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));

      copy_icvs(control, &thread->th.th_current_task->td_icvs);

      control->serial_nesting_level = thread->th.th_team->t.t_serialized;

      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}

/* Changes set_nproc */
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
  thread = __kmp_threads[gtid];
  if (thread->th.th_current_task->td_icvs.nproc == new_nth)
    return; // nothing to do

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
  ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      __kmp_resize_dist_barrier(hot_team, hot_team->t.t_nproc, new_nth);
    }
    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing team size, threads no longer in the team should
        // unref task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
      hot_team->t.b->update_num_threads(new_nth);
      __kmp_add_threads_to_team(hot_team, new_nth);
    }

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case omp_set_num_threads() call
    hot_team->t.t_size_changed = -1;
  }
}

/* Changes max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // We ignore this call if the user has specified a negative value.
    // The current setting won't be changed. The last valid setting will be
    // used. A warning will be issued (if warnings are allowed as controlled by
    // the KMP_WARNINGS env var).
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // it's OK, the max_active_levels is within the valid range: [ 0;
    // KMP_MAX_ACTIVE_LEVELS_LIMIT ]
    // We allow a zero value. (implementation defined behavior)
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
    // Current upper limit is MAX_INT. (implementation defined behavior)
    // If the input exceeds the upper limit, we correct the input to be the
    // upper limit. (implementation defined behavior)
    // Actually, the flow should never get here until we use MAX_INT limit.
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}

/* Gets max_active_levels */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}

// nteams-var per-device ICV
void __kmp_set_num_teams(int num_teams) {
  if (num_teams > 0)
    __kmp_nteams = num_teams;
}
int __kmp_get_max_teams(void) { return __kmp_nteams; }
// teams-thread-limit-var per-device ICV
void __kmp_set_teams_thread_limit(int limit) {
  if (limit > 0)
    __kmp_teams_thread_limit = limit;
}
int __kmp_get_teams_thread_limit(void) { return __kmp_teams_thread_limit; }

KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));

/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;
  kmp_sched_t orig_kind;
  // kmp_team_t *team;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid, correct if needed.
  // Valid parameters should fit in one of two intervals - standard or extended:
  // <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
  // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
  orig_kind = kind;
  kind =
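/*
  Illustrative example (user-side sketch; not code from this runtime)
  exercising the entry points above: omp_set_num_threads(),
  omp_set_max_active_levels() and omp_set_schedule() funnel into
  __kmp_set_num_threads(), __kmp_set_max_active_levels() and
  __kmp_set_schedule(), each of which saves the current ICVs through
  __kmp_save_internal_controls() before updating them.

      #include <omp.h>
      #include <stdio.h>

      int main(void) {
        omp_set_num_threads(2);       // may shrink the hot team right away
        omp_set_max_active_levels(2); // clamped to KMP_MAX_ACTIVE_LEVELS_LIMIT
        omp_set_schedule(omp_sched_dynamic, 8); // def-sched-var for schedule(runtime)

        #pragma omp parallel
        #pragma omp single
        printf("team of %d threads\n", omp_get_num_threads());
        return 0;
      }
*/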
__kmp_sched_without_mods(kind);29582959if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||2960(kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {2961// TODO: Hint needs attention in case we change the default schedule.2962__kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),2963KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),2964__kmp_msg_null);2965kind = kmp_sched_default;2966chunk = 0; // ignore chunk value in case of bad kind2967}29682969thread = __kmp_threads[gtid];29702971__kmp_save_internal_controls(thread);29722973if (kind < kmp_sched_upper_std) {2974if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {2975// differ static chunked vs. unchunked: chunk should be invalid to2976// indicate unchunked schedule (which is the default)2977thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;2978} else {2979thread->th.th_current_task->td_icvs.sched.r_sched_type =2980__kmp_sch_map[kind - kmp_sched_lower - 1];2981}2982} else {2983// __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std -2984// kmp_sched_lower - 2 ];2985thread->th.th_current_task->td_icvs.sched.r_sched_type =2986__kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -2987kmp_sched_lower - 2];2988}2989__kmp_sched_apply_mods_intkind(2990orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));2991if (kind == kmp_sched_auto || chunk < 1) {2992// ignore parameter chunk for schedule auto2993thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;2994} else {2995thread->th.th_current_task->td_icvs.sched.chunk = chunk;2996}2997}29982999/* Gets def_sched_var ICV values */3000void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {3001kmp_info_t *thread;3002enum sched_type th_type;30033004KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));3005KMP_DEBUG_ASSERT(__kmp_init_serial);30063007thread = __kmp_threads[gtid];30083009th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;3010switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {3011case kmp_sch_static:3012case kmp_sch_static_greedy:3013case kmp_sch_static_balanced:3014*kind = kmp_sched_static;3015__kmp_sched_apply_mods_stdkind(kind, th_type);3016*chunk = 0; // chunk was not set, try to show this fact via zero value3017return;3018case kmp_sch_static_chunked:3019*kind = kmp_sched_static;3020break;3021case kmp_sch_dynamic_chunked:3022*kind = kmp_sched_dynamic;3023break;3024case kmp_sch_guided_chunked:3025case kmp_sch_guided_iterative_chunked:3026case kmp_sch_guided_analytical_chunked:3027*kind = kmp_sched_guided;3028break;3029case kmp_sch_auto:3030*kind = kmp_sched_auto;3031break;3032case kmp_sch_trapezoidal:3033*kind = kmp_sched_trapezoidal;3034break;3035#if KMP_STATIC_STEAL_ENABLED3036case kmp_sch_static_steal:3037*kind = kmp_sched_static_steal;3038break;3039#endif3040default:3041KMP_FATAL(UnknownSchedulingType, th_type);3042}30433044__kmp_sched_apply_mods_stdkind(kind, th_type);3045*chunk = thread->th.th_current_task->td_icvs.sched.chunk;3046}30473048int __kmp_get_ancestor_thread_num(int gtid, int level) {30493050int ii, dd;3051kmp_team_t *team;3052kmp_info_t *thr;30533054KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));3055KMP_DEBUG_ASSERT(__kmp_init_serial);30563057// validate level3058if (level == 0)3059return 0;3060if (level < 0)3061return -1;3062thr = __kmp_threads[gtid];3063team = thr->th.th_team;3064ii = team->t.t_level;3065if (level > ii)3066return -1;30673068if (thr->th.th_teams_microtask) {3069// AC: we are in teams region 
where multiple nested teams have same level3070int tlevel = thr->th.th_teams_level; // the level of the teams construct3071if (level <=3072tlevel) { // otherwise usual algorithm works (will not touch the teams)3073KMP_DEBUG_ASSERT(ii >= tlevel);3074// AC: As we need to pass by the teams league, we need to artificially3075// increase ii3076if (ii == tlevel) {3077ii += 2; // three teams have same level3078} else {3079ii++; // two teams have same level3080}3081}3082}30833084if (ii == level)3085return __kmp_tid_from_gtid(gtid);30863087dd = team->t.t_serialized;3088level++;3089while (ii > level) {3090for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {3091}3092if ((team->t.t_serialized) && (!dd)) {3093team = team->t.t_parent;3094continue;3095}3096if (ii > level) {3097team = team->t.t_parent;3098dd = team->t.t_serialized;3099ii--;3100}3101}31023103return (dd > 1) ? (0) : (team->t.t_master_tid);3104}31053106int __kmp_get_team_size(int gtid, int level) {31073108int ii, dd;3109kmp_team_t *team;3110kmp_info_t *thr;31113112KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));3113KMP_DEBUG_ASSERT(__kmp_init_serial);31143115// validate level3116if (level == 0)3117return 1;3118if (level < 0)3119return -1;3120thr = __kmp_threads[gtid];3121team = thr->th.th_team;3122ii = team->t.t_level;3123if (level > ii)3124return -1;31253126if (thr->th.th_teams_microtask) {3127// AC: we are in teams region where multiple nested teams have same level3128int tlevel = thr->th.th_teams_level; // the level of the teams construct3129if (level <=3130tlevel) { // otherwise usual algorithm works (will not touch the teams)3131KMP_DEBUG_ASSERT(ii >= tlevel);3132// AC: As we need to pass by the teams league, we need to artificially3133// increase ii3134if (ii == tlevel) {3135ii += 2; // three teams have same level3136} else {3137ii++; // two teams have same level3138}3139}3140}31413142while (ii > level) {3143for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {3144}3145if (team->t.t_serialized && (!dd)) {3146team = team->t.t_parent;3147continue;3148}3149if (ii > level) {3150team = team->t.t_parent;3151ii--;3152}3153}31543155return team->t.t_nproc;3156}31573158kmp_r_sched_t __kmp_get_schedule_global() {3159// This routine created because pairs (__kmp_sched, __kmp_chunk) and3160// (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults3161// independently. So one can get the updated schedule here.31623163kmp_r_sched_t r_sched;31643165// create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static,3166// __kmp_guided. 
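/*
  Illustrative example (user-side sketch; not code from this runtime) for
  __kmp_get_ancestor_thread_num() and __kmp_get_team_size() above, which
  back omp_get_ancestor_thread_num() and omp_get_team_size(): level 0 is the
  initial implicit task, and every enclosing (active or serialized) region
  adds one level.

      #include <omp.h>
      #include <stdio.h>

      int main(void) {
        omp_set_max_active_levels(2);
        #pragma omp parallel num_threads(3)
        #pragma omp parallel num_threads(2)
        {
          #pragma omp single
          printf("level=%d ancestor@1=%d size@1=%d size@2=%d\n",
                 omp_get_level(), omp_get_ancestor_thread_num(1),
                 omp_get_team_size(1), omp_get_team_size(2));
        }
        return 0;
      }
*/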
__kmp_sched should keep original value, so that user can set3167// KMP_SCHEDULE multiple times, and thus have different run-time schedules in3168// different roots (even in OMP 2.5)3169enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);3170enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);3171if (s == kmp_sch_static) {3172// replace STATIC with more detailed schedule (balanced or greedy)3173r_sched.r_sched_type = __kmp_static;3174} else if (s == kmp_sch_guided_chunked) {3175// replace GUIDED with more detailed schedule (iterative or analytical)3176r_sched.r_sched_type = __kmp_guided;3177} else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other3178r_sched.r_sched_type = __kmp_sched;3179}3180SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);31813182if (__kmp_chunk < KMP_DEFAULT_CHUNK) {3183// __kmp_chunk may be wrong here (if it was not ever set)3184r_sched.chunk = KMP_DEFAULT_CHUNK;3185} else {3186r_sched.chunk = __kmp_chunk;3187}31883189return r_sched;3190}31913192/* Allocate (realloc == FALSE) * or reallocate (realloc == TRUE)3193at least argc number of *t_argv entries for the requested team. */3194static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {31953196KMP_DEBUG_ASSERT(team);3197if (!realloc || argc > team->t.t_max_argc) {31983199KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "3200"current entries=%d\n",3201team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));3202/* if previously allocated heap space for args, free them */3203if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])3204__kmp_free((void *)team->t.t_argv);32053206if (argc <= KMP_INLINE_ARGV_ENTRIES) {3207/* use unused space in the cache line for arguments */3208team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;3209KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "3210"argv entries\n",3211team->t.t_id, team->t.t_max_argc));3212team->t.t_argv = &team->t.t_inline_argv[0];3213if (__kmp_storage_map) {3214__kmp_print_storage_map_gtid(3215-1, &team->t.t_inline_argv[0],3216&team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],3217(sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",3218team->t.t_id);3219}3220} else {3221/* allocate space for arguments in the heap */3222team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))3223? KMP_MIN_MALLOC_ARGV_ENTRIES3224: 2 * argc;3225KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "3226"argv entries\n",3227team->t.t_id, team->t.t_max_argc));3228team->t.t_argv =3229(void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);3230if (__kmp_storage_map) {3231__kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],3232&team->t.t_argv[team->t.t_max_argc],3233sizeof(void *) * team->t.t_max_argc,3234"team_%d.t_argv", team->t.t_id);3235}3236}3237}3238}32393240static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {3241int i;3242int num_disp_buff = max_nth > 1 ? 
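/*
  Illustrative example (user-side sketch with an assumed environment; not
  code from this runtime) for __kmp_get_schedule_global() above: the
  run-time schedule assembled from __kmp_sched/__kmp_chunk is what
  schedule(runtime) loops use, and omp_get_schedule() reports it.

      // Assumed environment: OMP_SCHEDULE="guided,4"
      #include <omp.h>
      #include <stdio.h>

      int main(void) {
        omp_sched_t kind;
        int chunk;
        omp_get_schedule(&kind, &chunk);
        printf("run-time schedule: kind=%d chunk=%d\n", (int)kind, chunk);

        #pragma omp parallel for schedule(runtime) // guided,4 in this scenario
        for (int i = 0; i < 32; ++i)
          printf("i=%d on thread %d\n", i, omp_get_thread_num());
        return 0;
      }
*/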
__kmp_dispatch_num_buffers : 2;3243team->t.t_threads =3244(kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);3245team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(3246sizeof(dispatch_shared_info_t) * num_disp_buff);3247team->t.t_dispatch =3248(kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);3249team->t.t_implicit_task_taskdata =3250(kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);3251team->t.t_max_nproc = max_nth;32523253/* setup dispatch buffers */3254for (i = 0; i < num_disp_buff; ++i) {3255team->t.t_disp_buffer[i].buffer_index = i;3256team->t.t_disp_buffer[i].doacross_buf_idx = i;3257}3258}32593260static void __kmp_free_team_arrays(kmp_team_t *team) {3261/* Note: this does not free the threads in t_threads (__kmp_free_threads) */3262int i;3263for (i = 0; i < team->t.t_max_nproc; ++i) {3264if (team->t.t_dispatch[i].th_disp_buffer != NULL) {3265__kmp_free(team->t.t_dispatch[i].th_disp_buffer);3266team->t.t_dispatch[i].th_disp_buffer = NULL;3267}3268}3269#if KMP_USE_HIER_SCHED3270__kmp_dispatch_free_hierarchies(team);3271#endif3272__kmp_free(team->t.t_threads);3273__kmp_free(team->t.t_disp_buffer);3274__kmp_free(team->t.t_dispatch);3275__kmp_free(team->t.t_implicit_task_taskdata);3276team->t.t_threads = NULL;3277team->t.t_disp_buffer = NULL;3278team->t.t_dispatch = NULL;3279team->t.t_implicit_task_taskdata = 0;3280}32813282static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {3283kmp_info_t **oldThreads = team->t.t_threads;32843285__kmp_free(team->t.t_disp_buffer);3286__kmp_free(team->t.t_dispatch);3287__kmp_free(team->t.t_implicit_task_taskdata);3288__kmp_allocate_team_arrays(team, max_nth);32893290KMP_MEMCPY(team->t.t_threads, oldThreads,3291team->t.t_nproc * sizeof(kmp_info_t *));32923293__kmp_free(oldThreads);3294}32953296static kmp_internal_control_t __kmp_get_global_icvs(void) {32973298kmp_r_sched_t r_sched =3299__kmp_get_schedule_global(); // get current state of scheduling globals33003301KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);33023303kmp_internal_control_t g_icvs = {33040, // int serial_nesting_level; //corresponds to value of th_team_serialized3305(kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic3306// adjustment of threads (per thread)3307(kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for3308// whether blocktime is explicitly set3309__kmp_dflt_blocktime, // int blocktime; //internal control for blocktime3310#if KMP_USE_MONITOR3311__kmp_bt_intervals, // int bt_intervals; //internal control for blocktime3312// intervals3313#endif3314__kmp_dflt_team_nth, // int nproc; //internal control for # of threads for3315// next parallel region (per thread)3316// (use a max ub on value if __kmp_parallel_initialize not called yet)3317__kmp_cg_max_nth, // int thread_limit;3318__kmp_task_max_nth, // int task_thread_limit; // to set the thread_limit3319// on task. 
This is used in the case of target thread_limit3320__kmp_dflt_max_active_levels, // int max_active_levels; //internal control3321// for max_active_levels3322r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule3323// {sched,chunk} pair3324__kmp_nested_proc_bind.bind_types[0],3325__kmp_default_device,3326NULL // struct kmp_internal_control *next;3327};33283329return g_icvs;3330}33313332static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {33333334kmp_internal_control_t gx_icvs;3335gx_icvs.serial_nesting_level =33360; // probably =team->t.t_serial like in save_inter_controls3337copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);3338gx_icvs.next = NULL;33393340return gx_icvs;3341}33423343static void __kmp_initialize_root(kmp_root_t *root) {3344int f;3345kmp_team_t *root_team;3346kmp_team_t *hot_team;3347int hot_team_max_nth;3348kmp_r_sched_t r_sched =3349__kmp_get_schedule_global(); // get current state of scheduling globals3350kmp_internal_control_t r_icvs = __kmp_get_global_icvs();3351KMP_DEBUG_ASSERT(root);3352KMP_ASSERT(!root->r.r_begin);33533354/* setup the root state structure */3355__kmp_init_lock(&root->r.r_begin_lock);3356root->r.r_begin = FALSE;3357root->r.r_active = FALSE;3358root->r.r_in_parallel = 0;3359root->r.r_blocktime = __kmp_dflt_blocktime;3360#if KMP_AFFINITY_SUPPORTED3361root->r.r_affinity_assigned = FALSE;3362#endif33633364/* setup the root team for this task */3365/* allocate the root team structure */3366KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));33673368root_team =3369__kmp_allocate_team(root,33701, // new_nproc33711, // max_nproc3372#if OMPT_SUPPORT3373ompt_data_none, // root parallel id3374#endif3375__kmp_nested_proc_bind.bind_types[0], &r_icvs,33760 // argc3377USE_NESTED_HOT_ARG(NULL) // primary thread is unknown3378);3379#if USE_DEBUGGER3380// Non-NULL value should be assigned to make the debugger display the root3381// team.3382TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));3383#endif33843385KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));33863387root->r.r_root_team = root_team;3388root_team->t.t_control_stack_top = NULL;33893390/* initialize root team */3391root_team->t.t_threads[0] = NULL;3392root_team->t.t_nproc = 1;3393root_team->t.t_serialized = 1;3394// TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;3395root_team->t.t_sched.sched = r_sched.sched;3396root_team->t.t_nested_nth = &__kmp_nested_nth;3397KA_TRACE(339820,3399("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",3400root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));34013402/* setup the hot team for this task */3403/* allocate the hot team structure */3404KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));34053406hot_team =3407__kmp_allocate_team(root,34081, // new_nproc3409__kmp_dflt_team_nth_ub * 2, // max_nproc3410#if OMPT_SUPPORT3411ompt_data_none, // root parallel id3412#endif3413__kmp_nested_proc_bind.bind_types[0], &r_icvs,34140 // argc3415USE_NESTED_HOT_ARG(NULL) // primary thread is unknown3416);3417KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));34183419root->r.r_hot_team = hot_team;3420root_team->t.t_control_stack_top = NULL;34213422/* first-time initialization */3423hot_team->t.t_parent = root_team;34243425/* initialize hot team */3426hot_team_max_nth = hot_team->t.t_max_nproc;3427for (f = 0; f < hot_team_max_nth; ++f) {3428hot_team->t.t_threads[f] = 
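/*
  Illustrative example (user-side sketch; not code from this runtime): the
  global ICVs assembled in __kmp_get_global_icvs() above seed the root and
  hot teams set up in __kmp_initialize_root(), and user code observes them
  through the standard API.

      #include <omp.h>
      #include <stdio.h>

      int main(void) {
        printf("nthreads-var      : %d\n", omp_get_max_threads());
        printf("dyn-var           : %d\n", omp_get_dynamic());
        printf("max-active-levels : %d\n", omp_get_max_active_levels());
        printf("bind-var (level 0): %d\n", (int)omp_get_proc_bind());
        printf("default-device-var: %d\n", omp_get_default_device());
        printf("thread-limit-var  : %d\n", omp_get_thread_limit());
        return 0;
      }
*/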
NULL;3429}3430hot_team->t.t_nproc = 1;3431// TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels;3432hot_team->t.t_sched.sched = r_sched.sched;3433hot_team->t.t_size_changed = 0;3434hot_team->t.t_nested_nth = &__kmp_nested_nth;3435}34363437#ifdef KMP_DEBUG34383439typedef struct kmp_team_list_item {3440kmp_team_p const *entry;3441struct kmp_team_list_item *next;3442} kmp_team_list_item_t;3443typedef kmp_team_list_item_t *kmp_team_list_t;34443445static void __kmp_print_structure_team_accum( // Add team to list of teams.3446kmp_team_list_t list, // List of teams.3447kmp_team_p const *team // Team to add.3448) {34493450// List must terminate with item where both entry and next are NULL.3451// Team is added to the list only once.3452// List is sorted in ascending order by team id.3453// Team id is *not* a key.34543455kmp_team_list_t l;34563457KMP_DEBUG_ASSERT(list != NULL);3458if (team == NULL) {3459return;3460}34613462__kmp_print_structure_team_accum(list, team->t.t_parent);3463__kmp_print_structure_team_accum(list, team->t.t_next_pool);34643465// Search list for the team.3466l = list;3467while (l->next != NULL && l->entry != team) {3468l = l->next;3469}3470if (l->next != NULL) {3471return; // Team has been added before, exit.3472}34733474// Team is not found. Search list again for insertion point.3475l = list;3476while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {3477l = l->next;3478}34793480// Insert team.3481{3482kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(3483sizeof(kmp_team_list_item_t));3484*item = *l;3485l->entry = team;3486l->next = item;3487}3488}34893490static void __kmp_print_structure_team(char const *title, kmp_team_p const *team34913492) {3493__kmp_printf("%s", title);3494if (team != NULL) {3495__kmp_printf("%2x %p\n", team->t.t_id, team);3496} else {3497__kmp_printf(" - (nil)\n");3498}3499}35003501static void __kmp_print_structure_thread(char const *title,3502kmp_info_p const *thread) {3503__kmp_printf("%s", title);3504if (thread != NULL) {3505__kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);3506} else {3507__kmp_printf(" - (nil)\n");3508}3509}35103511void __kmp_print_structure(void) {35123513kmp_team_list_t list;35143515// Initialize list of teams.3516list =3517(kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t));3518list->entry = NULL;3519list->next = NULL;35203521__kmp_printf("\n------------------------------\nGlobal Thread "3522"Table\n------------------------------\n");3523{3524int gtid;3525for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {3526__kmp_printf("%2d", gtid);3527if (__kmp_threads != NULL) {3528__kmp_printf(" %p", __kmp_threads[gtid]);3529}3530if (__kmp_root != NULL) {3531__kmp_printf(" %p", __kmp_root[gtid]);3532}3533__kmp_printf("\n");3534}3535}35363537// Print out __kmp_threads array.3538__kmp_printf("\n------------------------------\nThreads\n--------------------"3539"----------\n");3540if (__kmp_threads != NULL) {3541int gtid;3542for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {3543kmp_info_t const *thread = __kmp_threads[gtid];3544if (thread != NULL) {3545__kmp_printf("GTID %2d %p:\n", gtid, thread);3546__kmp_printf(" Our Root: %p\n", thread->th.th_root);3547__kmp_print_structure_team(" Our Team: ", thread->th.th_team);3548__kmp_print_structure_team(" Serial Team: ",3549thread->th.th_serial_team);3550__kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc);3551__kmp_print_structure_thread(" Primary: ",3552thread->th.th_team_master);3553__kmp_printf(" 
Serialized?: %2d\n", thread->th.th_team_serialized);3554__kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc);3555__kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);3556__kmp_print_structure_thread(" Next in pool: ",3557thread->th.th_next_pool);3558__kmp_printf("\n");3559__kmp_print_structure_team_accum(list, thread->th.th_team);3560__kmp_print_structure_team_accum(list, thread->th.th_serial_team);3561}3562}3563} else {3564__kmp_printf("Threads array is not allocated.\n");3565}35663567// Print out __kmp_root array.3568__kmp_printf("\n------------------------------\nUbers\n----------------------"3569"--------\n");3570if (__kmp_root != NULL) {3571int gtid;3572for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {3573kmp_root_t const *root = __kmp_root[gtid];3574if (root != NULL) {3575__kmp_printf("GTID %2d %p:\n", gtid, root);3576__kmp_print_structure_team(" Root Team: ", root->r.r_root_team);3577__kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team);3578__kmp_print_structure_thread(" Uber Thread: ",3579root->r.r_uber_thread);3580__kmp_printf(" Active?: %2d\n", root->r.r_active);3581__kmp_printf(" In Parallel: %2d\n",3582KMP_ATOMIC_LD_RLX(&root->r.r_in_parallel));3583__kmp_printf("\n");3584__kmp_print_structure_team_accum(list, root->r.r_root_team);3585__kmp_print_structure_team_accum(list, root->r.r_hot_team);3586}3587}3588} else {3589__kmp_printf("Ubers array is not allocated.\n");3590}35913592__kmp_printf("\n------------------------------\nTeams\n----------------------"3593"--------\n");3594while (list->next != NULL) {3595kmp_team_p const *team = list->entry;3596int i;3597__kmp_printf("Team %2x %p:\n", team->t.t_id, team);3598__kmp_print_structure_team(" Parent Team: ", team->t.t_parent);3599__kmp_printf(" Primary TID: %2d\n", team->t.t_master_tid);3600__kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc);3601__kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized);3602__kmp_printf(" Number threads: %2d\n", team->t.t_nproc);3603for (i = 0; i < team->t.t_nproc; ++i) {3604__kmp_printf(" Thread %2d: ", i);3605__kmp_print_structure_thread("", team->t.t_threads[i]);3606}3607__kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool);3608__kmp_printf("\n");3609list = list->next;3610}36113612// Print out __kmp_thread_pool and __kmp_team_pool.3613__kmp_printf("\n------------------------------\nPools\n----------------------"3614"--------\n");3615__kmp_print_structure_thread("Thread pool: ",3616CCAST(kmp_info_t *, __kmp_thread_pool));3617__kmp_print_structure_team("Team pool: ",3618CCAST(kmp_team_t *, __kmp_team_pool));3619__kmp_printf("\n");36203621// Free team list.3622while (list != NULL) {3623kmp_team_list_item_t *item = list;3624list = list->next;3625KMP_INTERNAL_FREE(item);3626}3627}36283629#endif36303631//---------------------------------------------------------------------------3632// Stuff for per-thread fast random number generator3633// Table of primes3634static const unsigned __kmp_primes[] = {36350x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,36360xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,36370x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,36380x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,36390xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,36400x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,36410x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,36420x0bb9288f, 0xef1affc7, 
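// A minimal sketch of the insert-once, keep-sorted-by-id idea used by the
// KMP_DEBUG team-list accumulation above (the recursive walk over parent and
// next_pool teams is omitted). TeamRef and accum_team are illustrative names.
#if 0 // illustrative sketch, not part of the build
#include <algorithm>
#include <vector>

struct TeamRef {   // stand-in for kmp_team_p const *
  const void *ptr; // identity: the same pointer is recorded only once
  int id;          // teams are kept in ascending id order; id is not a key
};

static void accum_team(std::vector<TeamRef> &list, const TeamRef &team) {
  for (const TeamRef &t : list)
    if (t.ptr == team.ptr)
      return; // team has been added before
  auto pos = std::upper_bound(
      list.begin(), list.end(), team,
      [](const TeamRef &a, const TeamRef &b) { return a.id < b.id; });
  list.insert(pos, team);
}
#endif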
0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,36430x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,36440xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,36450x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};36463647//---------------------------------------------------------------------------3648// __kmp_get_random: Get a random number using a linear congruential method.3649unsigned short __kmp_get_random(kmp_info_t *thread) {3650unsigned x = thread->th.th_x;3651unsigned short r = (unsigned short)(x >> 16);36523653thread->th.th_x = x * thread->th.th_a + 1;36543655KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",3656thread->th.th_info.ds.ds_tid, r));36573658return r;3659}3660//--------------------------------------------------------3661// __kmp_init_random: Initialize a random number generator3662void __kmp_init_random(kmp_info_t *thread) {3663unsigned seed = thread->th.th_info.ds.ds_tid;36643665thread->th.th_a =3666__kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];3667thread->th.th_x = (seed + 1) * thread->th.th_a + 1;3668KA_TRACE(30,3669("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));3670}36713672#if KMP_OS_WINDOWS3673/* reclaim array entries for root threads that are already dead, returns number3674* reclaimed */3675static int __kmp_reclaim_dead_roots(void) {3676int i, r = 0;36773678for (i = 0; i < __kmp_threads_capacity; ++i) {3679if (KMP_UBER_GTID(i) &&3680!__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&3681!__kmp_root[i]3682->r.r_active) { // AC: reclaim only roots died in non-active state3683r += __kmp_unregister_root_other_thread(i);3684}3685}3686return r;3687}3688#endif36893690/* This function attempts to create free entries in __kmp_threads and3691__kmp_root, and returns the number of free entries generated.36923693For Windows* OS static library, the first mechanism used is to reclaim array3694entries for root threads that are already dead.36953696On all platforms, expansion is attempted on the arrays __kmp_threads_ and3697__kmp_root, with appropriate update to __kmp_threads_capacity. Array3698capacity is increased by doubling with clipping to __kmp_tp_capacity, if3699threadprivate cache array has been created. Synchronization with3700__kmpc_threadprivate_cached is done using __kmp_tp_cached_lock.37013702After any dead root reclamation, if the clipping value allows array expansion3703to result in the generation of a total of nNeed free slots, the function does3704that expansion. If not, nothing is done beyond the possible initial root3705thread reclamation.37063707If any argument is negative, the behavior is undefined. */3708static int __kmp_expand_threads(int nNeed) {3709int added = 0;3710int minimumRequiredCapacity;3711int newCapacity;3712kmp_info_t **newThreads;3713kmp_root_t **newRoot;37143715// All calls to __kmp_expand_threads should be under __kmp_forkjoin_lock, so3716// resizing __kmp_threads does not need additional protection if foreign3717// threads are present37183719#if KMP_OS_WINDOWS && !KMP_DYNAMIC_LIB3720/* only for Windows static library */3721/* reclaim array entries for root threads that are already dead */3722added = __kmp_reclaim_dead_roots();37233724if (nNeed) {3725nNeed -= added;3726if (nNeed < 0)3727nNeed = 0;3728}3729#endif3730if (nNeed <= 0)3731return added;37323733// Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. 
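// __kmp_get_random above is a 32-bit linear congruential generator,
// x_{k+1} = a * x_k + 1 (mod 2^32), whose multiplier is chosen per thread
// from __kmp_primes and which returns only the high 16 bits of the state.
// A self-contained sketch of the same recurrence; the default multiplier
// below is simply the first entry of the table above.
#if 0 // illustrative sketch, not part of the build
#include <cstdint>
#include <cstdio>

struct FastRandom {
  uint32_t a; // per-thread multiplier, taken from the primes table
  uint32_t x; // current state

  explicit FastRandom(uint32_t seed, uint32_t mult = 0x9e3779b1u)
      : a(mult), x((seed + 1) * mult + 1) {}

  uint16_t next() {
    uint16_t r = (uint16_t)(x >> 16); // high half is better distributed
    x = x * a + 1;                    // unsigned wraparound is well-defined
    return r;
  }
};

int main() {
  FastRandom rng(/*seed=*/0); // e.g. seed with the thread's tid
  for (int i = 0; i < 4; ++i)
    std::printf("%u\n", (unsigned)rng.next());
  return 0;
}
#endif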
If3734// __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the3735// user via KMP_DEVICE_THREAD_LIMIT, then __kmp_threads_capacity may become3736// > __kmp_max_nth in one of two ways:3737//3738// 1) The initialization thread (gtid = 0) exits. __kmp_threads[0]3739// may not be reused by another thread, so we may need to increase3740// __kmp_threads_capacity to __kmp_max_nth + 1.3741//3742// 2) New foreign root(s) are encountered. We always register new foreign3743// roots. This may cause a smaller # of threads to be allocated at3744// subsequent parallel regions, but the worker threads hang around (and3745// eventually go to sleep) and need slots in the __kmp_threads[] array.3746//3747// Anyway, that is the reason for moving the check to see if3748// __kmp_max_nth was exceeded into __kmp_reserve_threads()3749// instead of having it performed here. -BB37503751KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);37523753/* compute expansion headroom to check if we can expand */3754if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {3755/* possible expansion too small -- give up */3756return added;3757}3758minimumRequiredCapacity = __kmp_threads_capacity + nNeed;37593760newCapacity = __kmp_threads_capacity;3761do {3762newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)3763: __kmp_sys_max_nth;3764} while (newCapacity < minimumRequiredCapacity);3765newThreads = (kmp_info_t **)__kmp_allocate(3766(sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);3767newRoot =3768(kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);3769KMP_MEMCPY(newThreads, __kmp_threads,3770__kmp_threads_capacity * sizeof(kmp_info_t *));3771KMP_MEMCPY(newRoot, __kmp_root,3772__kmp_threads_capacity * sizeof(kmp_root_t *));3773// Put old __kmp_threads array on a list. Any ongoing references to the old3774// list will be valid. This list is cleaned up at library shutdown.3775kmp_old_threads_list_t *node =3776(kmp_old_threads_list_t *)__kmp_allocate(sizeof(kmp_old_threads_list_t));3777node->threads = __kmp_threads;3778node->next = __kmp_old_threads_list;3779__kmp_old_threads_list = node;37803781*(kmp_info_t * *volatile *)&__kmp_threads = newThreads;3782*(kmp_root_t * *volatile *)&__kmp_root = newRoot;3783added += newCapacity - __kmp_threads_capacity;3784*(volatile int *)&__kmp_threads_capacity = newCapacity;37853786if (newCapacity > __kmp_tp_capacity) {3787__kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);3788if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {3789__kmp_threadprivate_resize_cache(newCapacity);3790} else { // increase __kmp_tp_capacity to correspond with kmp_threads size3791*(volatile int *)&__kmp_tp_capacity = newCapacity;3792}3793__kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);3794}37953796return added;3797}37983799/* Register the current thread as a root thread and obtain our gtid. We must3800have the __kmp_initz_lock held at this point. 
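// The expansion step above grows capacity by repeated doubling, never lets a
// doubling step exceed the system maximum, and gives up when even the clipped
// maximum cannot supply the requested headroom. A small standalone sketch of
// just that growth computation; grow_capacity and the example values are
// illustrative, not runtime names.
#if 0 // illustrative sketch, not part of the build
#include <cstdio>

// Returns the new capacity, or -1 if the request cannot be satisfied.
static int grow_capacity(int capacity, int needed, int sys_max) {
  if (sys_max - capacity < needed)
    return -1; // possible expansion too small -- give up
  int minimum_required = capacity + needed;
  int new_capacity = capacity;
  do {
    new_capacity =
        new_capacity <= (sys_max >> 1) ? (new_capacity << 1) : sys_max;
  } while (new_capacity < minimum_required);
  return new_capacity;
}

int main() {
  std::printf("%d\n", grow_capacity(64, 100, 32768));   // 256
  std::printf("%d\n", grow_capacity(64, 40000, 32768)); // -1
  return 0;
}
#endif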
Argument TRUE only if are the3801thread that calls from __kmp_do_serial_initialize() */3802int __kmp_register_root(int initial_thread) {3803kmp_info_t *root_thread;3804kmp_root_t *root;3805int gtid;3806int capacity;3807__kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);3808KA_TRACE(20, ("__kmp_register_root: entered\n"));3809KMP_MB();38103811/* 2007-03-02:3812If initial thread did not invoke OpenMP RTL yet, and this thread is not an3813initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not3814work as expected -- it may return false (that means there is at least one3815empty slot in __kmp_threads array), but it is possible the only free slot3816is #0, which is reserved for initial thread and so cannot be used for this3817one. Following code workarounds this bug.38183819However, right solution seems to be not reserving slot #0 for initial3820thread because:3821(1) there is no magic in slot #0,3822(2) we cannot detect initial thread reliably (the first thread which does3823serial initialization may be not a real initial thread).3824*/3825capacity = __kmp_threads_capacity;3826if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {3827--capacity;3828}38293830// If it is not for initializing the hidden helper team, we need to take3831// __kmp_hidden_helper_threads_num out of the capacity because it is included3832// in __kmp_threads_capacity.3833if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {3834capacity -= __kmp_hidden_helper_threads_num;3835}38363837/* see if there are too many threads */3838if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {3839if (__kmp_tp_cached) {3840__kmp_fatal(KMP_MSG(CantRegisterNewThread),3841KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),3842KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);3843} else {3844__kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),3845__kmp_msg_null);3846}3847}38483849// When hidden helper task is enabled, __kmp_threads is organized as follows:3850// 0: initial thread, also a regular OpenMP thread.3851// [1, __kmp_hidden_helper_threads_num]: slots for hidden helper threads.3852// [__kmp_hidden_helper_threads_num + 1, __kmp_threads_capacity): slots for3853// regular OpenMP threads.3854if (TCR_4(__kmp_init_hidden_helper_threads)) {3855// Find an available thread slot for hidden helper thread. Slots for hidden3856// helper threads start from 1 to __kmp_hidden_helper_threads_num.3857for (gtid = 1; TCR_PTR(__kmp_threads[gtid]) != NULL &&3858gtid <= __kmp_hidden_helper_threads_num;3859gtid++)3860;3861KMP_ASSERT(gtid <= __kmp_hidden_helper_threads_num);3862KA_TRACE(1, ("__kmp_register_root: found slot in threads array for "3863"hidden helper thread: T#%d\n",3864gtid));3865} else {3866/* find an available thread slot */3867// Don't reassign the zero slot since we need that to only be used by3868// initial thread. 
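// With hidden helper threads enabled, the gtid space described above is laid
// out as: slot 0 = initial thread, slots [1, num_hidden] = hidden helpers,
// everything after that = regular OpenMP threads. A tiny sketch of choosing a
// free slot under that layout; find_free_slot is a hypothetical helper and
// slot 0 (reserved for the initial thread) is handled separately, as above.
#if 0 // illustrative sketch, not part of the build
#include <vector>

static int find_free_slot(const std::vector<void *> &threads, int num_hidden,
                          bool for_hidden_helper) {
  if (for_hidden_helper) {
    for (int gtid = 1; gtid <= num_hidden; ++gtid)
      if (threads[gtid] == nullptr)
        return gtid;
    return -1; // no hidden-helper slot left
  }
  for (int gtid = num_hidden + 1; gtid < (int)threads.size(); ++gtid)
    if (threads[gtid] == nullptr)
      return gtid;
  return -1; // caller would have to expand the array first
}
#endif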
Slots for hidden helper threads should also be skipped.3869if (initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {3870gtid = 0;3871} else {3872for (gtid = __kmp_hidden_helper_threads_num + 1;3873TCR_PTR(__kmp_threads[gtid]) != NULL; gtid++)3874;3875}3876KA_TRACE(38771, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid));3878KMP_ASSERT(gtid < __kmp_threads_capacity);3879}38803881/* update global accounting */3882__kmp_all_nth++;3883TCW_4(__kmp_nth, __kmp_nth + 1);38843885// if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low3886// numbers of procs, and method #2 (keyed API call) for higher numbers.3887if (__kmp_adjust_gtid_mode) {3888if (__kmp_all_nth >= __kmp_tls_gtid_min) {3889if (TCR_4(__kmp_gtid_mode) != 2) {3890TCW_4(__kmp_gtid_mode, 2);3891}3892} else {3893if (TCR_4(__kmp_gtid_mode) != 1) {3894TCW_4(__kmp_gtid_mode, 1);3895}3896}3897}38983899#ifdef KMP_ADJUST_BLOCKTIME3900/* Adjust blocktime to zero if necessary */3901/* Middle initialization might not have occurred yet */3902if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {3903if (__kmp_nth > __kmp_avail_proc) {3904__kmp_zero_bt = TRUE;3905}3906}3907#endif /* KMP_ADJUST_BLOCKTIME */39083909/* setup this new hierarchy */3910if (!(root = __kmp_root[gtid])) {3911root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t));3912KMP_DEBUG_ASSERT(!root->r.r_root_team);3913}39143915#if KMP_STATS_ENABLED3916// Initialize stats as soon as possible (right after gtid assignment).3917__kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);3918__kmp_stats_thread_ptr->startLife();3919KMP_SET_THREAD_STATE(SERIAL_REGION);3920KMP_INIT_PARTITIONED_TIMERS(OMP_serial);3921#endif3922__kmp_initialize_root(root);39233924/* setup new root thread structure */3925if (root->r.r_uber_thread) {3926root_thread = root->r.r_uber_thread;3927} else {3928root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));3929if (__kmp_storage_map) {3930__kmp_print_thread_storage_map(root_thread, gtid);3931}3932root_thread->th.th_info.ds.ds_gtid = gtid;3933#if OMPT_SUPPORT3934root_thread->th.ompt_thread_info.thread_data = ompt_data_none;3935#endif3936root_thread->th.th_root = root;3937if (__kmp_env_consistency_check) {3938root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);3939}3940#if USE_FAST_MEMORY3941__kmp_initialize_fast_memory(root_thread);3942#endif /* USE_FAST_MEMORY */39433944#if KMP_USE_BGET3945KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);3946__kmp_initialize_bget(root_thread);3947#endif3948__kmp_init_random(root_thread); // Initialize random number generator3949}39503951/* setup the serial team held in reserve by the root thread */3952if (!root_thread->th.th_serial_team) {3953kmp_internal_control_t r_icvs = __kmp_get_global_icvs();3954KF_TRACE(10, ("__kmp_register_root: before serial_team\n"));3955root_thread->th.th_serial_team = __kmp_allocate_team(3956root, 1, 1,3957#if OMPT_SUPPORT3958ompt_data_none, // root parallel id3959#endif3960proc_bind_default, &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));3961}3962KMP_ASSERT(root_thread->th.th_serial_team);3963KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n",3964root_thread->th.th_serial_team));39653966/* drop root_thread into place */3967TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);39683969root->r.r_root_team->t.t_threads[0] = root_thread;3970root->r.r_hot_team->t.t_threads[0] = root_thread;3971root_thread->th.th_serial_team->t.t_threads[0] = root_thread;3972// AC: the team created in reserve, not for execution (it is unused for 
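// The __kmp_adjust_gtid_mode block above switches the gtid lookup method once
// the total thread count crosses the TLS threshold: method #1 (stack-address
// search) for small counts, method #2 (keyed TLS) for larger ones. A
// one-function sketch of that policy; choose_gtid_mode is an illustrative
// name, not a runtime routine.
#if 0 // illustrative sketch, not part of the build
static int choose_gtid_mode(int all_nth, int tls_gtid_min, int current_mode) {
  int wanted = (all_nth >= tls_gtid_min) ? 2 : 1;
  return (current_mode == wanted) ? current_mode : wanted; // write only on change
}
#endif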
now).3973root_thread->th.th_serial_team->t.t_serialized = 0;3974root->r.r_uber_thread = root_thread;39753976/* initialize the thread, get it ready to go */3977__kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);3978TCW_4(__kmp_init_gtid, TRUE);39793980/* prepare the primary thread for get_gtid() */3981__kmp_gtid_set_specific(gtid);39823983#if USE_ITT_BUILD3984__kmp_itt_thread_name(gtid);3985#endif /* USE_ITT_BUILD */39863987#ifdef KMP_TDATA_GTID3988__kmp_gtid = gtid;3989#endif3990__kmp_create_worker(gtid, root_thread, __kmp_stksize);3991KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);39923993KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, "3994"plain=%u\n",3995gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),3996root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,3997KMP_INIT_BARRIER_STATE));3998{ // Initialize barrier data.3999int b;4000for (b = 0; b < bs_last_barrier; ++b) {4001root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;4002#if USE_DEBUGGER4003root_thread->th.th_bar[b].bb.b_worker_arrived = 0;4004#endif4005}4006}4007KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==4008KMP_INIT_BARRIER_STATE);40094010#if KMP_AFFINITY_SUPPORTED4011root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;4012root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;4013root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;4014root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;4015#endif /* KMP_AFFINITY_SUPPORTED */4016root_thread->th.th_def_allocator = __kmp_def_allocator;4017root_thread->th.th_prev_level = 0;4018root_thread->th.th_prev_num_threads = 1;40194020kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));4021tmp->cg_root = root_thread;4022tmp->cg_thread_limit = __kmp_cg_max_nth;4023tmp->cg_nthreads = 1;4024KA_TRACE(100, ("__kmp_register_root: Thread %p created node %p with"4025" cg_nthreads init to 1\n",4026root_thread, tmp));4027tmp->up = NULL;4028root_thread->th.th_cg_roots = tmp;40294030__kmp_root_counter++;40314032#if OMPT_SUPPORT4033if (ompt_enabled.enabled) {40344035kmp_info_t *root_thread = ompt_get_thread();40364037ompt_set_thread_state(root_thread, ompt_state_overhead);40384039if (ompt_enabled.ompt_callback_thread_begin) {4040ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(4041ompt_thread_initial, __ompt_get_thread_data_internal());4042}4043ompt_data_t *task_data;4044ompt_data_t *parallel_data;4045__ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,4046NULL);4047if (ompt_enabled.ompt_callback_implicit_task) {4048ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(4049ompt_scope_begin, parallel_data, task_data, 1, 1, ompt_task_initial);4050}40514052ompt_set_thread_state(root_thread, ompt_state_work_serial);4053}4054#endif4055#if OMPD_SUPPORT4056if (ompd_state & OMPD_ENABLE_BP)4057ompd_bp_thread_begin();4058#endif40594060KMP_MB();4061__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);40624063return gtid;4064}40654066#if KMP_NESTED_HOT_TEAMS4067static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,4068const int max_level) {4069int i, n, nth;4070kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;4071if (!hot_teams || !hot_teams[level].hot_team) {4072return 0;4073}4074KMP_DEBUG_ASSERT(level < max_level);4075kmp_team_t *team = hot_teams[level].hot_team;4076nth = hot_teams[level].hot_team_nth;4077n = nth - 1; // primary thread is not freed4078if (level < max_level - 1) {4079for (i = 0; i < nth; ++i) {4080kmp_info_t *th = 
team->t.t_threads[i];4081n += __kmp_free_hot_teams(root, th, level + 1, max_level);4082if (i > 0 && th->th.th_hot_teams) {4083__kmp_free(th->th.th_hot_teams);4084th->th.th_hot_teams = NULL;4085}4086}4087}4088__kmp_free_team(root, team, NULL);4089return n;4090}4091#endif40924093// Resets a root thread and clear its root and hot teams.4094// Returns the number of __kmp_threads entries directly and indirectly freed.4095static int __kmp_reset_root(int gtid, kmp_root_t *root) {4096kmp_team_t *root_team = root->r.r_root_team;4097kmp_team_t *hot_team = root->r.r_hot_team;4098int n = hot_team->t.t_nproc;4099int i;41004101KMP_DEBUG_ASSERT(!root->r.r_active);41024103root->r.r_root_team = NULL;4104root->r.r_hot_team = NULL;4105// __kmp_free_team() does not free hot teams, so we have to clear r_hot_team4106// before call to __kmp_free_team().4107__kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));4108#if KMP_NESTED_HOT_TEAMS4109if (__kmp_hot_teams_max_level >41100) { // need to free nested hot teams and their threads if any4111for (i = 0; i < hot_team->t.t_nproc; ++i) {4112kmp_info_t *th = hot_team->t.t_threads[i];4113if (__kmp_hot_teams_max_level > 1) {4114n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);4115}4116if (th->th.th_hot_teams) {4117__kmp_free(th->th.th_hot_teams);4118th->th.th_hot_teams = NULL;4119}4120}4121}4122#endif4123__kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));41244125// Before we can reap the thread, we need to make certain that all other4126// threads in the teams that had this root as ancestor have stopped trying to4127// steal tasks.4128if (__kmp_tasking_mode != tskm_immediate_exec) {4129__kmp_wait_to_unref_task_teams();4130}41314132#if KMP_OS_WINDOWS4133/* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */4134KA_TRACE(413510, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC4136"\n",4137(LPVOID) & (root->r.r_uber_thread->th),4138root->r.r_uber_thread->th.th_info.ds.ds_thread));4139__kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);4140#endif /* KMP_OS_WINDOWS */41414142#if OMPD_SUPPORT4143if (ompd_state & OMPD_ENABLE_BP)4144ompd_bp_thread_end();4145#endif41464147#if OMPT_SUPPORT4148ompt_data_t *task_data;4149ompt_data_t *parallel_data;4150__ompt_get_task_info_internal(0, NULL, &task_data, NULL, ¶llel_data,4151NULL);4152if (ompt_enabled.ompt_callback_implicit_task) {4153ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(4154ompt_scope_end, parallel_data, task_data, 0, 1, ompt_task_initial);4155}4156if (ompt_enabled.ompt_callback_thread_end) {4157ompt_callbacks.ompt_callback(ompt_callback_thread_end)(4158&(root->r.r_uber_thread->th.ompt_thread_info.thread_data));4159}4160#endif41614162TCW_4(__kmp_nth,4163__kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth.4164i = root->r.r_uber_thread->th.th_cg_roots->cg_nthreads--;4165KA_TRACE(100, ("__kmp_reset_root: Thread %p decrement cg_nthreads on node %p"4166" to %d\n",4167root->r.r_uber_thread, root->r.r_uber_thread->th.th_cg_roots,4168root->r.r_uber_thread->th.th_cg_roots->cg_nthreads));4169if (i == 1) {4170// need to free contention group structure4171KMP_DEBUG_ASSERT(root->r.r_uber_thread ==4172root->r.r_uber_thread->th.th_cg_roots->cg_root);4173KMP_DEBUG_ASSERT(root->r.r_uber_thread->th.th_cg_roots->up == NULL);4174__kmp_free(root->r.r_uber_thread->th.th_cg_roots);4175root->r.r_uber_thread->th.th_cg_roots = NULL;4176}4177__kmp_reap_thread(root->r.r_uber_thread, 1);41784179// We canot put root thread to __kmp_thread_pool, so we 
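// __kmp_free_hot_teams above walks the nested hot teams recursively and
// returns how many __kmp_threads entries were released, deliberately not
// counting each team's primary thread (n = nth - 1). A simplified recursive
// sketch of that counting over a plain tree; Node and count_freed are
// illustrative, not runtime structures.
#if 0 // illustrative sketch, not part of the build
#include <vector>

struct Node {
  int nth = 0;              // threads owned by this team
  std::vector<Node> nested; // hot teams owned by this team's workers
};

static int count_freed(const Node &team) {
  int n = team.nth - 1; // primary thread is not freed
  for (const Node &child : team.nested)
    n += count_freed(child);
  return n;
}
#endif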
have to reap it4180// instead of freeing.4181root->r.r_uber_thread = NULL;4182/* mark root as no longer in use */4183root->r.r_begin = FALSE;41844185return n;4186}41874188void __kmp_unregister_root_current_thread(int gtid) {4189KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));4190/* this lock should be ok, since unregister_root_current_thread is never4191called during an abort, only during a normal close. furthermore, if you4192have the forkjoin lock, you should never try to get the initz lock */4193__kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);4194if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {4195KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "4196"exiting T#%d\n",4197gtid));4198__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);4199return;4200}4201kmp_root_t *root = __kmp_root[gtid];42024203KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);4204KMP_ASSERT(KMP_UBER_GTID(gtid));4205KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);4206KMP_ASSERT(root->r.r_active == FALSE);42074208KMP_MB();42094210kmp_info_t *thread = __kmp_threads[gtid];4211kmp_team_t *team = thread->th.th_team;4212kmp_task_team_t *task_team = thread->th.th_task_team;42134214// we need to wait for the proxy tasks before finishing the thread4215if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||4216task_team->tt.tt_hidden_helper_task_encountered)) {4217#if OMPT_SUPPORT4218// the runtime is shutting down so we won't report any events4219thread->th.ompt_thread_info.state = ompt_state_undefined;4220#endif4221__kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));4222}42234224__kmp_reset_root(gtid, root);42254226KMP_MB();4227KC_TRACE(10,4228("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));42294230__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);4231}42324233#if KMP_OS_WINDOWS4234/* __kmp_forkjoin_lock must be already held4235Unregisters a root thread that is not the current thread. Returns the number4236of __kmp_threads entries freed as a result. 
*/4237static int __kmp_unregister_root_other_thread(int gtid) {4238kmp_root_t *root = __kmp_root[gtid];4239int r;42404241KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));4242KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);4243KMP_ASSERT(KMP_UBER_GTID(gtid));4244KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);4245KMP_ASSERT(root->r.r_active == FALSE);42464247r = __kmp_reset_root(gtid, root);4248KC_TRACE(10,4249("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));4250return r;4251}4252#endif42534254#if KMP_DEBUG4255void __kmp_task_info() {42564257kmp_int32 gtid = __kmp_entry_gtid();4258kmp_int32 tid = __kmp_tid_from_gtid(gtid);4259kmp_info_t *this_thr = __kmp_threads[gtid];4260kmp_team_t *steam = this_thr->th.th_serial_team;4261kmp_team_t *team = this_thr->th.th_team;42624263__kmp_printf(4264"__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p steam=%p curtask=%p "4265"ptask=%p\n",4266gtid, tid, this_thr, team, steam, this_thr->th.th_current_task,4267team->t.t_implicit_task_taskdata[tid].td_parent);4268}4269#endif // KMP_DEBUG42704271/* TODO optimize with one big memclr, take out what isn't needed, split4272responsibility to workers as much as possible, and delay initialization of4273features as much as possible */4274static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,4275int tid, int gtid) {4276/* this_thr->th.th_info.ds.ds_gtid is setup in4277kmp_allocate_thread/create_worker.4278this_thr->th.th_serial_team is setup in __kmp_allocate_thread */4279KMP_DEBUG_ASSERT(this_thr != NULL);4280KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);4281KMP_DEBUG_ASSERT(team);4282KMP_DEBUG_ASSERT(team->t.t_threads);4283KMP_DEBUG_ASSERT(team->t.t_dispatch);4284kmp_info_t *master = team->t.t_threads[0];4285KMP_DEBUG_ASSERT(master);4286KMP_DEBUG_ASSERT(master->th.th_root);42874288KMP_MB();42894290TCW_SYNC_PTR(this_thr->th.th_team, team);42914292this_thr->th.th_info.ds.ds_tid = tid;4293this_thr->th.th_set_nproc = 0;4294if (__kmp_tasking_mode != tskm_immediate_exec)4295// When tasking is possible, threads are not safe to reap until they are4296// done tasking; this will be set when tasking code is exited in wait4297this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;4298else // no tasking --> always safe to reap4299this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;4300this_thr->th.th_set_proc_bind = proc_bind_default;43014302#if KMP_AFFINITY_SUPPORTED4303this_thr->th.th_new_place = this_thr->th.th_current_place;4304#endif4305this_thr->th.th_root = master->th.th_root;43064307/* setup the thread's cache of the team structure */4308this_thr->th.th_team_nproc = team->t.t_nproc;4309this_thr->th.th_team_master = master;4310this_thr->th.th_team_serialized = team->t.t_serialized;43114312KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);43134314KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",4315tid, gtid, this_thr, this_thr->th.th_current_task));43164317__kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,4318team, tid, TRUE);43194320KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",4321tid, gtid, this_thr, this_thr->th.th_current_task));4322// TODO: Initialize ICVs from parent; GEH - isn't that already done in4323// __kmp_initialize_team()?43244325/* TODO no worksharing in speculative threads */4326this_thr->th.th_dispatch = &team->t.t_dispatch[tid];43274328this_thr->th.th_local.this_construct = 0;43294330if (!this_thr->th.th_pri_common) {4331this_thr->th.th_pri_common =4332(struct 
common_table *)__kmp_allocate(sizeof(struct common_table));4333if (__kmp_storage_map) {4334__kmp_print_storage_map_gtid(4335gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,4336sizeof(struct common_table), "th_%d.th_pri_common\n", gtid);4337}4338this_thr->th.th_pri_head = NULL;4339}43404341if (this_thr != master && // Primary thread's CG root is initialized elsewhere4342this_thr->th.th_cg_roots != master->th.th_cg_roots) { // CG root not set4343// Make new thread's CG root same as primary thread's4344KMP_DEBUG_ASSERT(master->th.th_cg_roots);4345kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;4346if (tmp) {4347// worker changes CG, need to check if old CG should be freed4348int i = tmp->cg_nthreads--;4349KA_TRACE(100, ("__kmp_initialize_info: Thread %p decrement cg_nthreads"4350" on node %p of thread %p to %d\n",4351this_thr, tmp, tmp->cg_root, tmp->cg_nthreads));4352if (i == 1) {4353__kmp_free(tmp); // last thread left CG --> free it4354}4355}4356this_thr->th.th_cg_roots = master->th.th_cg_roots;4357// Increment new thread's CG root's counter to add the new thread4358this_thr->th.th_cg_roots->cg_nthreads++;4359KA_TRACE(100, ("__kmp_initialize_info: Thread %p increment cg_nthreads on"4360" node %p of thread %p to %d\n",4361this_thr, this_thr->th.th_cg_roots,4362this_thr->th.th_cg_roots->cg_root,4363this_thr->th.th_cg_roots->cg_nthreads));4364this_thr->th.th_current_task->td_icvs.thread_limit =4365this_thr->th.th_cg_roots->cg_thread_limit;4366}43674368/* Initialize dynamic dispatch */4369{4370volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;4371// Use team max_nproc since this will never change for the team.4372size_t disp_size =4373sizeof(dispatch_private_info_t) *4374(team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);4375KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,4376team->t.t_max_nproc));4377KMP_ASSERT(dispatch);4378KMP_DEBUG_ASSERT(team->t.t_dispatch);4379KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);43804381dispatch->th_disp_index = 0;4382dispatch->th_doacross_buf_idx = 0;4383if (!dispatch->th_disp_buffer) {4384dispatch->th_disp_buffer =4385(dispatch_private_info_t *)__kmp_allocate(disp_size);43864387if (__kmp_storage_map) {4388__kmp_print_storage_map_gtid(4389gtid, &dispatch->th_disp_buffer[0],4390&dispatch->th_disp_buffer[team->t.t_max_nproc == 14391? 14392: __kmp_dispatch_num_buffers],4393disp_size,4394"th_%d.th_dispatch.th_disp_buffer "4395"(team_%d.t_dispatch[%d].th_disp_buffer)",4396gtid, team->t.t_id, gtid);4397}4398} else {4399memset(&dispatch->th_disp_buffer[0], '\0', disp_size);4400}44014402dispatch->th_dispatch_pr_current = 0;4403dispatch->th_dispatch_sh_current = 0;44044405dispatch->th_deo_fcn = 0; /* ORDERED */4406dispatch->th_dxo_fcn = 0; /* END ORDERED */4407}44084409this_thr->th.th_next_pool = NULL;44104411KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);4412KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);44134414KMP_MB();4415}44164417/* allocate a new thread for the requesting team. this is only called from4418within a forkjoin critical section. we will first try to get an available4419thread from the thread pool. if none is available, we will fork a new one4420assuming we are able to create a new one. this should be assured, as the4421caller should check on this first. 
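// In __kmp_initialize_info above, a worker joining a team under a different
// contention-group root drops its old kmp_cg_root_t (freeing it when the
// member count hits zero) and adopts the primary thread's, bumping that
// counter. A standalone sketch of the same hand-over step; CGRoot and
// switch_cg are illustrative names, and the sketch assumes CG roots are
// heap-allocated with new.
#if 0 // illustrative sketch, not part of the build
struct CGRoot {
  int nthreads = 0;     // threads currently charged against this CG
  int thread_limit = 0; // cg_thread_limit analogue
};

static void switch_cg(CGRoot *&workers_cg, CGRoot *primaries_cg) {
  if (workers_cg == primaries_cg)
    return; // already in the right contention group
  if (workers_cg && --workers_cg->nthreads == 0)
    delete workers_cg; // last thread left the old CG
  workers_cg = primaries_cg;
  ++workers_cg->nthreads; // account for the newly joined thread
}
#endif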
*/4422kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,4423int new_tid) {4424kmp_team_t *serial_team;4425kmp_info_t *new_thr;4426int new_gtid;44274428KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));4429KMP_DEBUG_ASSERT(root && team);4430#if !KMP_NESTED_HOT_TEAMS4431KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));4432#endif4433KMP_MB();44344435/* first, try to get one from the thread pool unless allocating thread is4436* the main hidden helper thread. The hidden helper team should always4437* allocate new OS threads. */4438if (__kmp_thread_pool && !KMP_HIDDEN_HELPER_TEAM(team)) {4439new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);4440__kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool;4441if (new_thr == __kmp_thread_pool_insert_pt) {4442__kmp_thread_pool_insert_pt = NULL;4443}4444TCW_4(new_thr->th.th_in_pool, FALSE);4445__kmp_suspend_initialize_thread(new_thr);4446__kmp_lock_suspend_mx(new_thr);4447if (new_thr->th.th_active_in_pool == TRUE) {4448KMP_DEBUG_ASSERT(new_thr->th.th_active == TRUE);4449KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);4450new_thr->th.th_active_in_pool = FALSE;4451}4452__kmp_unlock_suspend_mx(new_thr);44534454KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n",4455__kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));4456KMP_ASSERT(!new_thr->th.th_team);4457KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);44584459/* setup the thread structure */4460__kmp_initialize_info(new_thr, team, new_tid,4461new_thr->th.th_info.ds.ds_gtid);4462KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);44634464TCW_4(__kmp_nth, __kmp_nth + 1);44654466new_thr->th.th_task_state = 0;44674468if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {4469// Make sure pool thread has transitioned to waiting on own thread struct4470KMP_DEBUG_ASSERT(new_thr->th.th_used_in_team.load() == 0);4471// Thread activated in __kmp_allocate_team when increasing team size4472}44734474#ifdef KMP_ADJUST_BLOCKTIME4475/* Adjust blocktime back to zero if necessary */4476/* Middle initialization might not have occurred yet */4477if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {4478if (__kmp_nth > __kmp_avail_proc) {4479__kmp_zero_bt = TRUE;4480}4481}4482#endif /* KMP_ADJUST_BLOCKTIME */44834484#if KMP_DEBUG4485// If thread entered pool via __kmp_free_thread, wait_flag should !=4486// KMP_BARRIER_PARENT_FLAG.4487int b;4488kmp_balign_t *balign = new_thr->th.th_bar;4489for (b = 0; b < bs_last_barrier; ++b)4490KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);4491#endif44924493KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n",4494__kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));44954496KMP_MB();4497return new_thr;4498}44994500/* no, well fork a new one */4501KMP_ASSERT(KMP_HIDDEN_HELPER_TEAM(team) || __kmp_nth == __kmp_all_nth);4502KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);45034504#if KMP_USE_MONITOR4505// If this is the first worker thread the RTL is creating, then also4506// launch the monitor thread. We try to do this as early as possible.4507if (!TCR_4(__kmp_init_monitor)) {4508__kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);4509if (!TCR_4(__kmp_init_monitor)) {4510KF_TRACE(10, ("before __kmp_create_monitor\n"));4511TCW_4(__kmp_init_monitor, 1);4512__kmp_create_monitor(&__kmp_monitor);4513KF_TRACE(10, ("after __kmp_create_monitor\n"));4514#if KMP_OS_WINDOWS4515// AC: wait until monitor has started. 
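// __kmp_allocate_thread above first tries to pop a parked thread off the head
// of __kmp_thread_pool, a singly linked LIFO threaded through th_next_pool,
// and forks a new OS thread only if the pool is empty or the request comes
// from the hidden helper team. A minimal sketch of that pop-or-create
// decision; Worker and ThreadPool are illustrative types.
#if 0 // illustrative sketch, not part of the build
struct Worker {
  Worker *next_pool = nullptr;
  bool in_pool = false;
};

struct ThreadPool {
  Worker *head = nullptr;

  // Reuse a pooled worker if possible; a nullptr return means the caller
  // must fork a fresh OS thread.
  Worker *acquire(bool hidden_helper_team) {
    if (head == nullptr || hidden_helper_team)
      return nullptr; // hidden helper team always gets fresh OS threads
    Worker *w = head;
    head = w->next_pool;
    w->next_pool = nullptr;
    w->in_pool = false;
    return w;
  }

  void release(Worker *w) { // park a worker for later reuse
    w->in_pool = true;
    w->next_pool = head;
    head = w;
  }
};
#endif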
This is a fix for CQ232808.4516// The reason is that if the library is loaded/unloaded in a loop with4517// small (parallel) work in between, then there is high probability that4518// monitor thread started after the library shutdown. At shutdown it is4519// too late to cope with the problem, because when the primary thread is4520// in DllMain (process detach) the monitor has no chances to start (it is4521// blocked), and primary thread has no means to inform the monitor that4522// the library has gone, because all the memory which the monitor can4523// access is going to be released/reset.4524while (TCR_4(__kmp_init_monitor) < 2) {4525KMP_YIELD(TRUE);4526}4527KF_TRACE(10, ("after monitor thread has started\n"));4528#endif4529}4530__kmp_release_bootstrap_lock(&__kmp_monitor_lock);4531}4532#endif45334534KMP_MB();45354536{4537int new_start_gtid = TCR_4(__kmp_init_hidden_helper_threads)4538? 14539: __kmp_hidden_helper_threads_num + 1;45404541for (new_gtid = new_start_gtid; TCR_PTR(__kmp_threads[new_gtid]) != NULL;4542++new_gtid) {4543KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);4544}45454546if (TCR_4(__kmp_init_hidden_helper_threads)) {4547KMP_DEBUG_ASSERT(new_gtid <= __kmp_hidden_helper_threads_num);4548}4549}45504551/* allocate space for it. */4552new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));45534554new_thr->th.th_nt_strict = false;4555new_thr->th.th_nt_loc = NULL;4556new_thr->th.th_nt_sev = severity_fatal;4557new_thr->th.th_nt_msg = NULL;45584559TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);45604561#if USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG4562// suppress race conditions detection on synchronization flags in debug mode4563// this helps to analyze library internals eliminating false positives4564__itt_suppress_mark_range(4565__itt_suppress_range, __itt_suppress_threading_errors,4566&new_thr->th.th_sleep_loc, sizeof(new_thr->th.th_sleep_loc));4567__itt_suppress_mark_range(4568__itt_suppress_range, __itt_suppress_threading_errors,4569&new_thr->th.th_reap_state, sizeof(new_thr->th.th_reap_state));4570#if KMP_OS_WINDOWS4571__itt_suppress_mark_range(4572__itt_suppress_range, __itt_suppress_threading_errors,4573&new_thr->th.th_suspend_init, sizeof(new_thr->th.th_suspend_init));4574#else4575__itt_suppress_mark_range(__itt_suppress_range,4576__itt_suppress_threading_errors,4577&new_thr->th.th_suspend_init_count,4578sizeof(new_thr->th.th_suspend_init_count));4579#endif4580// TODO: check if we need to also suppress b_arrived flags4581__itt_suppress_mark_range(__itt_suppress_range,4582__itt_suppress_threading_errors,4583CCAST(kmp_uint64 *, &new_thr->th.th_bar[0].bb.b_go),4584sizeof(new_thr->th.th_bar[0].bb.b_go));4585__itt_suppress_mark_range(__itt_suppress_range,4586__itt_suppress_threading_errors,4587CCAST(kmp_uint64 *, &new_thr->th.th_bar[1].bb.b_go),4588sizeof(new_thr->th.th_bar[1].bb.b_go));4589__itt_suppress_mark_range(__itt_suppress_range,4590__itt_suppress_threading_errors,4591CCAST(kmp_uint64 *, &new_thr->th.th_bar[2].bb.b_go),4592sizeof(new_thr->th.th_bar[2].bb.b_go));4593#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY && KMP_DEBUG */4594if (__kmp_storage_map) {4595__kmp_print_thread_storage_map(new_thr, new_gtid);4596}45974598// add the reserve serialized team, initialized from the team's primary thread4599{4600kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);4601KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));4602new_thr->th.th_serial_team = serial_team =4603(kmp_team_t *)__kmp_allocate_team(root, 1, 1,4604#if 
OMPT_SUPPORT4605ompt_data_none, // root parallel id4606#endif4607proc_bind_default, &r_icvs,46080 USE_NESTED_HOT_ARG(NULL));4609}4610KMP_ASSERT(serial_team);4611serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for4612// execution (it is unused for now).4613serial_team->t.t_threads[0] = new_thr;4614KF_TRACE(10,4615("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",4616new_thr));46174618/* setup the thread structures */4619__kmp_initialize_info(new_thr, team, new_tid, new_gtid);46204621#if USE_FAST_MEMORY4622__kmp_initialize_fast_memory(new_thr);4623#endif /* USE_FAST_MEMORY */46244625#if KMP_USE_BGET4626KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);4627__kmp_initialize_bget(new_thr);4628#endif46294630__kmp_init_random(new_thr); // Initialize random number generator46314632/* Initialize these only once when thread is grabbed for a team allocation */4633KA_TRACE(20,4634("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",4635__kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));46364637int b;4638kmp_balign_t *balign = new_thr->th.th_bar;4639for (b = 0; b < bs_last_barrier; ++b) {4640balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;4641balign[b].bb.team = NULL;4642balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;4643balign[b].bb.use_oncore_barrier = 0;4644}46454646TCW_PTR(new_thr->th.th_sleep_loc, NULL);4647new_thr->th.th_sleep_loc_type = flag_unset;46484649new_thr->th.th_spin_here = FALSE;4650new_thr->th.th_next_waiting = 0;4651#if KMP_OS_UNIX4652new_thr->th.th_blocking = false;4653#endif46544655#if KMP_AFFINITY_SUPPORTED4656new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;4657new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;4658new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;4659new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;4660#endif4661new_thr->th.th_def_allocator = __kmp_def_allocator;4662new_thr->th.th_prev_level = 0;4663new_thr->th.th_prev_num_threads = 1;46644665TCW_4(new_thr->th.th_in_pool, FALSE);4666new_thr->th.th_active_in_pool = FALSE;4667TCW_4(new_thr->th.th_active, TRUE);46684669new_thr->th.th_set_nested_nth = NULL;4670new_thr->th.th_set_nested_nth_sz = 0;46714672/* adjust the global counters */4673__kmp_all_nth++;4674__kmp_nth++;46754676// if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low4677// numbers of procs, and method #2 (keyed API call) for higher numbers.4678if (__kmp_adjust_gtid_mode) {4679if (__kmp_all_nth >= __kmp_tls_gtid_min) {4680if (TCR_4(__kmp_gtid_mode) != 2) {4681TCW_4(__kmp_gtid_mode, 2);4682}4683} else {4684if (TCR_4(__kmp_gtid_mode) != 1) {4685TCW_4(__kmp_gtid_mode, 1);4686}4687}4688}46894690#ifdef KMP_ADJUST_BLOCKTIME4691/* Adjust blocktime back to zero if necessary */4692/* Middle initialization might not have occurred yet */4693if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {4694if (__kmp_nth > __kmp_avail_proc) {4695__kmp_zero_bt = TRUE;4696}4697}4698#endif /* KMP_ADJUST_BLOCKTIME */46994700#if KMP_AFFINITY_SUPPORTED4701// Set the affinity and topology information for new thread4702__kmp_affinity_set_init_mask(new_gtid, /*isa_root=*/FALSE);4703#endif47044705/* actually fork it and create the new worker thread */4706KF_TRACE(470710, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));4708__kmp_create_worker(new_gtid, new_thr, __kmp_stksize);4709KF_TRACE(10,4710("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));47114712KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", 
__kmp_get_gtid(), new_gtid));
  KMP_MB();
  return new_thr;
}

/* Reinitialize team for reuse.
   The hot team code calls this routine at every fork barrier, so the EPCC
   barrier tests are extremely sensitive to changes in it, esp. writes to the
   team struct, which cause a cache invalidation in all threads.
   IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy ICVs to the primary thread's implicit taskdata
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}

/* Initialize the team data structure.
   This assumes t_threads and t_max_nproc are already set.
   Also, we don't touch the arguments. */
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_MB();

  team->t.t_master_tid = 0; /* not needed */
  /* team->t.t_master_bar;        not needed */
  team->t.t_serialized = new_nproc > 1 ?
0 : 1;4758team->t.t_nproc = new_nproc;47594760/* team->t.t_parent = NULL; TODO not needed & would mess up hot team */4761team->t.t_next_pool = NULL;4762/* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess4763* up hot team */47644765TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */4766team->t.t_invoke = NULL; /* not needed */47674768// TODO???: team->t.t_max_active_levels = new_max_active_levels;4769team->t.t_sched.sched = new_icvs->sched.sched;47704771#if KMP_ARCH_X86 || KMP_ARCH_X86_644772team->t.t_fp_control_saved = FALSE; /* not needed */4773team->t.t_x87_fpu_control_word = 0; /* not needed */4774team->t.t_mxcsr = 0; /* not needed */4775#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */47764777team->t.t_construct = 0;47784779team->t.t_ordered.dt.t_value = 0;4780team->t.t_master_active = FALSE;47814782#ifdef KMP_DEBUG4783team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */4784#endif4785#if KMP_OS_WINDOWS4786team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */4787#endif47884789team->t.t_control_stack_top = NULL;47904791__kmp_reinitialize_team(team, new_icvs, loc);47924793KMP_MB();4794KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));4795}47964797#if KMP_AFFINITY_SUPPORTED4798static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,4799int first, int last, int newp) {4800th->th.th_first_place = first;4801th->th.th_last_place = last;4802th->th.th_new_place = newp;4803if (newp != th->th.th_current_place) {4804if (__kmp_display_affinity && team->t.t_display_affinity != 1)4805team->t.t_display_affinity = 1;4806// Copy topology information associated with the new place4807th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];4808th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];4809}4810}48114812// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.4813// It calculates the worker + primary thread's partition based upon the parent4814// thread's partition, and binds each worker to a thread in their partition.4815// The primary thread's partition should already include its current binding.4816static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {4817// Do not partition places for the hidden helper team4818if (KMP_HIDDEN_HELPER_TEAM(team))4819return;4820// Copy the primary thread's place partition to the team struct4821kmp_info_t *master_th = team->t.t_threads[0];4822KMP_DEBUG_ASSERT(master_th != NULL);4823kmp_proc_bind_t proc_bind = team->t.t_proc_bind;4824int first_place = master_th->th.th_first_place;4825int last_place = master_th->th.th_last_place;4826int masters_place = master_th->th.th_current_place;4827int num_masks = __kmp_affinity.num_masks;4828team->t.t_first_place = first_place;4829team->t.t_last_place = last_place;48304831KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) "4832"bound to place %d partition = [%d,%d]\n",4833proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),4834team->t.t_id, masters_place, first_place, last_place));48354836switch (proc_bind) {48374838case proc_bind_default:4839// Serial teams might have the proc_bind policy set to proc_bind_default.4840// Not an issue -- we don't rebind primary thread for any proc_bind policy.4841KMP_DEBUG_ASSERT(team->t.t_nproc == 1);4842break;48434844case proc_bind_primary: {4845int f;4846int n_th = team->t.t_nproc;4847for (f = 1; f < n_th; f++) {4848kmp_info_t *th = team->t.t_threads[f];4849KMP_DEBUG_ASSERT(th != 
NULL);4850__kmp_set_thread_place(team, th, first_place, last_place, masters_place);48514852KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "4853"partition = [%d,%d]\n",4854__kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,4855f, masters_place, first_place, last_place));4856}4857} break;48584859case proc_bind_close: {4860int f;4861int n_th = team->t.t_nproc;4862int n_places;4863if (first_place <= last_place) {4864n_places = last_place - first_place + 1;4865} else {4866n_places = num_masks - first_place + last_place + 1;4867}4868if (n_th <= n_places) {4869int place = masters_place;4870for (f = 1; f < n_th; f++) {4871kmp_info_t *th = team->t.t_threads[f];4872KMP_DEBUG_ASSERT(th != NULL);48734874if (place == last_place) {4875place = first_place;4876} else if (place == (num_masks - 1)) {4877place = 0;4878} else {4879place++;4880}4881__kmp_set_thread_place(team, th, first_place, last_place, place);48824883KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "4884"partition = [%d,%d]\n",4885__kmp_gtid_from_thread(team->t.t_threads[f]),4886team->t.t_id, f, place, first_place, last_place));4887}4888} else {4889int S, rem, gap, s_count;4890S = n_th / n_places;4891s_count = 0;4892rem = n_th - (S * n_places);4893gap = rem > 0 ? n_places / rem : n_places;4894int place = masters_place;4895int gap_ct = gap;4896for (f = 0; f < n_th; f++) {4897kmp_info_t *th = team->t.t_threads[f];4898KMP_DEBUG_ASSERT(th != NULL);48994900__kmp_set_thread_place(team, th, first_place, last_place, place);4901s_count++;49024903if ((s_count == S) && rem && (gap_ct == gap)) {4904// do nothing, add an extra thread to place on next iteration4905} else if ((s_count == S + 1) && rem && (gap_ct == gap)) {4906// we added an extra thread to this place; move to next place4907if (place == last_place) {4908place = first_place;4909} else if (place == (num_masks - 1)) {4910place = 0;4911} else {4912place++;4913}4914s_count = 0;4915gap_ct = 1;4916rem--;4917} else if (s_count == S) { // place full; don't add extra4918if (place == last_place) {4919place = first_place;4920} else if (place == (num_masks - 1)) {4921place = 0;4922} else {4923place++;4924}4925gap_ct++;4926s_count = 0;4927}49284929KA_TRACE(100,4930("__kmp_partition_places: close: T#%d(%d:%d) place %d "4931"partition = [%d,%d]\n",4932__kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,4933th->th.th_new_place, first_place, last_place));4934}4935KMP_DEBUG_ASSERT(place == masters_place);4936}4937} break;49384939case proc_bind_spread: {4940int f;4941int n_th = team->t.t_nproc;4942int n_places;4943int thidx;4944if (first_place <= last_place) {4945n_places = last_place - first_place + 1;4946} else {4947n_places = num_masks - first_place + last_place + 1;4948}4949if (n_th <= n_places) {4950int place = -1;49514952if (n_places != num_masks) {4953int S = n_places / n_th;4954int s_count, rem, gap, gap_ct;49554956place = masters_place;4957rem = n_places - n_th * S;4958gap = rem ? 
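// When there are more threads than places, the close/spread cases above use
// S = n_th / n_places base threads per place, with 'rem' places receiving one
// extra thread spaced 'gap' places apart. A simplified standalone sketch of
// that bookkeeping (it reproduces the per-place counts, not the exact
// placement order); threads_per_place is an illustrative name and assumes
// n_places > 0.
#if 0 // illustrative sketch, not part of the build
#include <cstdio>
#include <vector>

static std::vector<int> threads_per_place(int n_th, int n_places) {
  int S = n_th / n_places;
  int rem = n_th - S * n_places;
  std::vector<int> counts(n_places, S);
  int gap = rem > 0 ? n_places / rem : n_places;
  for (int p = 0, extras = rem; extras > 0; p += gap, --extras)
    counts[p] += 1; // extra threads go to every gap-th place
  return counts;
}

int main() {
  for (int c : threads_per_place(10, 4)) // 10 threads over 4 places
    std::printf("%d ", c);               // prints: 3 2 3 2
  std::printf("\n");
  return 0;
}
#endif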
n_th / rem : 1;4959gap_ct = gap;4960thidx = n_th;4961if (update_master_only == 1)4962thidx = 1;4963for (f = 0; f < thidx; f++) {4964kmp_info_t *th = team->t.t_threads[f];4965KMP_DEBUG_ASSERT(th != NULL);49664967int fplace = place, nplace = place;4968s_count = 1;4969while (s_count < S) {4970if (place == last_place) {4971place = first_place;4972} else if (place == (num_masks - 1)) {4973place = 0;4974} else {4975place++;4976}4977s_count++;4978}4979if (rem && (gap_ct == gap)) {4980if (place == last_place) {4981place = first_place;4982} else if (place == (num_masks - 1)) {4983place = 0;4984} else {4985place++;4986}4987rem--;4988gap_ct = 0;4989}4990__kmp_set_thread_place(team, th, fplace, place, nplace);4991gap_ct++;49924993if (place == last_place) {4994place = first_place;4995} else if (place == (num_masks - 1)) {4996place = 0;4997} else {4998place++;4999}50005001KA_TRACE(100,5002("__kmp_partition_places: spread: T#%d(%d:%d) place %d "5003"partition = [%d,%d], num_masks: %u\n",5004__kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,5005f, th->th.th_new_place, th->th.th_first_place,5006th->th.th_last_place, num_masks));5007}5008} else {5009/* Having uniform space of available computation places I can create5010T partitions of round(P/T) size and put threads into the first5011place of each partition. */5012double current = static_cast<double>(masters_place);5013double spacing =5014(static_cast<double>(n_places + 1) / static_cast<double>(n_th));5015int first, last;5016kmp_info_t *th;50175018thidx = n_th + 1;5019if (update_master_only == 1)5020thidx = 1;5021for (f = 0; f < thidx; f++) {5022first = static_cast<int>(current);5023last = static_cast<int>(current + spacing) - 1;5024KMP_DEBUG_ASSERT(last >= first);5025if (first >= n_places) {5026if (masters_place) {5027first -= n_places;5028last -= n_places;5029if (first == (masters_place + 1)) {5030KMP_DEBUG_ASSERT(f == n_th);5031first--;5032}5033if (last == masters_place) {5034KMP_DEBUG_ASSERT(f == (n_th - 1));5035last--;5036}5037} else {5038KMP_DEBUG_ASSERT(f == n_th);5039first = 0;5040last = 0;5041}5042}5043if (last >= n_places) {5044last = (n_places - 1);5045}5046place = first;5047current += spacing;5048if (f < n_th) {5049KMP_DEBUG_ASSERT(0 <= first);5050KMP_DEBUG_ASSERT(n_places > first);5051KMP_DEBUG_ASSERT(0 <= last);5052KMP_DEBUG_ASSERT(n_places > last);5053KMP_DEBUG_ASSERT(last_place >= first_place);5054th = team->t.t_threads[f];5055KMP_DEBUG_ASSERT(th);5056__kmp_set_thread_place(team, th, first, last, place);5057KA_TRACE(100,5058("__kmp_partition_places: spread: T#%d(%d:%d) place %d "5059"partition = [%d,%d], spacing = %.4f\n",5060__kmp_gtid_from_thread(team->t.t_threads[f]),5061team->t.t_id, f, th->th.th_new_place,5062th->th.th_first_place, th->th.th_last_place, spacing));5063}5064}5065}5066KMP_DEBUG_ASSERT(update_master_only || place == masters_place);5067} else {5068int S, rem, gap, s_count;5069S = n_th / n_places;5070s_count = 0;5071rem = n_th - (S * n_places);5072gap = rem > 0 ? 
n_places / rem : n_places;5073int place = masters_place;5074int gap_ct = gap;5075thidx = n_th;5076if (update_master_only == 1)5077thidx = 1;5078for (f = 0; f < thidx; f++) {5079kmp_info_t *th = team->t.t_threads[f];5080KMP_DEBUG_ASSERT(th != NULL);50815082__kmp_set_thread_place(team, th, place, place, place);5083s_count++;50845085if ((s_count == S) && rem && (gap_ct == gap)) {5086// do nothing, add an extra thread to place on next iteration5087} else if ((s_count == S + 1) && rem && (gap_ct == gap)) {5088// we added an extra thread to this place; move on to next place5089if (place == last_place) {5090place = first_place;5091} else if (place == (num_masks - 1)) {5092place = 0;5093} else {5094place++;5095}5096s_count = 0;5097gap_ct = 1;5098rem--;5099} else if (s_count == S) { // place is full; don't add extra thread5100if (place == last_place) {5101place = first_place;5102} else if (place == (num_masks - 1)) {5103place = 0;5104} else {5105place++;5106}5107gap_ct++;5108s_count = 0;5109}51105111KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d "5112"partition = [%d,%d]\n",5113__kmp_gtid_from_thread(team->t.t_threads[f]),5114team->t.t_id, f, th->th.th_new_place,5115th->th.th_first_place, th->th.th_last_place));5116}5117KMP_DEBUG_ASSERT(update_master_only || place == masters_place);5118}5119} break;51205121default:5122break;5123}51245125KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id));5126}51275128#endif // KMP_AFFINITY_SUPPORTED51295130/* allocate a new team data structure to use. take one off of the free pool if5131available */5132kmp_team_t *5133__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,5134#if OMPT_SUPPORT5135ompt_data_t ompt_parallel_data,5136#endif5137kmp_proc_bind_t new_proc_bind,5138kmp_internal_control_t *new_icvs,5139int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {5140KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);5141int f;5142kmp_team_t *team;5143int use_hot_team = !root->r.r_active;5144int level = 0;5145int do_place_partition = 1;51465147KA_TRACE(20, ("__kmp_allocate_team: called\n"));5148KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);5149KMP_DEBUG_ASSERT(max_nproc >= new_nproc);5150KMP_MB();51515152#if KMP_NESTED_HOT_TEAMS5153kmp_hot_team_ptr_t *hot_teams;5154if (master) {5155team = master->th.th_team;5156level = team->t.t_active_level;5157if (master->th.th_teams_microtask) { // in teams construct?5158if (master->th.th_teams_size.nteams > 1 &&5159( // #teams > 15160team->t.t_pkfn ==5161(microtask_t)__kmp_teams_master || // inner fork of the teams5162master->th.th_teams_level <5163team->t.t_level)) { // or nested parallel inside the teams5164++level; // not increment if #teams==1, or for outer fork of the teams;5165// increment otherwise5166}5167// Do not perform the place partition if inner fork of the teams5168// Wait until nested parallel region encountered inside teams construct5169if ((master->th.th_teams_size.nteams == 1 &&5170master->th.th_teams_level >= team->t.t_level) ||5171(team->t.t_pkfn == (microtask_t)__kmp_teams_master))5172do_place_partition = 0;5173}5174hot_teams = master->th.th_hot_teams;5175if (level < __kmp_hot_teams_max_level && hot_teams &&5176hot_teams[level].hot_team) {5177// hot team has already been allocated for given level5178use_hot_team = 1;5179} else {5180use_hot_team = 0;5181}5182} else {5183// check we won't access uninitialized hot_teams, just in case5184KMP_DEBUG_ASSERT(new_nproc == 1);5185}5186#endif5187// Optimization to use a "hot" team5188if (use_hot_team && new_nproc > 1) 
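// ---------------------------------------------------------------------------
// Illustrative sketch, not part of libomp (kept out of the build with #if 0):
// the hot-team reuse decision above amounts to a per-primary-thread cache of
// teams indexed by nesting level -- reuse the team kept for this level if one
// exists, otherwise allocate. HotTeamCache, Team and max_hot_level are
// inventions of the example, standing in for kmp_hot_team_ptr_t and
// __kmp_hot_teams_max_level.
// ---------------------------------------------------------------------------
#if 0 // illustrative only
#include <vector>

struct Team; // opaque stand-in for kmp_team_t

struct HotTeamCache {
  std::vector<Team *> by_level; // index == active nesting level

  // Return the cached team for `level`, or nullptr if the caller must allocate.
  Team *lookup(int level, int max_hot_level) const {
    if (level >= max_hot_level || level >= (int)by_level.size())
      return nullptr;
    return by_level[level];
  }

  void remember(int level, Team *team) {
    if ((int)by_level.size() <= level)
      by_level.resize(level + 1, nullptr);
    by_level[level] = team;
  }
};

int main() {
  HotTeamCache cache;
  Team *outer = nullptr; // pretend this came from a real allocation
  cache.remember(/*level=*/0, outer);
  return cache.lookup(/*level=*/0, /*max_hot_level=*/1) == outer ? 0 : 1;
}
#endif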
{5189KMP_DEBUG_ASSERT(new_nproc <= max_nproc);5190#if KMP_NESTED_HOT_TEAMS5191team = hot_teams[level].hot_team;5192#else5193team = root->r.r_hot_team;5194#endif5195#if KMP_DEBUG5196if (__kmp_tasking_mode != tskm_immediate_exec) {5197KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "5198"task_team[1] = %p before reinit\n",5199team->t.t_task_team[0], team->t.t_task_team[1]));5200}5201#endif52025203if (team->t.t_nproc != new_nproc &&5204__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {5205// Distributed barrier may need a resize5206int old_nthr = team->t.t_nproc;5207__kmp_resize_dist_barrier(team, old_nthr, new_nproc);5208}52095210// If not doing the place partition, then reset the team's proc bind5211// to indicate that partitioning of all threads still needs to take place5212if (do_place_partition == 0)5213team->t.t_proc_bind = proc_bind_default;5214// Has the number of threads changed?5215/* Let's assume the most common case is that the number of threads is5216unchanged, and put that case first. */5217if (team->t.t_nproc == new_nproc) { // Check changes in number of threads5218KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n"));5219// This case can mean that omp_set_num_threads() was called and the hot5220// team size was already reduced, so we check the special flag5221if (team->t.t_size_changed == -1) {5222team->t.t_size_changed = 1;5223} else {5224KMP_CHECK_UPDATE(team->t.t_size_changed, 0);5225}52265227// TODO???: team->t.t_max_active_levels = new_max_active_levels;5228kmp_r_sched_t new_sched = new_icvs->sched;5229// set primary thread's schedule as new run-time schedule5230KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);52315232__kmp_reinitialize_team(team, new_icvs,5233root->r.r_uber_thread->th.th_ident);52345235KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,5236team->t.t_threads[0], team));5237__kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);52385239#if KMP_AFFINITY_SUPPORTED5240if ((team->t.t_size_changed == 0) &&5241(team->t.t_proc_bind == new_proc_bind)) {5242if (new_proc_bind == proc_bind_spread) {5243if (do_place_partition) {5244// add flag to update only master for spread5245__kmp_partition_places(team, 1);5246}5247}5248KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: "5249"proc_bind = %d, partition = [%d,%d]\n",5250team->t.t_id, new_proc_bind, team->t.t_first_place,5251team->t.t_last_place));5252} else {5253if (do_place_partition) {5254KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);5255__kmp_partition_places(team);5256}5257}5258#else5259KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);5260#endif /* KMP_AFFINITY_SUPPORTED */5261} else if (team->t.t_nproc > new_nproc) {5262KA_TRACE(20,5263("__kmp_allocate_team: decreasing hot team thread count to %d\n",5264new_nproc));52655266team->t.t_size_changed = 1;5267if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {5268// Barrier size already reduced earlier in this function5269// Activate team threads via th_used_in_team5270__kmp_add_threads_to_team(team, new_nproc);5271}5272// When decreasing team size, threads no longer in the team should5273// unref task team.5274if (__kmp_tasking_mode != tskm_immediate_exec) {5275for (f = new_nproc; f < team->t.t_nproc; f++) {5276kmp_info_t *th = team->t.t_threads[f];5277KMP_DEBUG_ASSERT(th);5278th->th.th_task_team = NULL;5279}5280}5281#if KMP_NESTED_HOT_TEAMS5282if (__kmp_hot_teams_mode == 0) {5283// AC: saved number of threads should correspond to team's value 
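// ---------------------------------------------------------------------------
// Illustrative sketch, not part of libomp (kept out of the build with #if 0):
// the hot-team reuse path above leans on KMP_CHECK_UPDATE, i.e. "store only
// if the value actually changed", so that re-forking an unchanged team does
// not dirty cache lines that idle workers may be reading or spinning on.
// check_update() below is an invented stand-in for that macro.
// ---------------------------------------------------------------------------
#if 0 // illustrative only
#include <cstdio>

// Skip the store when nothing changes; a redundant write would still pull the
// containing cache line into Modified state and penalize concurrent readers.
template <class T> static inline void check_update(T &dst, const T &src) {
  if (dst != src)
    dst = src;
}

int main() {
  int team_argc = 4;
  check_update(team_argc, 4);     // common case: unchanged, no store issued
  check_update(team_argc, 6);     // value differs, so the store happens
  std::printf("%d\n", team_argc); // prints: 6
}
#endif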
in this5284// mode, can be bigger in mode 1, when hot team has threads in reserve5285KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);5286hot_teams[level].hot_team_nth = new_nproc;5287#endif // KMP_NESTED_HOT_TEAMS5288/* release the extra threads we don't need any more */5289for (f = new_nproc; f < team->t.t_nproc; f++) {5290KMP_DEBUG_ASSERT(team->t.t_threads[f]);5291__kmp_free_thread(team->t.t_threads[f]);5292team->t.t_threads[f] = NULL;5293}5294#if KMP_NESTED_HOT_TEAMS5295} // (__kmp_hot_teams_mode == 0)5296else {5297// When keeping extra threads in team, switch threads to wait on own5298// b_go flag5299for (f = new_nproc; f < team->t.t_nproc; ++f) {5300KMP_DEBUG_ASSERT(team->t.t_threads[f]);5301kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;5302for (int b = 0; b < bs_last_barrier; ++b) {5303if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {5304balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;5305}5306KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);5307}5308}5309}5310#endif // KMP_NESTED_HOT_TEAMS5311team->t.t_nproc = new_nproc;5312// TODO???: team->t.t_max_active_levels = new_max_active_levels;5313KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);5314__kmp_reinitialize_team(team, new_icvs,5315root->r.r_uber_thread->th.th_ident);53165317// Update remaining threads5318for (f = 0; f < new_nproc; ++f) {5319team->t.t_threads[f]->th.th_team_nproc = new_nproc;5320}53215322// restore the current task state of the primary thread: should be the5323// implicit task5324KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,5325team->t.t_threads[0], team));53265327__kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);53285329#ifdef KMP_DEBUG5330for (f = 0; f < team->t.t_nproc; f++) {5331KMP_DEBUG_ASSERT(team->t.t_threads[f] &&5332team->t.t_threads[f]->th.th_team_nproc ==5333team->t.t_nproc);5334}5335#endif53365337if (do_place_partition) {5338KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);5339#if KMP_AFFINITY_SUPPORTED5340__kmp_partition_places(team);5341#endif5342}5343} else { // team->t.t_nproc < new_nproc53445345KA_TRACE(20,5346("__kmp_allocate_team: increasing hot team thread count to %d\n",5347new_nproc));5348int old_nproc = team->t.t_nproc; // save old value and use to update only5349team->t.t_size_changed = 1;53505351#if KMP_NESTED_HOT_TEAMS5352int avail_threads = hot_teams[level].hot_team_nth;5353if (new_nproc < avail_threads)5354avail_threads = new_nproc;5355kmp_info_t **other_threads = team->t.t_threads;5356for (f = team->t.t_nproc; f < avail_threads; ++f) {5357// Adjust barrier data of reserved threads (if any) of the team5358// Other data will be set in __kmp_initialize_info() below.5359int b;5360kmp_balign_t *balign = other_threads[f]->th.th_bar;5361for (b = 0; b < bs_last_barrier; ++b) {5362balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;5363KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);5364#if USE_DEBUGGER5365balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;5366#endif5367}5368}5369if (hot_teams[level].hot_team_nth >= new_nproc) {5370// we have all needed threads in reserve, no need to allocate any5371// this only possible in mode 1, cannot have reserved threads in mode 05372KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);5373team->t.t_nproc = new_nproc; // just get reserved threads involved5374} else {5375// We may have some threads in reserve, but not enough;5376// get reserved threads involved if any.5377team->t.t_nproc = 
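// ---------------------------------------------------------------------------
// Illustrative sketch, not part of libomp (kept out of the build with #if 0):
// when the hot team grows, the code above first re-activates any threads the
// team kept in reserve (hot-teams mode 1) and only allocates brand-new
// workers for the remainder. plan_growth() and GrowPlan are invented names
// that mirror just that sizing decision.
// ---------------------------------------------------------------------------
#if 0 // illustrative only
#include <algorithm>
#include <cassert>

struct GrowPlan {
  int from_reserve;  // threads re-activated from the team's reserve
  int newly_created; // threads that must be allocated from scratch
};

// `current` threads are active, `reserved` are parked with the team
// (reserved == current when no threads are kept back), `wanted` is the
// requested new team size.
static GrowPlan plan_growth(int current, int reserved, int wanted) {
  assert(wanted >= current && reserved >= current);
  int usable_reserve = std::min(reserved, wanted);
  return {usable_reserve - current, wanted - usable_reserve};
}

int main() {
  GrowPlan p = plan_growth(/*current=*/4, /*reserved=*/8, /*wanted=*/6);
  // p.from_reserve == 2 and p.newly_created == 0: the reserve covers it all.
  return p.from_reserve == 2 && p.newly_created == 0 ? 0 : 1;
}
#endif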
hot_teams[level].hot_team_nth;5378hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size5379#endif // KMP_NESTED_HOT_TEAMS5380if (team->t.t_max_nproc < new_nproc) {5381/* reallocate larger arrays */5382__kmp_reallocate_team_arrays(team, new_nproc);5383__kmp_reinitialize_team(team, new_icvs, NULL);5384}53855386#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \5387KMP_AFFINITY_SUPPORTED5388/* Temporarily set full mask for primary thread before creation of5389workers. The reason is that workers inherit the affinity from the5390primary thread, so if a lot of workers are created on the single5391core quickly, they don't get a chance to set their own affinity for5392a long time. */5393kmp_affinity_raii_t new_temp_affinity{__kmp_affin_fullMask};5394#endif53955396/* allocate new threads for the hot team */5397for (f = team->t.t_nproc; f < new_nproc; f++) {5398kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);5399KMP_DEBUG_ASSERT(new_worker);5400team->t.t_threads[f] = new_worker;54015402KA_TRACE(20,5403("__kmp_allocate_team: team %d init T#%d arrived: "5404"join=%llu, plain=%llu\n",5405team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,5406team->t.t_bar[bs_forkjoin_barrier].b_arrived,5407team->t.t_bar[bs_plain_barrier].b_arrived));54085409{ // Initialize barrier data for new threads.5410int b;5411kmp_balign_t *balign = new_worker->th.th_bar;5412for (b = 0; b < bs_last_barrier; ++b) {5413balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;5414KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=5415KMP_BARRIER_PARENT_FLAG);5416#if USE_DEBUGGER5417balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;5418#endif5419}5420}5421}54225423#if (KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY) && \5424KMP_AFFINITY_SUPPORTED5425/* Restore initial primary thread's affinity mask */5426new_temp_affinity.restore();5427#endif5428#if KMP_NESTED_HOT_TEAMS5429} // end of check of t_nproc vs. new_nproc vs. 
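// ---------------------------------------------------------------------------
// Illustrative sketch, not part of libomp (kept out of the build with #if 0):
// newly created workers inherit the creating thread's affinity, which is why
// the code above temporarily widens the primary thread's mask while spawning
// and restores it afterwards (kmp_affinity_raii_t). The Linux-only
// ScopedFullAffinity class below is an invented analogue built directly on
// sched_getaffinity/sched_setaffinity.
// ---------------------------------------------------------------------------
#if 0 // illustrative only; Linux-specific
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <sched.h>

class ScopedFullAffinity {
  cpu_set_t saved_;

public:
  ScopedFullAffinity() {
    sched_getaffinity(0, sizeof(saved_), &saved_); // remember the current mask
    cpu_set_t full;
    CPU_ZERO(&full);
    for (int cpu = 0; cpu < CPU_SETSIZE; ++cpu)
      CPU_SET(cpu, &full); // ask for every CPU; the kernel clips to what exists
    sched_setaffinity(0, sizeof(full), &full);
  }
  ~ScopedFullAffinity() {
    sched_setaffinity(0, sizeof(saved_), &saved_); // restore the original mask
  }
};

int main() {
  ScopedFullAffinity widen;
  // ... create worker threads here; they inherit the widened mask ...
} // destructor puts the caller's original affinity back
#endif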
hot_team_nth5430#endif // KMP_NESTED_HOT_TEAMS5431if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {5432// Barrier size already increased earlier in this function5433// Activate team threads via th_used_in_team5434__kmp_add_threads_to_team(team, new_nproc);5435}5436/* make sure everyone is syncronized */5437// new threads below5438__kmp_initialize_team(team, new_nproc, new_icvs,5439root->r.r_uber_thread->th.th_ident);54405441/* reinitialize the threads */5442KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);5443for (f = 0; f < team->t.t_nproc; ++f)5444__kmp_initialize_info(team->t.t_threads[f], team, f,5445__kmp_gtid_from_tid(f, team));54465447// set th_task_state for new threads in hot team with older thread's state5448kmp_uint8 old_state = team->t.t_threads[old_nproc - 1]->th.th_task_state;5449for (f = old_nproc; f < team->t.t_nproc; ++f)5450team->t.t_threads[f]->th.th_task_state = old_state;54515452#ifdef KMP_DEBUG5453for (f = 0; f < team->t.t_nproc; ++f) {5454KMP_DEBUG_ASSERT(team->t.t_threads[f] &&5455team->t.t_threads[f]->th.th_team_nproc ==5456team->t.t_nproc);5457}5458#endif54595460if (do_place_partition) {5461KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);5462#if KMP_AFFINITY_SUPPORTED5463__kmp_partition_places(team);5464#endif5465}5466} // Check changes in number of threads54675468if (master->th.th_teams_microtask) {5469for (f = 1; f < new_nproc; ++f) {5470// propagate teams construct specific info to workers5471kmp_info_t *thr = team->t.t_threads[f];5472thr->th.th_teams_microtask = master->th.th_teams_microtask;5473thr->th.th_teams_level = master->th.th_teams_level;5474thr->th.th_teams_size = master->th.th_teams_size;5475}5476}5477#if KMP_NESTED_HOT_TEAMS5478if (level) {5479// Sync barrier state for nested hot teams, not needed for outermost hot5480// team.5481for (f = 1; f < new_nproc; ++f) {5482kmp_info_t *thr = team->t.t_threads[f];5483int b;5484kmp_balign_t *balign = thr->th.th_bar;5485for (b = 0; b < bs_last_barrier; ++b) {5486balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;5487KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);5488#if USE_DEBUGGER5489balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;5490#endif5491}5492}5493}5494#endif // KMP_NESTED_HOT_TEAMS54955496/* reallocate space for arguments if necessary */5497__kmp_alloc_argv_entries(argc, team, TRUE);5498KMP_CHECK_UPDATE(team->t.t_argc, argc);5499// The hot team re-uses the previous task team,5500// if untouched during the previous release->gather phase.55015502KF_TRACE(10, (" hot_team = %p\n", team));55035504#if KMP_DEBUG5505if (__kmp_tasking_mode != tskm_immediate_exec) {5506KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p "5507"task_team[1] = %p after reinit\n",5508team->t.t_task_team[0], team->t.t_task_team[1]));5509}5510#endif55115512#if OMPT_SUPPORT5513__ompt_team_assign_id(team, ompt_parallel_data);5514#endif55155516KMP_MB();55175518return team;5519}55205521/* next, let's try to take one from the team pool */5522KMP_MB();5523for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {5524/* TODO: consider resizing undersized teams instead of reaping them, now5525that we have a resizing mechanism */5526if (team->t.t_max_nproc >= max_nproc) {5527/* take this team from the team pool */5528__kmp_team_pool = team->t.t_next_pool;55295530if (max_nproc > 1 &&5531__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {5532if (!team->t.b) { // Allocate barrier structure5533team->t.b = 
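// ---------------------------------------------------------------------------
// Illustrative sketch, not part of libomp (kept out of the build with #if 0):
// the loop above walks a singly linked pool of retired teams, reuses the
// first one whose capacity is large enough and reaps undersized ones as it
// goes. PooledTeam and pool_take() are invented for the example.
// ---------------------------------------------------------------------------
#if 0 // illustrative only
#include <cstdio>

struct PooledTeam {
  int max_nproc;    // capacity the team was built for
  PooledTeam *next; // next team in the free pool
};

static PooledTeam *pool_take(PooledTeam *&pool, int need) {
  while (pool) {
    PooledTeam *t = pool;
    pool = t->next;
    if (t->max_nproc >= need) {
      t->next = nullptr;
      return t; // big enough: hand it back for reuse
    }
    delete t; // too small: reap it and keep scanning
  }
  return nullptr; // pool exhausted: the caller allocates a fresh team
}

int main() {
  PooledTeam *pool = new PooledTeam{2, new PooledTeam{8, nullptr}};
  PooledTeam *team = pool_take(pool, /*need=*/4);
  std::printf("%d\n", team ? team->max_nproc : -1); // prints: 8
  delete team;
}
#endif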
distributedBarrier::allocate(__kmp_dflt_team_nth_ub);5534}5535}55365537/* setup the team for fresh use */5538__kmp_initialize_team(team, new_nproc, new_icvs, NULL);55395540KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and "5541"task_team[1] %p to NULL\n",5542&team->t.t_task_team[0], &team->t.t_task_team[1]));5543team->t.t_task_team[0] = NULL;5544team->t.t_task_team[1] = NULL;55455546/* reallocate space for arguments if necessary */5547__kmp_alloc_argv_entries(argc, team, TRUE);5548KMP_CHECK_UPDATE(team->t.t_argc, argc);55495550KA_TRACE(555120, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",5552team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));5553{ // Initialize barrier data.5554int b;5555for (b = 0; b < bs_last_barrier; ++b) {5556team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;5557#if USE_DEBUGGER5558team->t.t_bar[b].b_master_arrived = 0;5559team->t.t_bar[b].b_team_arrived = 0;5560#endif5561}5562}55635564team->t.t_proc_bind = new_proc_bind;55655566KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n",5567team->t.t_id));55685569#if OMPT_SUPPORT5570__ompt_team_assign_id(team, ompt_parallel_data);5571#endif55725573team->t.t_nested_nth = NULL;55745575KMP_MB();55765577return team;5578}55795580/* reap team if it is too small, then loop back and check the next one */5581// not sure if this is wise, but, will be redone during the hot-teams5582// rewrite.5583/* TODO: Use technique to find the right size hot-team, don't reap them */5584team = __kmp_reap_team(team);5585__kmp_team_pool = team;5586}55875588/* nothing available in the pool, no matter, make a new team! */5589KMP_MB();5590team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t));55915592/* and set it up */5593team->t.t_max_nproc = max_nproc;5594if (max_nproc > 1 &&5595__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {5596// Allocate barrier structure5597team->t.b = distributedBarrier::allocate(__kmp_dflt_team_nth_ub);5598}55995600/* NOTE well, for some reason allocating one big buffer and dividing it up5601seems to really hurt performance a lot on the P4, so, let's not use this */5602__kmp_allocate_team_arrays(team, max_nproc);56035604KA_TRACE(20, ("__kmp_allocate_team: making a new team\n"));5605__kmp_initialize_team(team, new_nproc, new_icvs, NULL);56065607KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] "5608"%p to NULL\n",5609&team->t.t_task_team[0], &team->t.t_task_team[1]));5610team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes5611// memory, no need to duplicate5612team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes5613// memory, no need to duplicate56145615if (__kmp_storage_map) {5616__kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc);5617}56185619/* allocate space for arguments */5620__kmp_alloc_argv_entries(argc, team, FALSE);5621team->t.t_argc = argc;56225623KA_TRACE(20,5624("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",5625team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));5626{ // Initialize barrier data.5627int b;5628for (b = 0; b < bs_last_barrier; ++b) {5629team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;5630#if USE_DEBUGGER5631team->t.t_bar[b].b_master_arrived = 0;5632team->t.t_bar[b].b_team_arrived = 0;5633#endif5634}5635}56365637team->t.t_proc_bind = new_proc_bind;56385639#if OMPT_SUPPORT5640__ompt_team_assign_id(team, ompt_parallel_data);5641team->t.ompt_serialized_team_info = 
NULL;5642#endif56435644KMP_MB();56455646team->t.t_nested_nth = NULL;56475648KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n",5649team->t.t_id));56505651return team;5652}56535654/* TODO implement hot-teams at all levels */5655/* TODO implement lazy thread release on demand (disband request) */56565657/* free the team. return it to the team pool. release all the threads5658* associated with it */5659void __kmp_free_team(kmp_root_t *root,5660kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {5661int f;5662KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),5663team->t.t_id));56645665/* verify state */5666KMP_DEBUG_ASSERT(root);5667KMP_DEBUG_ASSERT(team);5668KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);5669KMP_DEBUG_ASSERT(team->t.t_threads);56705671int use_hot_team = team == root->r.r_hot_team;5672#if KMP_NESTED_HOT_TEAMS5673int level;5674if (master) {5675level = team->t.t_active_level - 1;5676if (master->th.th_teams_microtask) { // in teams construct?5677if (master->th.th_teams_size.nteams > 1) {5678++level; // level was not increased in teams construct for5679// team_of_masters5680}5681if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&5682master->th.th_teams_level == team->t.t_level) {5683++level; // level was not increased in teams construct for5684// team_of_workers before the parallel5685} // team->t.t_level will be increased inside parallel5686}5687#if KMP_DEBUG5688kmp_hot_team_ptr_t *hot_teams = master->th.th_hot_teams;5689#endif5690if (level < __kmp_hot_teams_max_level) {5691KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);5692use_hot_team = 1;5693}5694}5695#endif // KMP_NESTED_HOT_TEAMS56965697/* team is done working */5698TCW_SYNC_PTR(team->t.t_pkfn,5699NULL); // Important for Debugging Support Library.5700#if KMP_OS_WINDOWS5701team->t.t_copyin_counter = 0; // init counter for possible reuse5702#endif5703// Do not reset pointer to parent team to NULL for hot teams.57045705/* if we are non-hot team, release our threads */5706if (!use_hot_team) {5707if (__kmp_tasking_mode != tskm_immediate_exec) {5708// Wait for threads to reach reapable state5709for (f = 1; f < team->t.t_nproc; ++f) {5710KMP_DEBUG_ASSERT(team->t.t_threads[f]);5711kmp_info_t *th = team->t.t_threads[f];5712volatile kmp_uint32 *state = &th->th.th_reap_state;5713while (*state != KMP_SAFE_TO_REAP) {5714#if KMP_OS_WINDOWS5715// On Windows a thread can be killed at any time, check this5716DWORD ecode;5717if (!__kmp_is_thread_alive(th, &ecode)) {5718*state = KMP_SAFE_TO_REAP; // reset the flag for dead thread5719break;5720}5721#endif5722// first check if thread is sleeping5723if (th->th.th_sleep_loc)5724__kmp_null_resume_wrapper(th);5725KMP_CPU_PAUSE();5726}5727}57285729// Delete task teams5730int tt_idx;5731for (tt_idx = 0; tt_idx < 2; ++tt_idx) {5732kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];5733if (task_team != NULL) {5734for (f = 0; f < team->t.t_nproc; ++f) { // threads unref task teams5735KMP_DEBUG_ASSERT(team->t.t_threads[f]);5736team->t.t_threads[f]->th.th_task_team = NULL;5737}5738KA_TRACE(573920,5740("__kmp_free_team: T#%d deactivating task_team %p on team %d\n",5741__kmp_get_gtid(), task_team, team->t.t_id));5742#if KMP_NESTED_HOT_TEAMS5743__kmp_free_task_team(master, task_team);5744#endif5745team->t.t_task_team[tt_idx] = NULL;5746}5747}5748}57495750// Before clearing parent pointer, check if nested_nth list should be freed5751if (team->t.t_nested_nth && team->t.t_nested_nth != &__kmp_nested_nth &&5752team->t.t_nested_nth != 
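// ---------------------------------------------------------------------------
// Illustrative sketch, not part of libomp (kept out of the build with #if 0):
// before a non-hot team is torn down, the code above spins on each worker's
// th_reap_state until it reports KMP_SAFE_TO_REAP, waking the worker if it
// happens to be sleeping. The Worker struct and wait_until_reapable() below
// are invented; a plain atomic flag and yield() stand in for the runtime's
// flags and KMP_CPU_PAUSE().
// ---------------------------------------------------------------------------
#if 0 // illustrative only
#include <atomic>
#include <chrono>
#include <cstdio>
#include <thread>

struct Worker {
  std::atomic<bool> safe_to_reap{false};
  std::atomic<bool> sleeping{false};
};

static void wait_until_reapable(Worker &w) {
  while (!w.safe_to_reap.load(std::memory_order_acquire)) {
    if (w.sleeping.load(std::memory_order_relaxed)) {
      // the real code resumes the worker through its sleep flag here
    }
    std::this_thread::yield(); // stand-in for KMP_CPU_PAUSE()
  }
}

int main() {
  Worker w;
  std::thread t([&] {
    std::this_thread::sleep_for(std::chrono::milliseconds(10));
    w.safe_to_reap.store(true, std::memory_order_release);
  });
  wait_until_reapable(w);
  std::printf("worker is reapable\n");
  t.join();
}
#endif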
team->t.t_parent->t.t_nested_nth) {5753KMP_INTERNAL_FREE(team->t.t_nested_nth->nth);5754KMP_INTERNAL_FREE(team->t.t_nested_nth);5755}5756team->t.t_nested_nth = NULL;57575758// Reset pointer to parent team only for non-hot teams.5759team->t.t_parent = NULL;5760team->t.t_level = 0;5761team->t.t_active_level = 0;57625763/* free the worker threads */5764for (f = 1; f < team->t.t_nproc; ++f) {5765KMP_DEBUG_ASSERT(team->t.t_threads[f]);5766if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {5767KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team),57681, 2);5769}5770__kmp_free_thread(team->t.t_threads[f]);5771}57725773if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {5774if (team->t.b) {5775// wake up thread at old location5776team->t.b->go_release();5777if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {5778for (f = 1; f < team->t.t_nproc; ++f) {5779if (team->t.b->sleep[f].sleep) {5780__kmp_atomic_resume_64(5781team->t.t_threads[f]->th.th_info.ds.ds_gtid,5782(kmp_atomic_flag_64<> *)NULL);5783}5784}5785}5786// Wait for threads to be removed from team5787for (int f = 1; f < team->t.t_nproc; ++f) {5788while (team->t.t_threads[f]->th.th_used_in_team.load() != 0)5789KMP_CPU_PAUSE();5790}5791}5792}57935794for (f = 1; f < team->t.t_nproc; ++f) {5795team->t.t_threads[f] = NULL;5796}57975798if (team->t.t_max_nproc > 1 &&5799__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {5800distributedBarrier::deallocate(team->t.b);5801team->t.b = NULL;5802}5803/* put the team back in the team pool */5804/* TODO limit size of team pool, call reap_team if pool too large */5805team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);5806__kmp_team_pool = (volatile kmp_team_t *)team;5807} else { // Check if team was created for primary threads in teams construct5808// See if first worker is a CG root5809KMP_DEBUG_ASSERT(team->t.t_threads[1] &&5810team->t.t_threads[1]->th.th_cg_roots);5811if (team->t.t_threads[1]->th.th_cg_roots->cg_root == team->t.t_threads[1]) {5812// Clean up the CG root nodes on workers so that this team can be re-used5813for (f = 1; f < team->t.t_nproc; ++f) {5814kmp_info_t *thr = team->t.t_threads[f];5815KMP_DEBUG_ASSERT(thr && thr->th.th_cg_roots &&5816thr->th.th_cg_roots->cg_root == thr);5817// Pop current CG root off list5818kmp_cg_root_t *tmp = thr->th.th_cg_roots;5819thr->th.th_cg_roots = tmp->up;5820KA_TRACE(100, ("__kmp_free_team: Thread %p popping node %p and moving"5821" up to node %p. cg_nthreads was %d\n",5822thr, tmp, thr->th.th_cg_roots, tmp->cg_nthreads));5823int i = tmp->cg_nthreads--;5824if (i == 1) {5825__kmp_free(tmp); // free CG if we are the last thread in it5826}5827// Restore current task's thread_limit from CG root5828if (thr->th.th_cg_roots)5829thr->th.th_current_task->td_icvs.thread_limit =5830thr->th.th_cg_roots->cg_thread_limit;5831}5832}5833}58345835KMP_MB();5836}58375838/* reap the team. destroy it, reclaim all its resources and free its memory */5839kmp_team_t *__kmp_reap_team(kmp_team_t *team) {5840kmp_team_t *next_pool = team->t.t_next_pool;58415842KMP_DEBUG_ASSERT(team);5843KMP_DEBUG_ASSERT(team->t.t_dispatch);5844KMP_DEBUG_ASSERT(team->t.t_disp_buffer);5845KMP_DEBUG_ASSERT(team->t.t_threads);5846KMP_DEBUG_ASSERT(team->t.t_argv);58475848/* TODO clean the threads that are a part of this? 
*/58495850/* free stuff */5851__kmp_free_team_arrays(team);5852if (team->t.t_argv != &team->t.t_inline_argv[0])5853__kmp_free((void *)team->t.t_argv);5854__kmp_free(team);58555856KMP_MB();5857return next_pool;5858}58595860// Free the thread. Don't reap it, just place it on the pool of available5861// threads.5862//5863// Changes for Quad issue 527845: We need a predictable OMP tid <-> gtid5864// binding for the affinity mechanism to be useful.5865//5866// Now, we always keep the free list (__kmp_thread_pool) sorted by gtid.5867// However, we want to avoid a potential performance problem by always5868// scanning through the list to find the correct point at which to insert5869// the thread (potential N**2 behavior). To do this we keep track of the5870// last place a thread struct was inserted (__kmp_thread_pool_insert_pt).5871// With single-level parallelism, threads will always be added to the tail5872// of the list, kept track of by __kmp_thread_pool_insert_pt. With nested5873// parallelism, all bets are off and we may need to scan through the entire5874// free list.5875//5876// This change also has a potentially large performance benefit, for some5877// applications. Previously, as threads were freed from the hot team, they5878// would be placed back on the free list in inverse order. If the hot team5879// grew back to it's original size, then the freed thread would be placed5880// back on the hot team in reverse order. This could cause bad cache5881// locality problems on programs where the size of the hot team regularly5882// grew and shrunk.5883//5884// Now, for single-level parallelism, the OMP tid is always == gtid.5885void __kmp_free_thread(kmp_info_t *this_th) {5886int gtid;5887kmp_info_t **scan;58885889KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",5890__kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));58915892KMP_DEBUG_ASSERT(this_th);58935894// When moving thread to pool, switch thread to wait on own b_go flag, and5895// uninitialized (NULL team).5896int b;5897kmp_balign_t *balign = this_th->th.th_bar;5898for (b = 0; b < bs_last_barrier; ++b) {5899if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)5900balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;5901balign[b].bb.team = NULL;5902balign[b].bb.leaf_kids = 0;5903}5904this_th->th.th_task_state = 0;5905this_th->th.th_reap_state = KMP_SAFE_TO_REAP;59065907/* put thread back on the free pool */5908TCW_PTR(this_th->th.th_team, NULL);5909TCW_PTR(this_th->th.th_root, NULL);5910TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */59115912while (this_th->th.th_cg_roots) {5913this_th->th.th_cg_roots->cg_nthreads--;5914KA_TRACE(100, ("__kmp_free_thread: Thread %p decrement cg_nthreads on node"5915" %p of thread %p to %d\n",5916this_th, this_th->th.th_cg_roots,5917this_th->th.th_cg_roots->cg_root,5918this_th->th.th_cg_roots->cg_nthreads));5919kmp_cg_root_t *tmp = this_th->th.th_cg_roots;5920if (tmp->cg_root == this_th) { // Thread is a cg_root5921KMP_DEBUG_ASSERT(tmp->cg_nthreads == 0);5922KA_TRACE(59235, ("__kmp_free_thread: Thread %p freeing node %p\n", this_th, tmp));5924this_th->th.th_cg_roots = tmp->up;5925__kmp_free(tmp);5926} else { // Worker thread5927if (tmp->cg_nthreads == 0) { // last thread leaves contention group5928__kmp_free(tmp);5929}5930this_th->th.th_cg_roots = NULL;5931break;5932}5933}59345935/* If the implicit task assigned to this thread can be used by other threads5936* -> multiple threads can share the data and try to free the task at5937* __kmp_reap_thread at exit. 
This duplicate use of the task data can happen5938* with higher probability when hot team is disabled but can occurs even when5939* the hot team is enabled */5940__kmp_free_implicit_task(this_th);5941this_th->th.th_current_task = NULL;59425943// If the __kmp_thread_pool_insert_pt is already past the new insert5944// point, then we need to re-scan the entire list.5945gtid = this_th->th.th_info.ds.ds_gtid;5946if (__kmp_thread_pool_insert_pt != NULL) {5947KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);5948if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {5949__kmp_thread_pool_insert_pt = NULL;5950}5951}59525953// Scan down the list to find the place to insert the thread.5954// scan is the address of a link in the list, possibly the address of5955// __kmp_thread_pool itself.5956//5957// In the absence of nested parallelism, the for loop will have 0 iterations.5958if (__kmp_thread_pool_insert_pt != NULL) {5959scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);5960} else {5961scan = CCAST(kmp_info_t **, &__kmp_thread_pool);5962}5963for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);5964scan = &((*scan)->th.th_next_pool))5965;59665967// Insert the new element on the list, and set __kmp_thread_pool_insert_pt5968// to its address.5969TCW_PTR(this_th->th.th_next_pool, *scan);5970__kmp_thread_pool_insert_pt = *scan = this_th;5971KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||5972(this_th->th.th_info.ds.ds_gtid <5973this_th->th.th_next_pool->th.th_info.ds.ds_gtid));5974TCW_4(this_th->th.th_in_pool, TRUE);5975__kmp_suspend_initialize_thread(this_th);5976__kmp_lock_suspend_mx(this_th);5977if (this_th->th.th_active == TRUE) {5978KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);5979this_th->th.th_active_in_pool = TRUE;5980}5981#if KMP_DEBUG5982else {5983KMP_DEBUG_ASSERT(this_th->th.th_active_in_pool == FALSE);5984}5985#endif5986__kmp_unlock_suspend_mx(this_th);59875988TCW_4(__kmp_nth, __kmp_nth - 1);59895990#ifdef KMP_ADJUST_BLOCKTIME5991/* Adjust blocktime back to user setting or default if necessary */5992/* Middle initialization might never have occurred */5993if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {5994KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);5995if (__kmp_nth <= __kmp_avail_proc) {5996__kmp_zero_bt = FALSE;5997}5998}5999#endif /* KMP_ADJUST_BLOCKTIME */60006001KMP_MB();6002}60036004/* ------------------------------------------------------------------------ */60056006void *__kmp_launch_thread(kmp_info_t *this_thr) {6007#if OMP_PROFILING_SUPPORT6008ProfileTraceFile = getenv("LIBOMPTARGET_PROFILE");6009// TODO: add a configuration option for time granularity6010if (ProfileTraceFile)6011llvm::timeTraceProfilerInitialize(500 /* us */, "libomptarget");6012#endif60136014int gtid = this_thr->th.th_info.ds.ds_gtid;6015/* void *stack_data;*/6016kmp_team_t **volatile pteam;60176018KMP_MB();6019KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid));60206021if (__kmp_env_consistency_check) {6022this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak?6023}60246025#if OMPD_SUPPORT6026if (ompd_state & OMPD_ENABLE_BP)6027ompd_bp_thread_begin();6028#endif60296030#if OMPT_SUPPORT6031ompt_data_t *thread_data = nullptr;6032if (ompt_enabled.enabled) {6033thread_data = &(this_thr->th.ompt_thread_info.thread_data);6034*thread_data = ompt_data_none;60356036this_thr->th.ompt_thread_info.state = ompt_state_overhead;6037this_thr->th.ompt_thread_info.wait_id = 0;6038this_thr->th.ompt_thread_info.idle_frame = 
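// ---------------------------------------------------------------------------
// Illustrative sketch, not part of libomp (kept out of the build with #if 0):
// the insertion code above keeps the thread free pool sorted by gtid and
// remembers the last insertion point so the common single-level case appends
// without rescanning the list. PoolThread and pool_insert() are invented
// names mirroring __kmp_thread_pool and __kmp_thread_pool_insert_pt.
// ---------------------------------------------------------------------------
#if 0 // illustrative only
#include <cstdio>

struct PoolThread {
  int gtid;
  PoolThread *next;
};

static PoolThread *pool_head = nullptr;
static PoolThread *insert_pt = nullptr; // where the previous insertion landed

static void pool_insert(PoolThread *th) {
  // If the cached point is already past the new gtid, rescan from the head.
  if (insert_pt && insert_pt->gtid > th->gtid)
    insert_pt = nullptr;
  PoolThread **scan = insert_pt ? &insert_pt->next : &pool_head;
  while (*scan && (*scan)->gtid < th->gtid)
    scan = &(*scan)->next;
  th->next = *scan;
  *scan = th;
  insert_pt = th; // remember where we stopped for the next insertion
}

int main() {
  PoolThread a{3, nullptr}, b{1, nullptr}, c{2, nullptr};
  pool_insert(&a);
  pool_insert(&b); // smaller gtid: forces the rescan path
  pool_insert(&c);
  for (PoolThread *p = pool_head; p; p = p->next)
    std::printf("%d ", p->gtid); // prints: 1 2 3
  std::printf("\n");
}
#endif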
OMPT_GET_FRAME_ADDRESS(0);6039this_thr->th.ompt_thread_info.parallel_flags = 0;6040if (ompt_enabled.ompt_callback_thread_begin) {6041ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(6042ompt_thread_worker, thread_data);6043}6044this_thr->th.ompt_thread_info.state = ompt_state_idle;6045}6046#endif60476048/* This is the place where threads wait for work */6049while (!TCR_4(__kmp_global.g.g_done)) {6050KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);6051KMP_MB();60526053/* wait for work to do */6054KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid));60556056/* No tid yet since not part of a team */6057__kmp_fork_barrier(gtid, KMP_GTID_DNE);60586059#if OMPT_SUPPORT6060if (ompt_enabled.enabled) {6061this_thr->th.ompt_thread_info.state = ompt_state_overhead;6062}6063#endif60646065pteam = &this_thr->th.th_team;60666067/* have we been allocated? */6068if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {6069/* we were just woken up, so run our new task */6070if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {6071int rc;6072KA_TRACE(20,6073("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",6074gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),6075(*pteam)->t.t_pkfn));60766077updateHWFPControl(*pteam);60786079#if OMPT_SUPPORT6080if (ompt_enabled.enabled) {6081this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;6082}6083#endif60846085rc = (*pteam)->t.t_invoke(gtid);6086KMP_ASSERT(rc);60876088KMP_MB();6089KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",6090gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),6091(*pteam)->t.t_pkfn));6092}6093#if OMPT_SUPPORT6094if (ompt_enabled.enabled) {6095/* no frame set while outside task */6096__ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;60976098this_thr->th.ompt_thread_info.state = ompt_state_overhead;6099}6100#endif6101/* join barrier after parallel region */6102__kmp_join_barrier(gtid);6103}6104}61056106#if OMPD_SUPPORT6107if (ompd_state & OMPD_ENABLE_BP)6108ompd_bp_thread_end();6109#endif61106111#if OMPT_SUPPORT6112if (ompt_enabled.ompt_callback_thread_end) {6113ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);6114}6115#endif61166117this_thr->th.th_task_team = NULL;6118/* run the destructors for the threadprivate data for this thread */6119__kmp_common_destroy_gtid(gtid);61206121KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid));6122KMP_MB();61236124#if OMP_PROFILING_SUPPORT6125llvm::timeTraceProfilerFinishThread();6126#endif6127return this_thr;6128}61296130/* ------------------------------------------------------------------------ */61316132void __kmp_internal_end_dest(void *specific_gtid) {6133// Make sure no significant bits are lost6134int gtid;6135__kmp_type_convert((kmp_intptr_t)specific_gtid - 1, >id);61366137KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid));6138/* NOTE: the gtid is stored as gitd+1 in the thread-local-storage6139* this is because 0 is reserved for the nothing-stored case */61406141__kmp_internal_end_thread(gtid);6142}61436144#if KMP_OS_UNIX && KMP_DYNAMIC_LIB61456146__attribute__((destructor)) void __kmp_internal_end_dtor(void) {6147__kmp_internal_end_atexit();6148}61496150#endif61516152/* [Windows] josh: when the atexit handler is called, there may still be more6153than one thread alive */6154void __kmp_internal_end_atexit(void) {6155KA_TRACE(30, ("__kmp_internal_end_atexit\n"));6156/* [Windows]6157josh: ideally, we want to completely shutdown the library in this atexit6158handler, but stat code that depends on 
thread specific data for gtid fails6159because that data becomes unavailable at some point during the shutdown, so6160we call __kmp_internal_end_thread instead. We should eventually remove the6161dependency on __kmp_get_specific_gtid in the stat code and use6162__kmp_internal_end_library to cleanly shutdown the library.61636164// TODO: Can some of this comment about GVS be removed?6165I suspect that the offending stat code is executed when the calling thread6166tries to clean up a dead root thread's data structures, resulting in GVS6167code trying to close the GVS structures for that thread, but since the stat6168code uses __kmp_get_specific_gtid to get the gtid with the assumption that6169the calling thread is cleaning up itself instead of another thread, it get6170confused. This happens because allowing a thread to unregister and cleanup6171another thread is a recent modification for addressing an issue.6172Based on the current design (20050722), a thread may end up6173trying to unregister another thread only if thread death does not trigger6174the calling of __kmp_internal_end_thread. For Linux* OS, there is the6175thread specific data destructor function to detect thread death. For6176Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there6177is nothing. Thus, the workaround is applicable only for Windows static6178stat library. */6179__kmp_internal_end_library(-1);6180#if KMP_OS_WINDOWS6181__kmp_close_console();6182#endif6183}61846185static void __kmp_reap_thread(kmp_info_t *thread, int is_root) {6186// It is assumed __kmp_forkjoin_lock is acquired.61876188int gtid;61896190KMP_DEBUG_ASSERT(thread != NULL);61916192gtid = thread->th.th_info.ds.ds_gtid;61936194if (!is_root) {6195if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {6196/* Assume the threads are at the fork barrier here */6197KA_TRACE(619820, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",6199gtid));6200if (__kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {6201while (6202!KMP_COMPARE_AND_STORE_ACQ32(&(thread->th.th_used_in_team), 0, 3))6203KMP_CPU_PAUSE();6204__kmp_resume_32(gtid, (kmp_flag_32<false, false> *)NULL);6205} else {6206/* Need release fence here to prevent seg faults for tree forkjoin6207barrier (GEH) */6208kmp_flag_64<> flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,6209thread);6210__kmp_release_64(&flag);6211}6212}62136214// Terminate OS thread.6215__kmp_reap_worker(thread);62166217// The thread was killed asynchronously. 
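// ---------------------------------------------------------------------------
// Illustrative sketch, not part of libomp (kept out of the build with #if 0):
// the worker loop in __kmp_launch_thread above boils down to "block until a
// team hands over a microtask or shutdown is requested, run it, repeat".
// WorkerSlot and worker_loop() are invented, and an ordinary mutex/condvar
// replaces the runtime's fork/join barriers.
// ---------------------------------------------------------------------------
#if 0 // illustrative only
#include <condition_variable>
#include <cstdio>
#include <functional>
#include <mutex>
#include <thread>

struct WorkerSlot {
  std::mutex m;
  std::condition_variable cv;
  std::function<void()> work; // the "microtask" handed to the worker
  bool done = false;          // library shutdown requested
};

static void worker_loop(WorkerSlot &slot) {
  for (;;) {
    std::unique_lock<std::mutex> lk(slot.m);
    slot.cv.wait(lk, [&] { return slot.done || slot.work; });
    if (slot.work) {
      auto task = std::move(slot.work);
      slot.work = nullptr;
      lk.unlock();
      task(); // invoke the parallel-region body
      continue;
    }
    return; // no pending work and shutdown was requested
  }
}

int main() {
  WorkerSlot slot;
  std::thread worker(worker_loop, std::ref(slot));
  {
    std::lock_guard<std::mutex> lk(slot.m);
    slot.work = [] { std::printf("running microtask\n"); };
  }
  slot.cv.notify_one();
  {
    std::lock_guard<std::mutex> lk(slot.m);
    slot.done = true;
  }
  slot.cv.notify_one();
  worker.join();
}
#endif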
If it was actively6218// spinning in the thread pool, decrement the global count.6219//6220// There is a small timing hole here - if the worker thread was just waking6221// up after sleeping in the pool, had reset it's th_active_in_pool flag but6222// not decremented the global counter __kmp_thread_pool_active_nth yet, then6223// the global counter might not get updated.6224//6225// Currently, this can only happen as the library is unloaded,6226// so there are no harmful side effects.6227if (thread->th.th_active_in_pool) {6228thread->th.th_active_in_pool = FALSE;6229KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);6230KMP_DEBUG_ASSERT(__kmp_thread_pool_active_nth >= 0);6231}6232}62336234__kmp_free_implicit_task(thread);62356236// Free the fast memory for tasking6237#if USE_FAST_MEMORY6238__kmp_free_fast_memory(thread);6239#endif /* USE_FAST_MEMORY */62406241__kmp_suspend_uninitialize_thread(thread);62426243KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);6244TCW_SYNC_PTR(__kmp_threads[gtid], NULL);62456246--__kmp_all_nth;6247// __kmp_nth was decremented when thread is added to the pool.62486249#ifdef KMP_ADJUST_BLOCKTIME6250/* Adjust blocktime back to user setting or default if necessary */6251/* Middle initialization might never have occurred */6252if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {6253KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);6254if (__kmp_nth <= __kmp_avail_proc) {6255__kmp_zero_bt = FALSE;6256}6257}6258#endif /* KMP_ADJUST_BLOCKTIME */62596260/* free the memory being used */6261if (__kmp_env_consistency_check) {6262if (thread->th.th_cons) {6263__kmp_free_cons_stack(thread->th.th_cons);6264thread->th.th_cons = NULL;6265}6266}62676268if (thread->th.th_pri_common != NULL) {6269__kmp_free(thread->th.th_pri_common);6270thread->th.th_pri_common = NULL;6271}62726273#if KMP_USE_BGET6274if (thread->th.th_local.bget_data != NULL) {6275__kmp_finalize_bget(thread);6276}6277#endif62786279#if KMP_AFFINITY_SUPPORTED6280if (thread->th.th_affin_mask != NULL) {6281KMP_CPU_FREE(thread->th.th_affin_mask);6282thread->th.th_affin_mask = NULL;6283}6284#endif /* KMP_AFFINITY_SUPPORTED */62856286#if KMP_USE_HIER_SCHED6287if (thread->th.th_hier_bar_data != NULL) {6288__kmp_free(thread->th.th_hier_bar_data);6289thread->th.th_hier_bar_data = NULL;6290}6291#endif62926293__kmp_reap_team(thread->th.th_serial_team);6294thread->th.th_serial_team = NULL;6295__kmp_free(thread);62966297KMP_MB();62986299} // __kmp_reap_thread63006301static void __kmp_itthash_clean(kmp_info_t *th) {6302#if USE_ITT_NOTIFY6303if (__kmp_itt_region_domains.count > 0) {6304for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {6305kmp_itthash_entry_t *bucket = __kmp_itt_region_domains.buckets[i];6306while (bucket) {6307kmp_itthash_entry_t *next = bucket->next_in_bucket;6308__kmp_thread_free(th, bucket);6309bucket = next;6310}6311}6312}6313if (__kmp_itt_barrier_domains.count > 0) {6314for (int i = 0; i < KMP_MAX_FRAME_DOMAINS; ++i) {6315kmp_itthash_entry_t *bucket = __kmp_itt_barrier_domains.buckets[i];6316while (bucket) {6317kmp_itthash_entry_t *next = bucket->next_in_bucket;6318__kmp_thread_free(th, bucket);6319bucket = next;6320}6321}6322}6323#endif6324}63256326static void __kmp_internal_end(void) {6327int i;63286329/* First, unregister the library */6330__kmp_unregister_library();63316332#if KMP_OS_WINDOWS6333/* In Win static library, we can't tell when a root actually dies, so we6334reclaim the data structures for any root threads that have died but not6335unregistered themselves, in order to shut down cleanly.6336In Win dynamic 
library we also can't tell when a thread dies. */6337__kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of6338// dead roots6339#endif63406341for (i = 0; i < __kmp_threads_capacity; i++)6342if (__kmp_root[i])6343if (__kmp_root[i]->r.r_active)6344break;6345KMP_MB(); /* Flush all pending memory write invalidates. */6346TCW_SYNC_4(__kmp_global.g.g_done, TRUE);63476348if (i < __kmp_threads_capacity) {6349#if KMP_USE_MONITOR6350// 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor??6351KMP_MB(); /* Flush all pending memory write invalidates. */63526353// Need to check that monitor was initialized before reaping it. If we are6354// called form __kmp_atfork_child (which sets __kmp_init_parallel = 0), then6355// __kmp_monitor will appear to contain valid data, but it is only valid in6356// the parent process, not the child.6357// New behavior (201008): instead of keying off of the flag6358// __kmp_init_parallel, the monitor thread creation is keyed off6359// of the new flag __kmp_init_monitor.6360__kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);6361if (TCR_4(__kmp_init_monitor)) {6362__kmp_reap_monitor(&__kmp_monitor);6363TCW_4(__kmp_init_monitor, 0);6364}6365__kmp_release_bootstrap_lock(&__kmp_monitor_lock);6366KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));6367#endif // KMP_USE_MONITOR6368} else {6369/* TODO move this to cleanup code */6370#ifdef KMP_DEBUG6371/* make sure that everything has properly ended */6372for (i = 0; i < __kmp_threads_capacity; i++) {6373if (__kmp_root[i]) {6374// KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC:6375// there can be uber threads alive here6376KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active?6377}6378}6379#endif63806381KMP_MB();63826383// Reap the worker threads.6384// This is valid for now, but be careful if threads are reaped sooner.6385while (__kmp_thread_pool != NULL) { // Loop thru all the thread in the pool.6386// Get the next thread from the pool.6387kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);6388__kmp_thread_pool = thread->th.th_next_pool;6389// Reap it.6390KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);6391thread->th.th_next_pool = NULL;6392thread->th.th_in_pool = FALSE;6393__kmp_reap_thread(thread, 0);6394}6395__kmp_thread_pool_insert_pt = NULL;63966397// Reap teams.6398while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool.6399// Get the next team from the pool.6400kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);6401__kmp_team_pool = team->t.t_next_pool;6402// Reap it.6403team->t.t_next_pool = NULL;6404__kmp_reap_team(team);6405}64066407__kmp_reap_task_teams();64086409#if KMP_OS_UNIX6410// Threads that are not reaped should not access any resources since they6411// are going to be deallocated soon, so the shutdown sequence should wait6412// until all threads either exit the final spin-waiting loop or begin6413// sleeping after the given blocktime.6414for (i = 0; i < __kmp_threads_capacity; i++) {6415kmp_info_t *thr = __kmp_threads[i];6416while (thr && KMP_ATOMIC_LD_ACQ(&thr->th.th_blocking))6417KMP_CPU_PAUSE();6418}6419#endif64206421for (i = 0; i < __kmp_threads_capacity; ++i) {6422// TBD: Add some checking...6423// Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL );6424}64256426/* Make sure all threadprivate destructors get run by joining with all6427worker threads before resetting this flag */6428TCW_SYNC_4(__kmp_init_common, FALSE);64296430KA_TRACE(10, ("__kmp_internal_end: all workers 
reaped\n"));6431KMP_MB();64326433#if KMP_USE_MONITOR6434// See note above: One of the possible fixes for CQ138434 / CQ1401266435//6436// FIXME: push both code fragments down and CSE them?6437// push them into __kmp_cleanup() ?6438__kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);6439if (TCR_4(__kmp_init_monitor)) {6440__kmp_reap_monitor(&__kmp_monitor);6441TCW_4(__kmp_init_monitor, 0);6442}6443__kmp_release_bootstrap_lock(&__kmp_monitor_lock);6444KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n"));6445#endif6446} /* else !__kmp_global.t_active */6447TCW_4(__kmp_init_gtid, FALSE);6448KMP_MB(); /* Flush all pending memory write invalidates. */64496450__kmp_cleanup();6451#if OMPT_SUPPORT6452ompt_fini();6453#endif6454}64556456void __kmp_internal_end_library(int gtid_req) {6457/* if we have already cleaned up, don't try again, it wouldn't be pretty */6458/* this shouldn't be a race condition because __kmp_internal_end() is the6459only place to clear __kmp_serial_init */6460/* we'll check this later too, after we get the lock */6461// 2009-09-06: We do not set g_abort without setting g_done. This check looks6462// redundant, because the next check will work in any case.6463if (__kmp_global.g.g_abort) {6464KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n"));6465/* TODO abort? */6466return;6467}6468if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {6469KA_TRACE(10, ("__kmp_internal_end_library: already finished\n"));6470return;6471}64726473// If hidden helper team has been initialized, we need to deinit it6474if (TCR_4(__kmp_init_hidden_helper) &&6475!TCR_4(__kmp_hidden_helper_team_done)) {6476TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);6477// First release the main thread to let it continue its work6478__kmp_hidden_helper_main_thread_release();6479// Wait until the hidden helper team has been destroyed6480__kmp_hidden_helper_threads_deinitz_wait();6481}64826483KMP_MB(); /* Flush all pending memory write invalidates. */6484/* find out who we are and what we should do */6485{6486int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();6487KA_TRACE(648810, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));6489if (gtid == KMP_GTID_SHUTDOWN) {6490KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system "6491"already shutdown\n"));6492return;6493} else if (gtid == KMP_GTID_MONITOR) {6494KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not "6495"registered, or system shutdown\n"));6496return;6497} else if (gtid == KMP_GTID_DNE) {6498KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system "6499"shutdown\n"));6500/* we don't know who we are, but we may still shutdown the library */6501} else if (KMP_UBER_GTID(gtid)) {6502/* unregister ourselves as an uber thread. 
gtid is no longer valid */6503if (__kmp_root[gtid]->r.r_active) {6504__kmp_global.g.g_abort = -1;6505TCW_SYNC_4(__kmp_global.g.g_done, TRUE);6506__kmp_unregister_library();6507KA_TRACE(10,6508("__kmp_internal_end_library: root still active, abort T#%d\n",6509gtid));6510return;6511} else {6512__kmp_itthash_clean(__kmp_threads[gtid]);6513KA_TRACE(651410,6515("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));6516__kmp_unregister_root_current_thread(gtid);6517}6518} else {6519/* worker threads may call this function through the atexit handler, if they6520* call exit() */6521/* For now, skip the usual subsequent processing and just dump the debug buffer.6522TODO: do a thorough shutdown instead */6523#ifdef DUMP_DEBUG_ON_EXIT6524if (__kmp_debug_buf)6525__kmp_dump_debug_buffer();6526#endif6527// added unregister library call here when we switch to shm linux6528// if we don't, it will leave lots of files in /dev/shm6529// cleanup shared memory file before exiting.6530__kmp_unregister_library();6531return;6532}6533}6534/* synchronize the termination process */6535__kmp_acquire_bootstrap_lock(&__kmp_initz_lock);65366537/* have we already finished */6538if (__kmp_global.g.g_abort) {6539KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n"));6540/* TODO abort? */6541__kmp_release_bootstrap_lock(&__kmp_initz_lock);6542return;6543}6544if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {6545__kmp_release_bootstrap_lock(&__kmp_initz_lock);6546return;6547}65486549/* We need this lock to enforce mutex between this reading of6550__kmp_threads_capacity and the writing by __kmp_register_root.6551Alternatively, we can use a counter of roots that is atomically updated by6552__kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and6553__kmp_internal_end_*. */6554__kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);65556556/* now we can safely conduct the actual termination */6557__kmp_internal_end();65586559__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);6560__kmp_release_bootstrap_lock(&__kmp_initz_lock);65616562KA_TRACE(10, ("__kmp_internal_end_library: exit\n"));65636564#ifdef DUMP_DEBUG_ON_EXIT6565if (__kmp_debug_buf)6566__kmp_dump_debug_buffer();6567#endif65686569#if KMP_OS_WINDOWS6570__kmp_close_console();6571#endif65726573__kmp_fini_allocator();65746575} // __kmp_internal_end_library65766577void __kmp_internal_end_thread(int gtid_req) {6578int i;65796580/* if we have already cleaned up, don't try again, it wouldn't be pretty */6581/* this shouldn't be a race condition because __kmp_internal_end() is the6582* only place to clear __kmp_serial_init */6583/* we'll check this later too, after we get the lock */6584// 2009-09-06: We do not set g_abort without setting g_done. This check looks6585// redundant, because the next check will work in any case.6586if (__kmp_global.g.g_abort) {6587KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n"));6588/* TODO abort? 
*/6589return;6590}6591if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {6592KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n"));6593return;6594}65956596// If hidden helper team has been initialized, we need to deinit it6597if (TCR_4(__kmp_init_hidden_helper) &&6598!TCR_4(__kmp_hidden_helper_team_done)) {6599TCW_SYNC_4(__kmp_hidden_helper_team_done, TRUE);6600// First release the main thread to let it continue its work6601__kmp_hidden_helper_main_thread_release();6602// Wait until the hidden helper team has been destroyed6603__kmp_hidden_helper_threads_deinitz_wait();6604}66056606KMP_MB(); /* Flush all pending memory write invalidates. */66076608/* find out who we are and what we should do */6609{6610int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();6611KA_TRACE(10,6612("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));6613if (gtid == KMP_GTID_SHUTDOWN) {6614KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system "6615"already shutdown\n"));6616return;6617} else if (gtid == KMP_GTID_MONITOR) {6618KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not "6619"registered, or system shutdown\n"));6620return;6621} else if (gtid == KMP_GTID_DNE) {6622KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system "6623"shutdown\n"));6624return;6625/* we don't know who we are */6626} else if (KMP_UBER_GTID(gtid)) {6627/* unregister ourselves as an uber thread. gtid is no longer valid */6628if (__kmp_root[gtid]->r.r_active) {6629__kmp_global.g.g_abort = -1;6630TCW_SYNC_4(__kmp_global.g.g_done, TRUE);6631KA_TRACE(10,6632("__kmp_internal_end_thread: root still active, abort T#%d\n",6633gtid));6634return;6635} else {6636KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n",6637gtid));6638__kmp_unregister_root_current_thread(gtid);6639}6640} else {6641/* just a worker thread, let's leave */6642KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid));66436644if (gtid >= 0) {6645__kmp_threads[gtid]->th.th_task_team = NULL;6646}66476648KA_TRACE(10,6649("__kmp_internal_end_thread: worker thread done, exiting T#%d\n",6650gtid));6651return;6652}6653}6654#if KMP_DYNAMIC_LIB6655if (__kmp_pause_status != kmp_hard_paused)6656// AC: lets not shutdown the dynamic library at the exit of uber thread,6657// because we will better shutdown later in the library destructor.6658{6659KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req));6660return;6661}6662#endif6663/* synchronize the termination process */6664__kmp_acquire_bootstrap_lock(&__kmp_initz_lock);66656666/* have we already finished */6667if (__kmp_global.g.g_abort) {6668KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n"));6669/* TODO abort? */6670__kmp_release_bootstrap_lock(&__kmp_initz_lock);6671return;6672}6673if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {6674__kmp_release_bootstrap_lock(&__kmp_initz_lock);6675return;6676}66776678/* We need this lock to enforce mutex between this reading of6679__kmp_threads_capacity and the writing by __kmp_register_root.6680Alternatively, we can use a counter of roots that is atomically updated by6681__kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and6682__kmp_internal_end_*. */66836684/* should we finish the run-time? are all siblings done? 
*/6685__kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);66866687for (i = 0; i < __kmp_threads_capacity; ++i) {6688if (KMP_UBER_GTID(i)) {6689KA_TRACE(669010,6691("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));6692__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);6693__kmp_release_bootstrap_lock(&__kmp_initz_lock);6694return;6695}6696}66976698/* now we can safely conduct the actual termination */66996700__kmp_internal_end();67016702__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);6703__kmp_release_bootstrap_lock(&__kmp_initz_lock);67046705KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req));67066707#ifdef DUMP_DEBUG_ON_EXIT6708if (__kmp_debug_buf)6709__kmp_dump_debug_buffer();6710#endif6711} // __kmp_internal_end_thread67126713// -----------------------------------------------------------------------------6714// Library registration stuff.67156716static long __kmp_registration_flag = 0;6717// Random value used to indicate library initialization.6718static char *__kmp_registration_str = NULL;6719// Value to be saved in env var __KMP_REGISTERED_LIB_<pid>.67206721static inline char *__kmp_reg_status_name() {6722/* On RHEL 3u5 if linked statically, getpid() returns different values in6723each thread. If registration and unregistration go in different threads6724(omp_misc_other_root_exit.cpp test case), the name of registered_lib_env6725env var can not be found, because the name will contain different pid. */6726// macOS* complains about name being too long with additional getuid()6727#if KMP_OS_UNIX && !KMP_OS_DARWIN && KMP_DYNAMIC_LIB6728return __kmp_str_format("__KMP_REGISTERED_LIB_%d_%d", (int)getpid(),6729(int)getuid());6730#else6731return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid());6732#endif6733} // __kmp_reg_status_get67346735#if defined(KMP_USE_SHM)6736bool __kmp_shm_available = false;6737bool __kmp_tmp_available = false;6738// If /dev/shm is not accessible, we will create a temporary file under /tmp.6739char *temp_reg_status_file_name = nullptr;6740#endif67416742void __kmp_register_library_startup(void) {67436744char *name = __kmp_reg_status_name(); // Name of the environment variable.6745int done = 0;6746union {6747double dtime;6748long ltime;6749} time;6750#if KMP_ARCH_X86 || KMP_ARCH_X86_646751__kmp_initialize_system_tick();6752#endif6753__kmp_read_system_time(&time.dtime);6754__kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);6755__kmp_registration_str =6756__kmp_str_format("%p-%lx-%s", &__kmp_registration_flag,6757__kmp_registration_flag, KMP_LIBRARY_FILE);67586759KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name,6760__kmp_registration_str));67616762while (!done) {67636764char *value = NULL; // Actual value of the environment variable.67656766#if defined(KMP_USE_SHM)6767char *shm_name = nullptr;6768char *data1 = nullptr;6769__kmp_shm_available = __kmp_detect_shm();6770if (__kmp_shm_available) {6771int fd1 = -1;6772shm_name = __kmp_str_format("/%s", name);6773int shm_preexist = 0;6774fd1 = shm_open(shm_name, O_CREAT | O_EXCL | O_RDWR, 0600);6775if ((fd1 == -1) && (errno == EEXIST)) {6776// file didn't open because it already exists.6777// try opening existing file6778fd1 = shm_open(shm_name, O_RDWR, 0600);6779if (fd1 == -1) { // file didn't open6780KMP_WARNING(FunctionError, "Can't open SHM");6781__kmp_shm_available = false;6782} else { // able to open existing file6783shm_preexist = 1;6784}6785}6786if (__kmp_shm_available && shm_preexist == 0) { // SHM created, set size6787if (ftruncate(fd1, 
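// ---------------------------------------------------------------------------
// Illustrative sketch, not part of libomp (kept out of the build with #if 0):
// the registration code above claims a named shared-memory segment with
// O_CREAT | O_EXCL, sizes it with ftruncate, maps it and writes a marker; a
// second copy of the runtime gets EEXIST and reads the first copy's marker
// instead. This POSIX-only standalone version uses the invented name
// "/example_omp_reg" and marker text (link with -lrt on older glibc).
// ---------------------------------------------------------------------------
#if 0 // illustrative only; POSIX shared memory
#include <cerrno>
#include <cstddef>
#include <cstdio>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main() {
  const char *name = "/example_omp_reg";
  const size_t size = 1024;
  bool preexisting = false;

  int fd = shm_open(name, O_CREAT | O_EXCL | O_RDWR, 0600);
  if (fd == -1 && errno == EEXIST) { // someone registered before us
    preexisting = true;
    fd = shm_open(name, O_RDWR, 0600);
  }
  if (fd == -1)
    return 1;
  if (!preexisting && ftruncate(fd, size) == -1)
    return 1;

  char *data =
      (char *)mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  if (data == MAP_FAILED)
    return 1;
  if (!preexisting)
    std::snprintf(data, size, "registered by pid %d", (int)getpid());
  std::printf("marker: %s\n", data); // ours, or the first owner's

  munmap(data, size);
  close(fd);
  if (!preexisting)
    shm_unlink(name); // tidy up our own registration
  return 0;
}
#endif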
SHM_SIZE) == -1) { // error occured setting size;6788KMP_WARNING(FunctionError, "Can't set size of SHM");6789__kmp_shm_available = false;6790}6791}6792if (__kmp_shm_available) { // SHM exists, now map it6793data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,6794fd1, 0);6795if (data1 == MAP_FAILED) { // failed to map shared memory6796KMP_WARNING(FunctionError, "Can't map SHM");6797__kmp_shm_available = false;6798}6799}6800if (__kmp_shm_available) { // SHM mapped6801if (shm_preexist == 0) { // set data to SHM, set value6802KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);6803}6804// Read value from either what we just wrote or existing file.6805value = __kmp_str_format("%s", data1); // read value from SHM6806munmap(data1, SHM_SIZE);6807}6808if (fd1 != -1)6809close(fd1);6810}6811if (!__kmp_shm_available)6812__kmp_tmp_available = __kmp_detect_tmp();6813if (!__kmp_shm_available && __kmp_tmp_available) {6814// SHM failed to work due to an error other than that the file already6815// exists. Try to create a temp file under /tmp.6816// If /tmp isn't accessible, fall back to using environment variable.6817// TODO: /tmp might not always be the temporary directory. For now we will6818// not consider TMPDIR.6819int fd1 = -1;6820temp_reg_status_file_name = __kmp_str_format("/tmp/%s", name);6821int tmp_preexist = 0;6822fd1 = open(temp_reg_status_file_name, O_CREAT | O_EXCL | O_RDWR, 0600);6823if ((fd1 == -1) && (errno == EEXIST)) {6824// file didn't open because it already exists.6825// try opening existing file6826fd1 = open(temp_reg_status_file_name, O_RDWR, 0600);6827if (fd1 == -1) { // file didn't open if (fd1 == -1) {6828KMP_WARNING(FunctionError, "Can't open TEMP");6829__kmp_tmp_available = false;6830} else {6831tmp_preexist = 1;6832}6833}6834if (__kmp_tmp_available && tmp_preexist == 0) {6835// we created /tmp file now set size6836if (ftruncate(fd1, SHM_SIZE) == -1) { // error occured setting size;6837KMP_WARNING(FunctionError, "Can't set size of /tmp file");6838__kmp_tmp_available = false;6839}6840}6841if (__kmp_tmp_available) {6842data1 = (char *)mmap(0, SHM_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,6843fd1, 0);6844if (data1 == MAP_FAILED) { // failed to map /tmp6845KMP_WARNING(FunctionError, "Can't map /tmp");6846__kmp_tmp_available = false;6847}6848}6849if (__kmp_tmp_available) {6850if (tmp_preexist == 0) { // set data to TMP, set value6851KMP_STRCPY_S(data1, SHM_SIZE, __kmp_registration_str);6852}6853// Read value from either what we just wrote or existing file.6854value = __kmp_str_format("%s", data1); // read value from SHM6855munmap(data1, SHM_SIZE);6856}6857if (fd1 != -1)6858close(fd1);6859}6860if (!__kmp_shm_available && !__kmp_tmp_available) {6861// no /dev/shm and no /tmp -- fall back to environment variable6862// Set environment variable, but do not overwrite if it exists.6863__kmp_env_set(name, __kmp_registration_str, 0);6864// read value to see if it got set6865value = __kmp_env_get(name);6866}6867#else // Windows and unix with static library6868// Set environment variable, but do not overwrite if it exists.6869__kmp_env_set(name, __kmp_registration_str, 0);6870// read value to see if it got set6871value = __kmp_env_get(name);6872#endif68736874if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {6875done = 1; // Ok, environment variable set successfully, exit the loop.6876} else {6877// Oops. Write failed. 
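// The loop above implements a publish-then-verify handshake: the runtime
// writes its registration string (via /dev/shm, a /tmp file, or an
// environment variable), reads the value back, and only the copy that sees
// its own string owns the registration. A minimal sketch of that idea using
// only an environment variable follows; the helper name is hypothetical and
// it uses POSIX setenv()/getenv() rather than the runtime's __kmp_env_*
// layer. Kept out of the build:
#if 0
#include <cstdio>
#include <cstdlib>
#include <cstring>

// Returns true if this process won the registration for `name`.
static bool publish_and_verify(const char *name, const char *my_value) {
  setenv(name, my_value, /*overwrite=*/0); // do not clobber an existing owner
  const char *seen = getenv(name);         // read back what is actually set
  return seen != nullptr && strcmp(seen, my_value) == 0;
}

int main() {
  char my_value[64];
  snprintf(my_value, sizeof(my_value), "%p-%lx-%s", (void *)my_value,
           0xCAFE0000UL, "libexample.so");
  if (publish_and_verify("__EXAMPLE_REGISTERED_LIB", my_value))
    printf("this copy owns the registration\n");
  else
    printf("another copy is already registered\n");
  return 0;
}
#endif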
Another copy of OpenMP RTL is in memory.6878// Check whether it alive or dead.6879int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead.6880char *tail = value;6881char *flag_addr_str = NULL;6882char *flag_val_str = NULL;6883char const *file_name = NULL;6884__kmp_str_split(tail, '-', &flag_addr_str, &tail);6885__kmp_str_split(tail, '-', &flag_val_str, &tail);6886file_name = tail;6887if (tail != NULL) {6888unsigned long *flag_addr = 0;6889unsigned long flag_val = 0;6890KMP_SSCANF(flag_addr_str, "%p", RCAST(void **, &flag_addr));6891KMP_SSCANF(flag_val_str, "%lx", &flag_val);6892if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) {6893// First, check whether environment-encoded address is mapped into6894// addr space.6895// If so, dereference it to see if it still has the right value.6896if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {6897neighbor = 1;6898} else {6899// If not, then we know the other copy of the library is no longer6900// running.6901neighbor = 2;6902}6903}6904}6905switch (neighbor) {6906case 0: // Cannot parse environment variable -- neighbor status unknown.6907// Assume it is the incompatible format of future version of the6908// library. Assume the other library is alive.6909// WARN( ... ); // TODO: Issue a warning.6910file_name = "unknown library";6911KMP_FALLTHROUGH();6912// Attention! Falling to the next case. That's intentional.6913case 1: { // Neighbor is alive.6914// Check it is allowed.6915char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK");6916if (!__kmp_str_match_true(duplicate_ok)) {6917// That's not allowed. Issue fatal error.6918__kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),6919KMP_HNT(DuplicateLibrary), __kmp_msg_null);6920}6921KMP_INTERNAL_FREE(duplicate_ok);6922__kmp_duplicate_library_ok = 1;6923done = 1; // Exit the loop.6924} break;6925case 2: { // Neighbor is dead.69266927#if defined(KMP_USE_SHM)6928if (__kmp_shm_available) { // close shared memory.6929shm_unlink(shm_name); // this removes file in /dev/shm6930} else if (__kmp_tmp_available) {6931unlink(temp_reg_status_file_name); // this removes the temp file6932} else {6933// Clear the variable and try to register library again.6934__kmp_env_unset(name);6935}6936#else6937// Clear the variable and try to register library again.6938__kmp_env_unset(name);6939#endif6940} break;6941default: {6942KMP_DEBUG_ASSERT(0);6943} break;6944}6945}6946KMP_INTERNAL_FREE((void *)value);6947#if defined(KMP_USE_SHM)6948if (shm_name)6949KMP_INTERNAL_FREE((void *)shm_name);6950#endif6951} // while6952KMP_INTERNAL_FREE((void *)name);69536954} // func __kmp_register_library_startup69556956void __kmp_unregister_library(void) {69576958char *name = __kmp_reg_status_name();6959char *value = NULL;69606961#if defined(KMP_USE_SHM)6962char *shm_name = nullptr;6963int fd1;6964if (__kmp_shm_available) {6965shm_name = __kmp_str_format("/%s", name);6966fd1 = shm_open(shm_name, O_RDONLY, 0600);6967if (fd1 != -1) { // File opened successfully6968char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);6969if (data1 != MAP_FAILED) {6970value = __kmp_str_format("%s", data1); // read value from SHM6971munmap(data1, SHM_SIZE);6972}6973close(fd1);6974}6975} else if (__kmp_tmp_available) { // try /tmp6976fd1 = open(temp_reg_status_file_name, O_RDONLY);6977if (fd1 != -1) { // File opened successfully6978char *data1 = (char *)mmap(0, SHM_SIZE, PROT_READ, MAP_SHARED, fd1, 0);6979if (data1 != MAP_FAILED) {6980value = __kmp_str_format("%s", data1); // read value from 
/tmp6981munmap(data1, SHM_SIZE);6982}6983close(fd1);6984}6985} else { // fall back to envirable6986value = __kmp_env_get(name);6987}6988#else6989value = __kmp_env_get(name);6990#endif69916992KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);6993KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);6994if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {6995// Ok, this is our variable. Delete it.6996#if defined(KMP_USE_SHM)6997if (__kmp_shm_available) {6998shm_unlink(shm_name); // this removes file in /dev/shm6999} else if (__kmp_tmp_available) {7000unlink(temp_reg_status_file_name); // this removes the temp file7001} else {7002__kmp_env_unset(name);7003}7004#else7005__kmp_env_unset(name);7006#endif7007}70087009#if defined(KMP_USE_SHM)7010if (shm_name)7011KMP_INTERNAL_FREE(shm_name);7012if (temp_reg_status_file_name)7013KMP_INTERNAL_FREE(temp_reg_status_file_name);7014#endif70157016KMP_INTERNAL_FREE(__kmp_registration_str);7017KMP_INTERNAL_FREE(value);7018KMP_INTERNAL_FREE(name);70197020__kmp_registration_flag = 0;7021__kmp_registration_str = NULL;70227023} // __kmp_unregister_library70247025// End of Library registration stuff.7026// -----------------------------------------------------------------------------70277028#if KMP_MIC_SUPPORTED70297030static void __kmp_check_mic_type() {7031kmp_cpuid_t cpuid_state = {0};7032kmp_cpuid_t *cs_p = &cpuid_state;7033__kmp_x86_cpuid(1, 0, cs_p);7034// We don't support mic1 at the moment7035if ((cs_p->eax & 0xff0) == 0xB10) {7036__kmp_mic_type = mic2;7037} else if ((cs_p->eax & 0xf0ff0) == 0x50670) {7038__kmp_mic_type = mic3;7039} else {7040__kmp_mic_type = non_mic;7041}7042}70437044#endif /* KMP_MIC_SUPPORTED */70457046#if KMP_HAVE_UMWAIT7047static void __kmp_user_level_mwait_init() {7048struct kmp_cpuid buf;7049__kmp_x86_cpuid(7, 0, &buf);7050__kmp_waitpkg_enabled = ((buf.ecx >> 5) & 1);7051__kmp_umwait_enabled = __kmp_waitpkg_enabled && __kmp_user_level_mwait;7052__kmp_tpause_enabled = __kmp_waitpkg_enabled && (__kmp_tpause_state > 0);7053KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_umwait_enabled = %d\n",7054__kmp_umwait_enabled));7055}7056#elif KMP_HAVE_MWAIT7057#ifndef AT_INTELPHIUSERMWAIT7058// Spurious, non-existent value that should always fail to return anything.7059// Will be replaced with the correct value when we know that.7060#define AT_INTELPHIUSERMWAIT 100007061#endif7062// getauxval() function is available in RHEL7 and SLES12. If a system with an7063// earlier OS is used to build the RTL, we'll use the following internal7064// function when the entry is not found.7065unsigned long getauxval(unsigned long) KMP_WEAK_ATTRIBUTE_EXTERNAL;7066unsigned long getauxval(unsigned long) { return 0; }70677068static void __kmp_user_level_mwait_init() {7069// When getauxval() and correct value of AT_INTELPHIUSERMWAIT are available7070// use them to find if the user-level mwait is enabled. 
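// The getauxval() stub defined above relies on weak linkage: when the C
// library provides a real (strong) getauxval(), the linker picks that one
// and the stub is ignored; on older systems only the stub exists and its
// return value of 0 makes the feature probe fail harmlessly. A small sketch
// of the same pattern with a hypothetical probe function, using the
// GCC/Clang weak attribute (which KMP_WEAK_ATTRIBUTE_EXTERNAL typically
// expands to on these targets). Kept out of the build:
#if 0
#include <cstdio>

// Weak fallback: used only if no strong definition is linked in.
extern "C" unsigned long probe_feature(unsigned long) __attribute__((weak));
extern "C" unsigned long probe_feature(unsigned long) { return 0; }

int main() {
  // With only the weak stub present this prints "absent".
  printf("feature %s\n", (probe_feature(42) & 0x1) ? "present" : "absent");
  return 0;
}
#endif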
Otherwise, forcibly7071// set __kmp_mwait_enabled=TRUE on Intel MIC if the environment variable7072// KMP_USER_LEVEL_MWAIT was set to TRUE.7073if (__kmp_mic_type == mic3) {7074unsigned long res = getauxval(AT_INTELPHIUSERMWAIT);7075if ((res & 0x1) || __kmp_user_level_mwait) {7076__kmp_mwait_enabled = TRUE;7077if (__kmp_user_level_mwait) {7078KMP_INFORM(EnvMwaitWarn);7079}7080} else {7081__kmp_mwait_enabled = FALSE;7082}7083}7084KF_TRACE(30, ("__kmp_user_level_mwait_init: __kmp_mic_type = %d, "7085"__kmp_mwait_enabled = %d\n",7086__kmp_mic_type, __kmp_mwait_enabled));7087}7088#endif /* KMP_HAVE_UMWAIT */70897090static void __kmp_do_serial_initialize(void) {7091int i, gtid;7092size_t size;70937094KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n"));70957096KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4);7097KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4);7098KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8);7099KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8);7100KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *));71017102#if OMPT_SUPPORT7103ompt_pre_init();7104#endif7105#if OMPD_SUPPORT7106__kmp_env_dump();7107ompd_init();7108#endif71097110__kmp_validate_locks();71117112#if ENABLE_LIBOMPTARGET7113/* Initialize functions from libomptarget */7114__kmp_init_omptarget();7115#endif71167117/* Initialize internal memory allocator */7118__kmp_init_allocator();71197120/* Register the library startup via an environment variable or via mapped7121shared memory file and check to see whether another copy of the library is7122already registered. Since forked child process is often terminated, we7123postpone the registration till middle initialization in the child */7124if (__kmp_need_register_serial)7125__kmp_register_library_startup();71267127/* TODO reinitialization of library */7128if (TCR_4(__kmp_global.g.g_done)) {7129KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n"));7130}71317132__kmp_global.g.g_abort = 0;7133TCW_SYNC_4(__kmp_global.g.g_done, FALSE);71347135/* initialize the locks */7136#if KMP_USE_ADAPTIVE_LOCKS7137#if KMP_DEBUG_ADAPTIVE_LOCKS7138__kmp_init_speculative_stats();7139#endif7140#endif7141#if KMP_STATS_ENABLED7142__kmp_stats_init();7143#endif7144__kmp_init_lock(&__kmp_global_lock);7145__kmp_init_queuing_lock(&__kmp_dispatch_lock);7146__kmp_init_lock(&__kmp_debug_lock);7147__kmp_init_atomic_lock(&__kmp_atomic_lock);7148__kmp_init_atomic_lock(&__kmp_atomic_lock_1i);7149__kmp_init_atomic_lock(&__kmp_atomic_lock_2i);7150__kmp_init_atomic_lock(&__kmp_atomic_lock_4i);7151__kmp_init_atomic_lock(&__kmp_atomic_lock_4r);7152__kmp_init_atomic_lock(&__kmp_atomic_lock_8i);7153__kmp_init_atomic_lock(&__kmp_atomic_lock_8r);7154__kmp_init_atomic_lock(&__kmp_atomic_lock_8c);7155__kmp_init_atomic_lock(&__kmp_atomic_lock_10r);7156__kmp_init_atomic_lock(&__kmp_atomic_lock_16r);7157__kmp_init_atomic_lock(&__kmp_atomic_lock_16c);7158__kmp_init_atomic_lock(&__kmp_atomic_lock_20c);7159__kmp_init_atomic_lock(&__kmp_atomic_lock_32c);7160__kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);7161__kmp_init_bootstrap_lock(&__kmp_exit_lock);7162#if KMP_USE_MONITOR7163__kmp_init_bootstrap_lock(&__kmp_monitor_lock);7164#endif7165__kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);71667167/* conduct initialization and initial setup of configuration */71687169__kmp_runtime_initialize();71707171#if KMP_MIC_SUPPORTED7172__kmp_check_mic_type();7173#endif71747175// Some global variable initialization moved here from kmp_env_initialize()7176#ifdef KMP_DEBUG7177kmp_diag = 0;7178#endif7179__kmp_abort_delay = 0;71807181// From 
__kmp_init_dflt_team_nth()7182/* assume the entire machine will be used */7183__kmp_dflt_team_nth_ub = __kmp_xproc;7184if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {7185__kmp_dflt_team_nth_ub = KMP_MIN_NTH;7186}7187if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {7188__kmp_dflt_team_nth_ub = __kmp_sys_max_nth;7189}7190__kmp_max_nth = __kmp_sys_max_nth;7191__kmp_cg_max_nth = __kmp_sys_max_nth;7192__kmp_teams_max_nth = __kmp_xproc; // set a "reasonable" default7193if (__kmp_teams_max_nth > __kmp_sys_max_nth) {7194__kmp_teams_max_nth = __kmp_sys_max_nth;7195}71967197// Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME"7198// part7199__kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;7200#if KMP_USE_MONITOR7201__kmp_monitor_wakeups =7202KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);7203__kmp_bt_intervals =7204KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);7205#endif7206// From "KMP_LIBRARY" part of __kmp_env_initialize()7207__kmp_library = library_throughput;7208// From KMP_SCHEDULE initialization7209__kmp_static = kmp_sch_static_balanced;7210// AC: do not use analytical here, because it is non-monotonous7211//__kmp_guided = kmp_sch_guided_iterative_chunked;7212//__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no7213// need to repeat assignment7214// Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch7215// bit control and barrier method control parts7216#if KMP_FAST_REDUCTION_BARRIER7217#define kmp_reduction_barrier_gather_bb ((int)1)7218#define kmp_reduction_barrier_release_bb ((int)1)7219#define kmp_reduction_barrier_gather_pat __kmp_barrier_gather_pat_dflt7220#define kmp_reduction_barrier_release_pat __kmp_barrier_release_pat_dflt7221#endif // KMP_FAST_REDUCTION_BARRIER7222for (i = bs_plain_barrier; i < bs_last_barrier; i++) {7223__kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;7224__kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;7225__kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;7226__kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;7227#if KMP_FAST_REDUCTION_BARRIER7228if (i == bs_reduction_barrier) { // tested and confirmed on ALTIX only (7229// lin_64 ): hyper,17230__kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;7231__kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;7232__kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;7233__kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;7234}7235#endif // KMP_FAST_REDUCTION_BARRIER7236}7237#if KMP_FAST_REDUCTION_BARRIER7238#undef kmp_reduction_barrier_release_pat7239#undef kmp_reduction_barrier_gather_pat7240#undef kmp_reduction_barrier_release_bb7241#undef kmp_reduction_barrier_gather_bb7242#endif // KMP_FAST_REDUCTION_BARRIER7243#if KMP_MIC_SUPPORTED7244if (__kmp_mic_type == mic2) { // KNC7245// AC: plane=3,2, forkjoin=2,1 are optimal for 240 threads on KNC7246__kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3; // plain gather7247__kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =72481; // forkjoin release7249__kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;7250__kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;7251}7252#if KMP_FAST_REDUCTION_BARRIER7253if (__kmp_mic_type == mic2) { // KNC7254__kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;7255__kmp_barrier_release_pattern[bs_reduction_barrier] = 
bp_hierarchical_bar;7256}7257#endif // KMP_FAST_REDUCTION_BARRIER7258#endif // KMP_MIC_SUPPORTED72597260// From KMP_CHECKS initialization7261#ifdef KMP_DEBUG7262__kmp_env_checks = TRUE; /* development versions have the extra checks */7263#else7264__kmp_env_checks = FALSE; /* port versions do not have the extra checks */7265#endif72667267// From "KMP_FOREIGN_THREADS_THREADPRIVATE" initialization7268__kmp_foreign_tp = TRUE;72697270__kmp_global.g.g_dynamic = FALSE;7271__kmp_global.g.g_dynamic_mode = dynamic_default;72727273__kmp_init_nesting_mode();72747275__kmp_env_initialize(NULL);72767277#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT7278__kmp_user_level_mwait_init();7279#endif7280// Print all messages in message catalog for testing purposes.7281#ifdef KMP_DEBUG7282char const *val = __kmp_env_get("KMP_DUMP_CATALOG");7283if (__kmp_str_match_true(val)) {7284kmp_str_buf_t buffer;7285__kmp_str_buf_init(&buffer);7286__kmp_i18n_dump_catalog(&buffer);7287__kmp_printf("%s", buffer.str);7288__kmp_str_buf_free(&buffer);7289}7290__kmp_env_free(&val);7291#endif72927293__kmp_threads_capacity =7294__kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);7295// Moved here from __kmp_env_initialize() "KMP_ALL_THREADPRIVATE" part7296__kmp_tp_capacity = __kmp_default_tp_capacity(7297__kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);72987299// If the library is shut down properly, both pools must be NULL. Just in7300// case, set them to NULL -- some memory may leak, but subsequent code will7301// work even if pools are not freed.7302KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);7303KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);7304KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);7305__kmp_thread_pool = NULL;7306__kmp_thread_pool_insert_pt = NULL;7307__kmp_team_pool = NULL;73087309/* Allocate all of the variable sized records */7310/* NOTE: __kmp_threads_capacity entries are allocated, but the arrays are7311* expandable */7312/* Since allocation is cache-aligned, just add extra padding at the end */7313size =7314(sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * __kmp_threads_capacity +7315CACHE_LINE;7316__kmp_threads = (kmp_info_t **)__kmp_allocate(size);7317__kmp_root = (kmp_root_t **)((char *)__kmp_threads +7318sizeof(kmp_info_t *) * __kmp_threads_capacity);73197320/* init thread counts */7321KMP_DEBUG_ASSERT(__kmp_all_nth ==73220); // Asserts fail if the library is reinitializing and7323KMP_DEBUG_ASSERT(__kmp_nth == 0); // something was wrong in termination.7324__kmp_all_nth = 0;7325__kmp_nth = 0;73267327/* setup the uber master thread and hierarchy */7328gtid = __kmp_register_root(TRUE);7329KA_TRACE(10, ("__kmp_do_serial_initialize T#%d\n", gtid));7330KMP_ASSERT(KMP_UBER_GTID(gtid));7331KMP_ASSERT(KMP_INITIAL_GTID(gtid));73327333KMP_MB(); /* Flush all pending memory write invalidates. */73347335__kmp_common_initialize();73367337#if KMP_OS_UNIX7338/* invoke the child fork handler */7339__kmp_register_atfork();7340#endif73417342#if !KMP_DYNAMIC_LIB || \7343((KMP_COMPILER_ICC || KMP_COMPILER_ICX) && KMP_OS_DARWIN)7344{7345/* Invoke the exit handler when the program finishes, only for static7346library and macOS* dynamic. For other dynamic libraries, we already7347have _fini and DllMain. 
*/7348int rc = atexit(__kmp_internal_end_atexit);7349if (rc != 0) {7350__kmp_fatal(KMP_MSG(FunctionError, "atexit()"), KMP_ERR(rc),7351__kmp_msg_null);7352}7353}7354#endif73557356#if KMP_HANDLE_SIGNALS7357#if KMP_OS_UNIX7358/* NOTE: make sure that this is called before the user installs their own7359signal handlers so that the user handlers are called first. this way they7360can return false, not call our handler, avoid terminating the library, and7361continue execution where they left off. */7362__kmp_install_signals(FALSE);7363#endif /* KMP_OS_UNIX */7364#if KMP_OS_WINDOWS7365__kmp_install_signals(TRUE);7366#endif /* KMP_OS_WINDOWS */7367#endif73687369/* we have finished the serial initialization */7370__kmp_init_counter++;73717372__kmp_init_serial = TRUE;73737374if (__kmp_version) {7375__kmp_print_version_1();7376}73777378if (__kmp_settings) {7379__kmp_env_print();7380}73817382if (__kmp_display_env || __kmp_display_env_verbose) {7383__kmp_env_print_2();7384}73857386#if OMPT_SUPPORT7387ompt_post_init();7388#endif73897390KMP_MB();73917392KA_TRACE(10, ("__kmp_do_serial_initialize: exit\n"));7393}73947395void __kmp_serial_initialize(void) {7396if (__kmp_init_serial) {7397return;7398}7399__kmp_acquire_bootstrap_lock(&__kmp_initz_lock);7400if (__kmp_init_serial) {7401__kmp_release_bootstrap_lock(&__kmp_initz_lock);7402return;7403}7404__kmp_do_serial_initialize();7405__kmp_release_bootstrap_lock(&__kmp_initz_lock);7406}74077408static void __kmp_do_middle_initialize(void) {7409int i, j;7410int prev_dflt_team_nth;74117412if (!__kmp_init_serial) {7413__kmp_do_serial_initialize();7414}74157416KA_TRACE(10, ("__kmp_middle_initialize: enter\n"));74177418if (UNLIKELY(!__kmp_need_register_serial)) {7419// We are in a forked child process. The registration was skipped during7420// serial initialization in __kmp_atfork_child handler. 
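// This follows the usual pthread_atfork() shape: __kmp_register_atfork()
// (installed during serial initialization above) lets the child-side handler
// mark per-process state such as the library registration as stale, and the
// work is redone lazily the next time initialization runs. A small sketch of
// that pattern; the names are hypothetical and this is not the runtime's
// actual handler code. Kept out of the build:
#if 0
#include <pthread.h>
#include <unistd.h>
#include <cstdio>

static bool g_need_reregister = false;

// Fork handlers should do as little as possible; just record work for later.
static void atfork_child() { g_need_reregister = true; }

static void lazy_initialize() {
  if (g_need_reregister) {
    g_need_reregister = false;
    // ... redo the per-process registration here ...
    printf("child pid %d re-registered\n", (int)getpid());
  }
}

int main() {
  pthread_atfork(/*prepare=*/nullptr, /*parent=*/nullptr, atfork_child);
  if (fork() == 0) { // in the child, registration is stale
    lazy_initialize();
    _exit(0);
  }
  return 0;
}
#endif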
Do it here.7421__kmp_register_library_startup();7422}74237424// Save the previous value for the __kmp_dflt_team_nth so that7425// we can avoid some reinitialization if it hasn't changed.7426prev_dflt_team_nth = __kmp_dflt_team_nth;74277428#if KMP_AFFINITY_SUPPORTED7429// __kmp_affinity_initialize() will try to set __kmp_ncores to the7430// number of cores on the machine.7431__kmp_affinity_initialize(__kmp_affinity);74327433#endif /* KMP_AFFINITY_SUPPORTED */74347435KMP_ASSERT(__kmp_xproc > 0);7436if (__kmp_avail_proc == 0) {7437__kmp_avail_proc = __kmp_xproc;7438}74397440// If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3),7441// correct them now7442j = 0;7443while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {7444__kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =7445__kmp_avail_proc;7446j++;7447}74487449if (__kmp_dflt_team_nth == 0) {7450#ifdef KMP_DFLT_NTH_CORES7451// Default #threads = #cores7452__kmp_dflt_team_nth = __kmp_ncores;7453KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "7454"__kmp_ncores (%d)\n",7455__kmp_dflt_team_nth));7456#else7457// Default #threads = #available OS procs7458__kmp_dflt_team_nth = __kmp_avail_proc;7459KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = "7460"__kmp_avail_proc(%d)\n",7461__kmp_dflt_team_nth));7462#endif /* KMP_DFLT_NTH_CORES */7463}74647465if (__kmp_dflt_team_nth < KMP_MIN_NTH) {7466__kmp_dflt_team_nth = KMP_MIN_NTH;7467}7468if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {7469__kmp_dflt_team_nth = __kmp_sys_max_nth;7470}74717472if (__kmp_nesting_mode > 0)7473__kmp_set_nesting_mode_threads();74747475// There's no harm in continuing if the following check fails,7476// but it indicates an error in the previous logic.7477KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);74787479if (__kmp_dflt_team_nth != prev_dflt_team_nth) {7480// Run through the __kmp_threads array and set the num threads icv for each7481// root thread that is currently registered with the RTL (which has not7482// already explicitly set its nthreads-var with a call to7483// omp_set_num_threads()).7484for (i = 0; i < __kmp_threads_capacity; i++) {7485kmp_info_t *thread = __kmp_threads[i];7486if (thread == NULL)7487continue;7488if (thread->th.th_current_task->td_icvs.nproc != 0)7489continue;74907491set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);7492}7493}7494KA_TRACE(749520,7496("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",7497__kmp_dflt_team_nth));74987499#ifdef KMP_ADJUST_BLOCKTIME7500/* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */7501if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {7502KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);7503if (__kmp_nth > __kmp_avail_proc) {7504__kmp_zero_bt = TRUE;7505}7506}7507#endif /* KMP_ADJUST_BLOCKTIME */75087509/* we have finished middle initialization */7510TCW_SYNC_4(__kmp_init_middle, TRUE);75117512KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n"));7513}75147515void __kmp_middle_initialize(void) {7516if (__kmp_init_middle) {7517return;7518}7519__kmp_acquire_bootstrap_lock(&__kmp_initz_lock);7520if (__kmp_init_middle) {7521__kmp_release_bootstrap_lock(&__kmp_initz_lock);7522return;7523}7524__kmp_do_middle_initialize();7525__kmp_release_bootstrap_lock(&__kmp_initz_lock);7526}75277528void __kmp_parallel_initialize(void) {7529int gtid = __kmp_entry_gtid(); // this might be a new root75307531/* synchronize parallel initialization (for sibling) */7532if 
(TCR_4(__kmp_init_parallel))7533return;7534__kmp_acquire_bootstrap_lock(&__kmp_initz_lock);7535if (TCR_4(__kmp_init_parallel)) {7536__kmp_release_bootstrap_lock(&__kmp_initz_lock);7537return;7538}75397540/* TODO reinitialization after we have already shut down */7541if (TCR_4(__kmp_global.g.g_done)) {7542KA_TRACE(754310,7544("__kmp_parallel_initialize: attempt to init while shutting down\n"));7545__kmp_infinite_loop();7546}75477548/* jc: The lock __kmp_initz_lock is already held, so calling7549__kmp_serial_initialize would cause a deadlock. So we call7550__kmp_do_serial_initialize directly. */7551if (!__kmp_init_middle) {7552__kmp_do_middle_initialize();7553}7554__kmp_assign_root_init_mask();7555__kmp_resume_if_hard_paused();75567557/* begin initialization */7558KA_TRACE(10, ("__kmp_parallel_initialize: enter\n"));7559KMP_ASSERT(KMP_UBER_GTID(gtid));75607561#if KMP_ARCH_X86 || KMP_ARCH_X86_647562// Save the FP control regs.7563// Worker threads will set theirs to these values at thread startup.7564__kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);7565__kmp_store_mxcsr(&__kmp_init_mxcsr);7566__kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;7567#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */75687569#if KMP_OS_UNIX7570#if KMP_HANDLE_SIGNALS7571/* must be after __kmp_serial_initialize */7572__kmp_install_signals(TRUE);7573#endif7574#endif75757576__kmp_suspend_initialize();75777578#if defined(USE_LOAD_BALANCE)7579if (__kmp_global.g.g_dynamic_mode == dynamic_default) {7580__kmp_global.g.g_dynamic_mode = dynamic_load_balance;7581}7582#else7583if (__kmp_global.g.g_dynamic_mode == dynamic_default) {7584__kmp_global.g.g_dynamic_mode = dynamic_thread_limit;7585}7586#endif75877588if (__kmp_version) {7589__kmp_print_version_2();7590}75917592/* we have finished parallel initialization */7593TCW_SYNC_4(__kmp_init_parallel, TRUE);75947595KMP_MB();7596KA_TRACE(10, ("__kmp_parallel_initialize: exit\n"));75977598__kmp_release_bootstrap_lock(&__kmp_initz_lock);7599}76007601void __kmp_hidden_helper_initialize() {7602if (TCR_4(__kmp_init_hidden_helper))7603return;76047605// __kmp_parallel_initialize is required before we initialize hidden helper7606if (!TCR_4(__kmp_init_parallel))7607__kmp_parallel_initialize();76087609// Double check. 
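// The serial, middle, parallel and hidden-helper initializers all use the
// same double-checked pattern around __kmp_initz_lock: test the "done" flag,
// take the lock, test again, do the work, publish the flag. A compact sketch
// of that pattern with standard C++ primitives standing in for the TCR/TCW
// macros and the bootstrap lock (names are hypothetical). Kept out of the
// build:
#if 0
#include <atomic>
#include <mutex>

static std::atomic<bool> g_initialized{false};
static std::mutex g_init_lock;

static void do_expensive_init() { /* ... one-time setup ... */ }

static void ensure_initialized() {
  if (g_initialized.load(std::memory_order_acquire))
    return;                                   // fast path: already done
  std::lock_guard<std::mutex> guard(g_init_lock);
  if (g_initialized.load(std::memory_order_relaxed))
    return;                                   // another thread won the race
  do_expensive_init();
  g_initialized.store(true, std::memory_order_release);
}
#endif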
  // Note that this double check should not be placed before
  // __kmp_parallel_initialize as it will cause dead lock.
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (TCR_4(__kmp_init_hidden_helper)) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }

#if KMP_AFFINITY_SUPPORTED
  // Initialize hidden helper affinity settings.
  // The above __kmp_parallel_initialize() will initialize
  // regular affinity (and topology) if not already done.
  if (!__kmp_hh_affinity.flags.initialized)
    __kmp_affinity_initialize(__kmp_hh_affinity);
#endif

  // Set the count of hidden helper tasks to be executed to zero
  KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);

  // Set the global variable indicating that we're initializing hidden helper
  // team/threads
  TCW_SYNC_4(__kmp_init_hidden_helper_threads, TRUE);

  // Platform independent initialization
  __kmp_do_initialize_hidden_helper_threads();

  // Wait here for the finish of initialization of hidden helper teams
  __kmp_hidden_helper_threads_initz_wait();

  // We have finished hidden helper initialization
  TCW_SYNC_4(__kmp_init_hidden_helper, TRUE);

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

/* ------------------------------------------------------------------------ */

void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  KMP_MB();

  /* none of the threads have encountered any constructs, yet. */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[
  // this_thr->th.th_info.ds.ds_tid ] );

  dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */
  dispatch->th_doacross_buf_idx = 0; // reset doacross dispatch buffer counter
  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);

  KMP_MB(); /* Flush all pending memory write invalidates.
*/7669}76707671void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,7672kmp_team_t *team) {7673if (__kmp_env_consistency_check)7674__kmp_pop_parallel(gtid, team->t.t_ident);76757676__kmp_finish_implicit_task(this_thr);7677}76787679int __kmp_invoke_task_func(int gtid) {7680int rc;7681int tid = __kmp_tid_from_gtid(gtid);7682kmp_info_t *this_thr = __kmp_threads[gtid];7683kmp_team_t *team = this_thr->th.th_team;76847685__kmp_run_before_invoked_task(gtid, tid, this_thr, team);7686#if USE_ITT_BUILD7687if (__itt_stack_caller_create_ptr) {7688// inform ittnotify about entering user's code7689if (team->t.t_stack_id != NULL) {7690__kmp_itt_stack_callee_enter((__itt_caller)team->t.t_stack_id);7691} else {7692KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);7693__kmp_itt_stack_callee_enter(7694(__itt_caller)team->t.t_parent->t.t_stack_id);7695}7696}7697#endif /* USE_ITT_BUILD */7698#if INCLUDE_SSC_MARKS7699SSC_MARK_INVOKING();7700#endif77017702#if OMPT_SUPPORT7703void *dummy;7704void **exit_frame_p;7705ompt_data_t *my_task_data;7706ompt_data_t *my_parallel_data;7707int ompt_team_size;77087709if (ompt_enabled.enabled) {7710exit_frame_p = &(team->t.t_implicit_task_taskdata[tid]7711.ompt_task_info.frame.exit_frame.ptr);7712} else {7713exit_frame_p = &dummy;7714}77157716my_task_data =7717&(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);7718my_parallel_data = &(team->t.ompt_team_info.parallel_data);7719if (ompt_enabled.ompt_callback_implicit_task) {7720ompt_team_size = team->t.t_nproc;7721ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(7722ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,7723__kmp_tid_from_gtid(gtid), ompt_task_implicit);7724OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);7725}7726#endif77277728#if KMP_STATS_ENABLED7729stats_state_e previous_state = KMP_GET_THREAD_STATE();7730if (previous_state == stats_state_e::TEAMS_REGION) {7731KMP_PUSH_PARTITIONED_TIMER(OMP_teams);7732} else {7733KMP_PUSH_PARTITIONED_TIMER(OMP_parallel);7734}7735KMP_SET_THREAD_STATE(IMPLICIT_TASK);7736#endif77377738rc = __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,7739tid, (int)team->t.t_argc, (void **)team->t.t_argv7740#if OMPT_SUPPORT7741,7742exit_frame_p7743#endif7744);7745#if OMPT_SUPPORT7746*exit_frame_p = NULL;7747this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_team;7748#endif77497750#if KMP_STATS_ENABLED7751if (previous_state == stats_state_e::TEAMS_REGION) {7752KMP_SET_THREAD_STATE(previous_state);7753}7754KMP_POP_PARTITIONED_TIMER();7755#endif77567757#if USE_ITT_BUILD7758if (__itt_stack_caller_create_ptr) {7759// inform ittnotify about leaving user's code7760if (team->t.t_stack_id != NULL) {7761__kmp_itt_stack_callee_leave((__itt_caller)team->t.t_stack_id);7762} else {7763KMP_DEBUG_ASSERT(team->t.t_parent->t.t_stack_id != NULL);7764__kmp_itt_stack_callee_leave(7765(__itt_caller)team->t.t_parent->t.t_stack_id);7766}7767}7768#endif /* USE_ITT_BUILD */7769__kmp_run_after_invoked_task(gtid, tid, this_thr, team);77707771return rc;7772}77737774void __kmp_teams_master(int gtid) {7775// This routine is called by all primary threads in teams construct7776kmp_info_t *thr = __kmp_threads[gtid];7777kmp_team_t *team = thr->th.th_team;7778ident_t *loc = team->t.t_ident;7779thr->th.th_set_nproc = thr->th.th_teams_size.nth;7780KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);7781KMP_DEBUG_ASSERT(thr->th.th_set_nproc);7782KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", 
gtid,7783__kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));77847785// This thread is a new CG root. Set up the proper variables.7786kmp_cg_root_t *tmp = (kmp_cg_root_t *)__kmp_allocate(sizeof(kmp_cg_root_t));7787tmp->cg_root = thr; // Make thr the CG root7788// Init to thread limit stored when league primary threads were forked7789tmp->cg_thread_limit = thr->th.th_current_task->td_icvs.thread_limit;7790tmp->cg_nthreads = 1; // Init counter to one active thread, this one7791KA_TRACE(100, ("__kmp_teams_master: Thread %p created node %p and init"7792" cg_nthreads to 1\n",7793thr, tmp));7794tmp->up = thr->th.th_cg_roots;7795thr->th.th_cg_roots = tmp;77967797// Launch league of teams now, but not let workers execute7798// (they hang on fork barrier until next parallel)7799#if INCLUDE_SSC_MARKS7800SSC_MARK_FORKING();7801#endif7802__kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,7803(microtask_t)thr->th.th_teams_microtask, // "wrapped" task7804VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);7805#if INCLUDE_SSC_MARKS7806SSC_MARK_JOINING();7807#endif7808// If the team size was reduced from the limit, set it to the new size7809if (thr->th.th_team_nproc < thr->th.th_teams_size.nth)7810thr->th.th_teams_size.nth = thr->th.th_team_nproc;7811// AC: last parameter "1" eliminates join barrier which won't work because7812// worker threads are in a fork barrier waiting for more parallel regions7813__kmp_join_call(loc, gtid7814#if OMPT_SUPPORT7815,7816fork_context_intel7817#endif7818,78191);7820}78217822int __kmp_invoke_teams_master(int gtid) {7823kmp_info_t *this_thr = __kmp_threads[gtid];7824kmp_team_t *team = this_thr->th.th_team;7825#if KMP_DEBUG7826if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)7827KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==7828(void *)__kmp_teams_master);7829#endif7830__kmp_run_before_invoked_task(gtid, 0, this_thr, team);7831#if OMPT_SUPPORT7832int tid = __kmp_tid_from_gtid(gtid);7833ompt_data_t *task_data =7834&team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data;7835ompt_data_t *parallel_data = &team->t.ompt_team_info.parallel_data;7836if (ompt_enabled.ompt_callback_implicit_task) {7837ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(7838ompt_scope_begin, parallel_data, task_data, team->t.t_nproc, tid,7839ompt_task_initial);7840OMPT_CUR_TASK_INFO(this_thr)->thread_num = tid;7841}7842#endif7843__kmp_teams_master(gtid);7844#if OMPT_SUPPORT7845this_thr->th.ompt_thread_info.parallel_flags = ompt_parallel_league;7846#endif7847__kmp_run_after_invoked_task(gtid, 0, this_thr, team);7848return 1;7849}78507851/* this sets the requested number of threads for the next parallel region7852encountered by this team. 
since this should be enclosed in the forkjoin7853critical section it should avoid race conditions with asymmetrical nested7854parallelism */7855void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {7856kmp_info_t *thr = __kmp_threads[gtid];78577858if (num_threads > 0)7859thr->th.th_set_nproc = num_threads;7860}78617862void __kmp_push_num_threads_list(ident_t *id, int gtid, kmp_uint32 list_length,7863int *num_threads_list) {7864kmp_info_t *thr = __kmp_threads[gtid];78657866KMP_DEBUG_ASSERT(list_length > 1);78677868if (num_threads_list[0] > 0)7869thr->th.th_set_nproc = num_threads_list[0];7870thr->th.th_set_nested_nth =7871(int *)KMP_INTERNAL_MALLOC(list_length * sizeof(int));7872for (kmp_uint32 i = 0; i < list_length; ++i)7873thr->th.th_set_nested_nth[i] = num_threads_list[i];7874thr->th.th_set_nested_nth_sz = list_length;7875}78767877void __kmp_set_strict_num_threads(ident_t *loc, int gtid, int sev,7878const char *msg) {7879kmp_info_t *thr = __kmp_threads[gtid];7880thr->th.th_nt_strict = true;7881thr->th.th_nt_loc = loc;7882// if sev is unset make fatal7883if (sev == severity_warning)7884thr->th.th_nt_sev = sev;7885else7886thr->th.th_nt_sev = severity_fatal;7887// if msg is unset, use an appropriate message7888if (msg)7889thr->th.th_nt_msg = msg;7890else7891thr->th.th_nt_msg = "Cannot form team with number of threads specified by "7892"strict num_threads clause.";7893}78947895static void __kmp_push_thread_limit(kmp_info_t *thr, int num_teams,7896int num_threads) {7897KMP_DEBUG_ASSERT(thr);7898// Remember the number of threads for inner parallel regions7899if (!TCR_4(__kmp_init_middle))7900__kmp_middle_initialize(); // get internal globals calculated7901__kmp_assign_root_init_mask();7902KMP_DEBUG_ASSERT(__kmp_avail_proc);7903KMP_DEBUG_ASSERT(__kmp_dflt_team_nth);79047905if (num_threads == 0) {7906if (__kmp_teams_thread_limit > 0) {7907num_threads = __kmp_teams_thread_limit;7908} else {7909num_threads = __kmp_avail_proc / num_teams;7910}7911// adjust num_threads w/o warning as it is not user setting7912// num_threads = min(num_threads, nthreads-var, thread-limit-var)7913// no thread_limit clause specified - do not change thread-limit-var ICV7914if (num_threads > __kmp_dflt_team_nth) {7915num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV7916}7917if (num_threads > thr->th.th_current_task->td_icvs.thread_limit) {7918num_threads = thr->th.th_current_task->td_icvs.thread_limit;7919} // prevent team size to exceed thread-limit-var7920if (num_teams * num_threads > __kmp_teams_max_nth) {7921num_threads = __kmp_teams_max_nth / num_teams;7922}7923if (num_threads == 0) {7924num_threads = 1;7925}7926} else {7927if (num_threads < 0) {7928__kmp_msg(kmp_ms_warning, KMP_MSG(CantFormThrTeam, num_threads, 1),7929__kmp_msg_null);7930num_threads = 1;7931}7932// This thread will be the primary thread of the league primary threads7933// Store new thread limit; old limit is saved in th_cg_roots list7934thr->th.th_current_task->td_icvs.thread_limit = num_threads;7935// num_threads = min(num_threads, nthreads-var)7936if (num_threads > __kmp_dflt_team_nth) {7937num_threads = __kmp_dflt_team_nth; // honor nthreads-var ICV7938}7939if (num_teams * num_threads > __kmp_teams_max_nth) {7940int new_threads = __kmp_teams_max_nth / num_teams;7941if (new_threads == 0) {7942new_threads = 1;7943}7944if (new_threads != num_threads) {7945if (!__kmp_reserve_warn) { // user asked for too many threads7946__kmp_reserve_warn = 1; // conflicts with 
KMP_TEAMS_THREAD_LIMIT7947__kmp_msg(kmp_ms_warning,7948KMP_MSG(CantFormThrTeam, num_threads, new_threads),7949KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);7950}7951}7952num_threads = new_threads;7953}7954}7955thr->th.th_teams_size.nth = num_threads;7956}79577958/* this sets the requested number of teams for the teams region and/or7959the number of threads for the next parallel region encountered */7960void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams,7961int num_threads) {7962kmp_info_t *thr = __kmp_threads[gtid];7963if (num_teams < 0) {7964// OpenMP specification requires requested values to be positive,7965// but people can send us any value, so we'd better check7966__kmp_msg(kmp_ms_warning, KMP_MSG(NumTeamsNotPositive, num_teams, 1),7967__kmp_msg_null);7968num_teams = 1;7969}7970if (num_teams == 0) {7971if (__kmp_nteams > 0) {7972num_teams = __kmp_nteams;7973} else {7974num_teams = 1; // default number of teams is 1.7975}7976}7977if (num_teams > __kmp_teams_max_nth) { // if too many teams requested?7978if (!__kmp_reserve_warn) {7979__kmp_reserve_warn = 1;7980__kmp_msg(kmp_ms_warning,7981KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),7982KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);7983}7984num_teams = __kmp_teams_max_nth;7985}7986// Set number of teams (number of threads in the outer "parallel" of the7987// teams)7988thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;79897990__kmp_push_thread_limit(thr, num_teams, num_threads);7991}79927993/* This sets the requested number of teams for the teams region and/or7994the number of threads for the next parallel region encountered */7995void __kmp_push_num_teams_51(ident_t *id, int gtid, int num_teams_lb,7996int num_teams_ub, int num_threads) {7997kmp_info_t *thr = __kmp_threads[gtid];7998KMP_DEBUG_ASSERT(num_teams_lb >= 0 && num_teams_ub >= 0);7999KMP_DEBUG_ASSERT(num_teams_ub >= num_teams_lb);8000KMP_DEBUG_ASSERT(num_threads >= 0);80018002if (num_teams_lb > num_teams_ub) {8003__kmp_fatal(KMP_MSG(FailedToCreateTeam, num_teams_lb, num_teams_ub),8004KMP_HNT(SetNewBound, __kmp_teams_max_nth), __kmp_msg_null);8005}80068007int num_teams = 1; // defalt number of teams is 1.80088009if (num_teams_lb == 0 && num_teams_ub > 0)8010num_teams_lb = num_teams_ub;80118012if (num_teams_lb == 0 && num_teams_ub == 0) { // no num_teams clause8013num_teams = (__kmp_nteams > 0) ? __kmp_nteams : num_teams;8014if (num_teams > __kmp_teams_max_nth) {8015if (!__kmp_reserve_warn) {8016__kmp_reserve_warn = 1;8017__kmp_msg(kmp_ms_warning,8018KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),8019KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);8020}8021num_teams = __kmp_teams_max_nth;8022}8023} else if (num_teams_lb == num_teams_ub) { // requires exact number of teams8024num_teams = num_teams_ub;8025} else { // num_teams_lb <= num_teams <= num_teams_ub8026if (num_threads <= 0) {8027if (num_teams_ub > __kmp_teams_max_nth) {8028num_teams = num_teams_lb;8029} else {8030num_teams = num_teams_ub;8031}8032} else {8033num_teams = (num_threads > __kmp_teams_max_nth)8034? 
num_teams8035: __kmp_teams_max_nth / num_threads;8036if (num_teams < num_teams_lb) {8037num_teams = num_teams_lb;8038} else if (num_teams > num_teams_ub) {8039num_teams = num_teams_ub;8040}8041}8042}8043// Set number of teams (number of threads in the outer "parallel" of the8044// teams)8045thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;80468047__kmp_push_thread_limit(thr, num_teams, num_threads);8048}80498050// Set the proc_bind var to use in the following parallel region.8051void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) {8052kmp_info_t *thr = __kmp_threads[gtid];8053thr->th.th_set_proc_bind = proc_bind;8054}80558056/* Launch the worker threads into the microtask. */80578058void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) {8059kmp_info_t *this_thr = __kmp_threads[gtid];80608061#ifdef KMP_DEBUG8062int f;8063#endif /* KMP_DEBUG */80648065KMP_DEBUG_ASSERT(team);8066KMP_DEBUG_ASSERT(this_thr->th.th_team == team);8067KMP_ASSERT(KMP_MASTER_GTID(gtid));8068KMP_MB(); /* Flush all pending memory write invalidates. */80698070team->t.t_construct = 0; /* no single directives seen yet */8071team->t.t_ordered.dt.t_value =80720; /* thread 0 enters the ordered section first */80738074/* Reset the identifiers on the dispatch buffer */8075KMP_DEBUG_ASSERT(team->t.t_disp_buffer);8076if (team->t.t_max_nproc > 1) {8077int i;8078for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {8079team->t.t_disp_buffer[i].buffer_index = i;8080team->t.t_disp_buffer[i].doacross_buf_idx = i;8081}8082} else {8083team->t.t_disp_buffer[0].buffer_index = 0;8084team->t.t_disp_buffer[0].doacross_buf_idx = 0;8085}80868087KMP_MB(); /* Flush all pending memory write invalidates. */8088KMP_ASSERT(this_thr->th.th_team == team);80898090#ifdef KMP_DEBUG8091for (f = 0; f < team->t.t_nproc; f++) {8092KMP_DEBUG_ASSERT(team->t.t_threads[f] &&8093team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);8094}8095#endif /* KMP_DEBUG */80968097/* release the worker threads so they may begin working */8098__kmp_fork_barrier(gtid, 0);8099}81008101void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {8102kmp_info_t *this_thr = __kmp_threads[gtid];81038104KMP_DEBUG_ASSERT(team);8105KMP_DEBUG_ASSERT(this_thr->th.th_team == team);8106KMP_ASSERT(KMP_MASTER_GTID(gtid));8107KMP_MB(); /* Flush all pending memory write invalidates. 
*/81088109/* Join barrier after fork */81108111#ifdef KMP_DEBUG8112if (__kmp_threads[gtid] &&8113__kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {8114__kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,8115__kmp_threads[gtid]);8116__kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, "8117"team->t.t_nproc=%d\n",8118gtid, __kmp_threads[gtid]->th.th_team_nproc, team,8119team->t.t_nproc);8120__kmp_print_structure();8121}8122KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&8123__kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);8124#endif /* KMP_DEBUG */81258126__kmp_join_barrier(gtid); /* wait for everyone */8127#if OMPT_SUPPORT8128ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state;8129if (ompt_enabled.enabled &&8130(ompt_state == ompt_state_wait_barrier_teams ||8131ompt_state == ompt_state_wait_barrier_implicit_parallel)) {8132int ds_tid = this_thr->th.th_info.ds.ds_tid;8133ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);8134this_thr->th.ompt_thread_info.state = ompt_state_overhead;8135#if OMPT_OPTIONAL8136void *codeptr = NULL;8137if (KMP_MASTER_TID(ds_tid) &&8138(ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||8139ompt_callbacks.ompt_callback(ompt_callback_sync_region)))8140codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;81418142ompt_sync_region_t sync_kind = ompt_sync_region_barrier_implicit_parallel;8143if (this_thr->th.ompt_thread_info.parallel_flags & ompt_parallel_league)8144sync_kind = ompt_sync_region_barrier_teams;8145if (ompt_enabled.ompt_callback_sync_region_wait) {8146ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(8147sync_kind, ompt_scope_end, NULL, task_data, codeptr);8148}8149if (ompt_enabled.ompt_callback_sync_region) {8150ompt_callbacks.ompt_callback(ompt_callback_sync_region)(8151sync_kind, ompt_scope_end, NULL, task_data, codeptr);8152}8153#endif8154if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {8155ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(8156ompt_scope_end, NULL, task_data, 0, ds_tid,8157ompt_task_implicit); // TODO: Can this be ompt_task_initial?8158}8159}8160#endif81618162KMP_MB(); /* Flush all pending memory write invalidates. */8163KMP_ASSERT(this_thr->th.th_team == team);8164}81658166/* ------------------------------------------------------------------------ */81678168#ifdef USE_LOAD_BALANCE81698170// Return the worker threads actively spinning in the hot team, if we8171// are at the outermost level of parallelism. 
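// Under USE_LOAD_BALANCE, __kmp_load_balance_nproc() below sizes the next
// team from a snapshot of system load: threads this root already accounts
// for are added back in, and the result is capped at the request and floored
// at KMP_MIN_NTH. A simplified, hypothetical model of that arithmetic (the
// real routine also falls back to the thread-limit algorithm when the load
// information cannot be read). Kept out of the build:
#if 0
static int load_balance_nproc_model(int avail_proc, int system_active,
                                    int team_curr_active, int set_nproc,
                                    int min_nth) {
  // Never trust a system count lower than what this root itself contributes.
  if (system_active < team_curr_active)
    system_active = team_curr_active;
  int n = avail_proc - system_active + team_curr_active;
  if (n > set_nproc)
    n = set_nproc; // never hand out more than was requested
  if (n < min_nth)
    n = min_nth; // but always form at least a minimal team
  return n;
}
#endif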
Otherwise, return 0.8172static int __kmp_active_hot_team_nproc(kmp_root_t *root) {8173int i;8174int retval;8175kmp_team_t *hot_team;81768177if (root->r.r_active) {8178return 0;8179}8180hot_team = root->r.r_hot_team;8181if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {8182return hot_team->t.t_nproc - 1; // Don't count primary thread8183}81848185// Skip the primary thread - it is accounted for elsewhere.8186retval = 0;8187for (i = 1; i < hot_team->t.t_nproc; i++) {8188if (hot_team->t.t_threads[i]->th.th_active) {8189retval++;8190}8191}8192return retval;8193}81948195// Perform an automatic adjustment to the number of8196// threads used by the next parallel region.8197static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) {8198int retval;8199int pool_active;8200int hot_team_active;8201int team_curr_active;8202int system_active;82038204KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,8205set_nproc));8206KMP_DEBUG_ASSERT(root);8207KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]8208->th.th_current_task->td_icvs.dynamic == TRUE);8209KMP_DEBUG_ASSERT(set_nproc > 1);82108211if (set_nproc == 1) {8212KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n"));8213return 1;8214}82158216// Threads that are active in the thread pool, active in the hot team for this8217// particular root (if we are at the outer par level), and the currently8218// executing thread (to become the primary thread) are available to add to the8219// new team, but are currently contributing to the system load, and must be8220// accounted for.8221pool_active = __kmp_thread_pool_active_nth;8222hot_team_active = __kmp_active_hot_team_nproc(root);8223team_curr_active = pool_active + hot_team_active + 1;82248225// Check the system load.8226system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);8227KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d "8228"hot team active = %d\n",8229system_active, pool_active, hot_team_active));82308231if (system_active < 0) {8232// There was an error reading the necessary info from /proc, so use the8233// thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode8234// = dynamic_thread_limit, we shouldn't wind up getting back here.8235__kmp_global.g.g_dynamic_mode = dynamic_thread_limit;8236KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit");82378238// Make this call behave like the thread limit algorithm.8239retval = __kmp_avail_proc - __kmp_nth +8240(root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);8241if (retval > set_nproc) {8242retval = set_nproc;8243}8244if (retval < KMP_MIN_NTH) {8245retval = KMP_MIN_NTH;8246}82478248KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n",8249retval));8250return retval;8251}82528253// There is a slight delay in the load balance algorithm in detecting new8254// running procs. The real system load at this instant should be at least as8255// large as the #active omp thread that are available to add to the team.8256if (system_active < team_curr_active) {8257system_active = team_curr_active;8258}8259retval = __kmp_avail_proc - system_active + team_curr_active;8260if (retval > set_nproc) {8261retval = set_nproc;8262}8263if (retval < KMP_MIN_NTH) {8264retval = KMP_MIN_NTH;8265}82668267KB_TRACE(20, ("__kmp_load_balance_nproc: exit. 
retval:%d\n", retval));8268return retval;8269} // __kmp_load_balance_nproc()82708271#endif /* USE_LOAD_BALANCE */82728273/* ------------------------------------------------------------------------ */82748275/* NOTE: this is called with the __kmp_init_lock held */8276void __kmp_cleanup(void) {8277int f;82788279KA_TRACE(10, ("__kmp_cleanup: enter\n"));82808281if (TCR_4(__kmp_init_parallel)) {8282#if KMP_HANDLE_SIGNALS8283__kmp_remove_signals();8284#endif8285TCW_4(__kmp_init_parallel, FALSE);8286}82878288if (TCR_4(__kmp_init_middle)) {8289#if KMP_AFFINITY_SUPPORTED8290__kmp_affinity_uninitialize();8291#endif /* KMP_AFFINITY_SUPPORTED */8292__kmp_cleanup_hierarchy();8293TCW_4(__kmp_init_middle, FALSE);8294}82958296KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n"));82978298if (__kmp_init_serial) {8299__kmp_runtime_destroy();8300__kmp_init_serial = FALSE;8301}83028303__kmp_cleanup_threadprivate_caches();83048305for (f = 0; f < __kmp_threads_capacity; f++) {8306if (__kmp_root[f] != NULL) {8307__kmp_free(__kmp_root[f]);8308__kmp_root[f] = NULL;8309}8310}8311__kmp_free(__kmp_threads);8312// __kmp_threads and __kmp_root were allocated at once, as single block, so8313// there is no need in freeing __kmp_root.8314__kmp_threads = NULL;8315__kmp_root = NULL;8316__kmp_threads_capacity = 0;83178318// Free old __kmp_threads arrays if they exist.8319kmp_old_threads_list_t *ptr = __kmp_old_threads_list;8320while (ptr) {8321kmp_old_threads_list_t *next = ptr->next;8322__kmp_free(ptr->threads);8323__kmp_free(ptr);8324ptr = next;8325}83268327#if KMP_USE_DYNAMIC_LOCK8328__kmp_cleanup_indirect_user_locks();8329#else8330__kmp_cleanup_user_locks();8331#endif8332#if OMPD_SUPPORT8333if (ompd_state) {8334__kmp_free(ompd_env_block);8335ompd_env_block = NULL;8336ompd_env_block_size = 0;8337}8338#endif83398340#if KMP_AFFINITY_SUPPORTED8341KMP_INTERNAL_FREE(CCAST(char *, __kmp_cpuinfo_file));8342__kmp_cpuinfo_file = NULL;8343#endif /* KMP_AFFINITY_SUPPORTED */83448345#if KMP_USE_ADAPTIVE_LOCKS8346#if KMP_DEBUG_ADAPTIVE_LOCKS8347__kmp_print_speculative_stats();8348#endif8349#endif8350KMP_INTERNAL_FREE(__kmp_nested_nth.nth);8351__kmp_nested_nth.nth = NULL;8352__kmp_nested_nth.size = 0;8353__kmp_nested_nth.used = 0;83548355KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);8356__kmp_nested_proc_bind.bind_types = NULL;8357__kmp_nested_proc_bind.size = 0;8358__kmp_nested_proc_bind.used = 0;8359if (__kmp_affinity_format) {8360KMP_INTERNAL_FREE(__kmp_affinity_format);8361__kmp_affinity_format = NULL;8362}83638364__kmp_i18n_catclose();83658366#if KMP_USE_HIER_SCHED8367__kmp_hier_scheds.deallocate();8368#endif83698370#if KMP_STATS_ENABLED8371__kmp_stats_fini();8372#endif83738374KA_TRACE(10, ("__kmp_cleanup: exit\n"));8375}83768377/* ------------------------------------------------------------------------ */83788379int __kmp_ignore_mppbeg(void) {8380char *env;83818382if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {8383if (__kmp_str_match_false(env))8384return FALSE;8385}8386// By default __kmpc_begin() is no-op.8387return TRUE;8388}83898390int __kmp_ignore_mppend(void) {8391char *env;83928393if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {8394if (__kmp_str_match_false(env))8395return FALSE;8396}8397// By default __kmpc_end() is no-op.8398return TRUE;8399}84008401void __kmp_internal_begin(void) {8402int gtid;8403kmp_root_t *root;84048405/* this is a very important step as it will register new sibling threads8406and assign these new uber threads a new gtid */8407gtid = __kmp_entry_gtid();8408root = 
__kmp_threads[gtid]->th.th_root;8409KMP_ASSERT(KMP_UBER_GTID(gtid));84108411if (root->r.r_begin)8412return;8413__kmp_acquire_lock(&root->r.r_begin_lock, gtid);8414if (root->r.r_begin) {8415__kmp_release_lock(&root->r.r_begin_lock, gtid);8416return;8417}84188419root->r.r_begin = TRUE;84208421__kmp_release_lock(&root->r.r_begin_lock, gtid);8422}84238424/* ------------------------------------------------------------------------ */84258426void __kmp_user_set_library(enum library_type arg) {8427int gtid;8428kmp_root_t *root;8429kmp_info_t *thread;84308431/* first, make sure we are initialized so we can get our gtid */84328433gtid = __kmp_entry_gtid();8434thread = __kmp_threads[gtid];84358436root = thread->th.th_root;84378438KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,8439library_serial));8440if (root->r.r_in_parallel) { /* Must be called in serial section of top-level8441thread */8442KMP_WARNING(SetLibraryIncorrectCall);8443return;8444}84458446switch (arg) {8447case library_serial:8448thread->th.th_set_nproc = 0;8449set__nproc(thread, 1);8450break;8451case library_turnaround:8452thread->th.th_set_nproc = 0;8453set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth8454: __kmp_dflt_team_nth_ub);8455break;8456case library_throughput:8457thread->th.th_set_nproc = 0;8458set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth8459: __kmp_dflt_team_nth_ub);8460break;8461default:8462KMP_FATAL(UnknownLibraryType, arg);8463}84648465__kmp_aux_set_library(arg);8466}84678468void __kmp_aux_set_stacksize(size_t arg) {8469if (!__kmp_init_serial)8470__kmp_serial_initialize();84718472#if KMP_OS_DARWIN8473if (arg & (0x1000 - 1)) {8474arg &= ~(0x1000 - 1);8475if (arg + 0x1000) /* check for overflow if we round up */8476arg += 0x1000;8477}8478#endif8479__kmp_acquire_bootstrap_lock(&__kmp_initz_lock);84808481/* only change the default stacksize before the first parallel region */8482if (!TCR_4(__kmp_init_parallel)) {8483size_t value = arg; /* argument is in bytes */84848485if (value < __kmp_sys_min_stksize)8486value = __kmp_sys_min_stksize;8487else if (value > KMP_MAX_STKSIZE)8488value = KMP_MAX_STKSIZE;84898490__kmp_stksize = value;84918492__kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */8493}84948495__kmp_release_bootstrap_lock(&__kmp_initz_lock);8496}84978498/* set the behaviour of the runtime library */8499/* TODO this can cause some odd behaviour with sibling parallelism... 

void __kmp_aux_set_stacksize(size_t arg) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

#if KMP_OS_DARWIN
  if (arg & (0x1000 - 1)) {
    arg &= ~(0x1000 - 1);
    if (arg + 0x1000) /* check for overflow if we round up */
      arg += 0x1000;
  }
#endif
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);

  /* only change the default stacksize before the first parallel region */
  if (!TCR_4(__kmp_init_parallel)) {
    size_t value = arg; /* argument is in bytes */

    if (value < __kmp_sys_min_stksize)
      value = __kmp_sys_min_stksize;
    else if (value > KMP_MAX_STKSIZE)
      value = KMP_MAX_STKSIZE;

    __kmp_stksize = value;

    __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */
  }

  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

/* set the behaviour of the runtime library */
/* TODO this can cause some odd behaviour with sibling parallelism... */
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
  } break;
  case library_turnaround:
    if (__kmp_use_yield == 1 && !__kmp_use_yield_exp_set)
      __kmp_use_yield = 2; // only yield when oversubscribed
    break;
  case library_throughput:
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}

/* Getting team information common for all team API */
// Returns NULL if not in teams construct
static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
  kmp_info_t *thr = __kmp_entry_thread();
  teams_serialized = 0;
  if (thr->th.th_teams_microtask) {
    kmp_team_t *team = thr->th.th_team;
    int tlevel = thr->th.th_teams_level; // the level of the teams construct
    int ii = team->t.t_level;
    teams_serialized = team->t.t_serialized;
    int level = tlevel + 1;
    KMP_DEBUG_ASSERT(ii >= tlevel);
    while (ii > level) {
      for (teams_serialized = team->t.t_serialized;
           (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
      }
      if (team->t.t_serialized && (!teams_serialized)) {
        team = team->t.t_parent;
        continue;
      }
      if (ii > level) {
        team = team->t.t_parent;
        ii--;
      }
    }
    return team;
  }
  return NULL;
}

int __kmp_aux_get_team_num() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 0; // teams region is serialized ( 1 team of 1 thread ).
    } else {
      return team->t.t_master_tid;
    }
  }
  return 0;
}

int __kmp_aux_get_num_teams() {
  int serialized;
  kmp_team_t *team = __kmp_aux_get_team_info(serialized);
  if (team) {
    if (serialized > 1) {
      return 1;
    } else {
      return team->t.t_parent->t.t_nproc;
    }
  }
  return 1;
}

/* ------------------------------------------------------------------------ */

/*
 * Affinity Format Parser
 *
 * Field is in form of: %[[[0].]size]type
 * % and type are required (%% means print a literal '%')
 * type is either single char or long name surrounded by {},
 * e.g., N or {num_threads}
 * 0 => leading zeros
 * . => right justified when size is specified
 * by default output is left justified
 * size is the *minimum* field length
 * All other characters are printed as is
 *
 * Available field types (see __kmp_affinity_format_table below):
 * t {team_num} - omp_get_team_num()
 * T {num_teams} - omp_get_num_teams()
 * L {nesting_level} - omp_get_level()
 * n {thread_num} - omp_get_thread_num()
 * N {num_threads} - omp_get_num_threads()
 * a {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1)
 * H {host} - name of host machine
 * P {process_id} - process id (integer)
 * i {native_thread_id} - native thread identifier (integer)
 * A {thread_affinity} - comma separated list of integers or integer ranges
 * (values of affinity mask)
 *
 * Implementation-specific field types can be added
 * If a type is unknown, print "undefined"
 */
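
// A worked example of the grammar above (illustrative only): with the
// environment set to
//
//   OMP_AFFINITY_FORMAT="host=%H pid=%P tid=%0.4n aff={%A}"
//
// a call to omp_display_affinity(NULL) from each thread of a team might emit
// lines such as
//
//   host=node17 pid=12345 tid=0002 aff={8,9}
//
// where the host name, process id, and mask contents are system dependent and
// "node17" is of course a made-up value.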

// Structure holding the short name, long name, and corresponding data type
// for snprintf. A table of these will represent the entire valid keyword
// field types.
typedef struct kmp_affinity_format_field_t {
  char short_name; // from spec e.g., L -> thread level
  const char *long_name; // from spec thread_level -> thread level
  char field_format; // data type for snprintf (typically 'd' or 's'
  // for integer or string)
} kmp_affinity_format_field_t;

static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
#if KMP_AFFINITY_SUPPORTED
    {'A', "thread_affinity", 's'},
#endif
    {'t', "team_num", 'd'},
    {'T', "num_teams", 'd'},
    {'L', "nesting_level", 'd'},
    {'n', "thread_num", 'd'},
    {'N', "num_threads", 'd'},
    {'a', "ancestor_tnum", 'd'},
    {'H', "host", 's'},
    {'P', "process_id", 'd'},
    {'i', "native_thread_id", 'd'}};
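
// How a field specification becomes a KMP_SNPRINTF format in the parser below
// (worked example, derived from the code): for "%0.4n" the modifiers are
// pad_zeros ('0') and right_justify ('.'), the width is 4, and the short name
// 'n' selects field_format 'd' from the table above, so the constructed format
// is "%04d". Without the modifiers, "%4n" builds the left-justified "%-4d".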

// Return the number of characters it takes to hold field
static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
                                            const char **ptr,
                                            kmp_str_buf_t *field_buffer) {
  int rc, format_index, field_value;
  const char *width_left, *width_right;
  bool pad_zeros, right_justify, parse_long_name, found_valid_name;
  static const int FORMAT_SIZE = 20;
  char format[FORMAT_SIZE] = {0};
  char absolute_short_name = 0;

  KMP_DEBUG_ASSERT(gtid >= 0);
  KMP_DEBUG_ASSERT(th);
  KMP_DEBUG_ASSERT(**ptr == '%');
  KMP_DEBUG_ASSERT(field_buffer);

  __kmp_str_buf_clear(field_buffer);

  // Skip the initial %
  (*ptr)++;

  // Check for %% first
  if (**ptr == '%') {
    __kmp_str_buf_cat(field_buffer, "%", 1);
    (*ptr)++; // skip over the second %
    return 1;
  }

  // Parse field modifiers if they are present
  pad_zeros = false;
  if (**ptr == '0') {
    pad_zeros = true;
    (*ptr)++; // skip over 0
  }
  right_justify = false;
  if (**ptr == '.') {
    right_justify = true;
    (*ptr)++; // skip over .
  }
  // Parse width of field: [width_left, width_right)
  width_left = width_right = NULL;
  if (**ptr >= '0' && **ptr <= '9') {
    width_left = *ptr;
    SKIP_DIGITS(*ptr);
    width_right = *ptr;
  }

  // Create the format for KMP_SNPRINTF based on flags parsed above
  format_index = 0;
  format[format_index++] = '%';
  if (!right_justify)
    format[format_index++] = '-';
  if (pad_zeros)
    format[format_index++] = '0';
  if (width_left && width_right) {
    int i = 0;
    // Only allow 8 digit number widths.
    // This also prevents overflowing format variable
    while (i < 8 && width_left < width_right) {
      format[format_index++] = *width_left;
      width_left++;
      i++;
    }
  }

  // Parse a name (long or short)
  // Canonicalize the name into absolute_short_name
  found_valid_name = false;
  parse_long_name = (**ptr == '{');
  if (parse_long_name)
    (*ptr)++; // skip initial left brace
  for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
                             sizeof(__kmp_affinity_format_table[0]);
       ++i) {
    char short_name = __kmp_affinity_format_table[i].short_name;
    const char *long_name = __kmp_affinity_format_table[i].long_name;
    char field_format = __kmp_affinity_format_table[i].field_format;
    if (parse_long_name) {
      size_t length = KMP_STRLEN(long_name);
      if (strncmp(*ptr, long_name, length) == 0) {
        found_valid_name = true;
        (*ptr) += length; // skip the long name
      }
    } else if (**ptr == short_name) {
      found_valid_name = true;
      (*ptr)++; // skip the short name
    }
    if (found_valid_name) {
      format[format_index++] = field_format;
      format[format_index++] = '\0';
      absolute_short_name = short_name;
      break;
    }
  }
  if (parse_long_name) {
    if (**ptr != '}') {
      absolute_short_name = 0;
    } else {
      (*ptr)++; // skip over the right brace
    }
  }

  // Attempt to fill the buffer with the requested
  // value using snprintf within __kmp_str_buf_print()
  switch (absolute_short_name) {
  case 't':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
    break;
  case 'T':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
    break;
  case 'L':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
    break;
  case 'n':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
    break;
  case 'H': {
    static const int BUFFER_SIZE = 256;
    char buf[BUFFER_SIZE];
    __kmp_expand_host_name(buf, BUFFER_SIZE);
    rc = __kmp_str_buf_print(field_buffer, format, buf);
  } break;
  case 'P':
    rc = __kmp_str_buf_print(field_buffer, format, getpid());
    break;
  case 'i':
    rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
    break;
  case 'N':
    rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
    break;
  case 'a':
    field_value =
        __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
    rc = __kmp_str_buf_print(field_buffer, format, field_value);
    break;
#if KMP_AFFINITY_SUPPORTED
  case 'A': {
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
    rc = __kmp_str_buf_print(field_buffer, format, buf.str);
    __kmp_str_buf_free(&buf);
  } break;
#endif
  default:
    // According to the spec, if an implementation does not have info for a
    // field type, then "undefined" is printed
    rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
    // Skip the field
    if (parse_long_name) {
      SKIP_TOKEN(*ptr);
      if (**ptr == '}')
        (*ptr)++;
    } else {
      (*ptr)++;
    }
  }

  KMP_ASSERT(format_index <= FORMAT_SIZE);
  return rc;
}

/*
 * Return number of characters needed to hold the affinity string
 * (not including null byte character)
 * The resultant string is printed to buffer, which the caller can then
 * handle afterwards
 */
size_t __kmp_aux_capture_affinity(int gtid, const char *format,
                                  kmp_str_buf_t *buffer) {
  const char *parse_ptr;
  size_t retval;
  const kmp_info_t *th;
  kmp_str_buf_t field;

  KMP_DEBUG_ASSERT(buffer);
  KMP_DEBUG_ASSERT(gtid >= 0);

  __kmp_str_buf_init(&field);
  __kmp_str_buf_clear(buffer);

  th = __kmp_threads[gtid];
  retval = 0;

  // If format is NULL or zero-length string, then we use
  // affinity-format-var ICV
  parse_ptr = format;
  if (parse_ptr == NULL || *parse_ptr == '\0') {
    parse_ptr = __kmp_affinity_format;
  }
  KMP_DEBUG_ASSERT(parse_ptr);

  while (*parse_ptr != '\0') {
    // Parse a field
    if (*parse_ptr == '%') {
      // Put field in the buffer
      int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
      __kmp_str_buf_catbuf(buffer, &field);
      retval += rc;
    } else {
      // Put literal character in buffer
      __kmp_str_buf_cat(buffer, parse_ptr, 1);
      retval++;
      parse_ptr++;
    }
  }
  __kmp_str_buf_free(&field);
  return retval;
}

// Displays the affinity string to stdout
void __kmp_aux_display_affinity(int gtid, const char *format) {
  kmp_str_buf_t buf;
  __kmp_str_buf_init(&buf);
  __kmp_aux_capture_affinity(gtid, format, &buf);
  __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
  __kmp_str_buf_free(&buf);
}
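
// Typical entry points into the two routines above (an illustrative sketch):
// the OpenMP 5.0 API routines omp_capture_affinity() and omp_display_affinity()
// reach them through thin wrappers in the runtime, roughly as in
//
//   char buf[64];
//   omp_capture_affinity(buf, sizeof(buf), "tid=%n aff=%A"); // capture path
//   omp_display_affinity(NULL); // display path
//
// A NULL or empty format string falls back to the affinity-format-var ICV
// (__kmp_affinity_format), as handled in __kmp_aux_capture_affinity() above.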

/* ------------------------------------------------------------------------ */
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in microseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  kmp_int8 bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Set whether blocktime has been set to "TRUE" */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
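
// Worked example for the normalization above (illustrative): the argument is
// clamped into [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME] and then recorded for
// both the thread's current team and its serial team, e.g.
//
//   __kmp_aux_set_blocktime(-5, thread, tid);      // stored as KMP_MIN_BLOCKTIME
//   __kmp_aux_set_blocktime(INT_MAX, thread, tid); // capped at KMP_MAX_BLOCKTIME
//
// User code normally reaches this helper through kmp_set_blocktime() or the
// KMP_BLOCKTIME environment setting rather than calling it directly.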

void __kmp_aux_set_defaults(char const *str, size_t len) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  __kmp_env_initialize(str);

  if (__kmp_settings || __kmp_display_env || __kmp_display_env_verbose) {
    __kmp_env_print();
  }
} // __kmp_aux_set_defaults

/* ------------------------------------------------------------------------ */
/* internal fast reduction routines */

PACKED_REDUCTION_METHOD_T
__kmp_determine_reduction_method(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  // Default reduction method: critical construct ( lck != NULL, like in
  // current PAROPT )
  // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method
  // can be selected by RTL
  // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method
  // can be selected by RTL
  // Finally, it's up to OpenMP RTL to make a decision on which method to
  // select among generated by PAROPT.

  PACKED_REDUCTION_METHOD_T retval;

  int team_size;

  KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 )

#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED                                 \
  (loc &&                                                                      \
   ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)))
#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func))

  retval = critical_reduce_block;

  // another choice of getting a team size (with 1 dynamic dereference) is
  // slower
  team_size = __kmp_get_team_num_threads(global_tid);
  if (team_size == 1) {

    retval = empty_reduce_block;

  } else {

    int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;

#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 ||                   \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 ||             \
    KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_WASM

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD ||        \
    KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX

    int teamsize_cutoff = 4;

#if KMP_MIC_SUPPORTED
    if (__kmp_mic_type != non_mic) {
      teamsize_cutoff = 8;
    }
#endif
    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (tree_available) {
      if (team_size <= teamsize_cutoff) {
        if (atomic_available) {
          retval = atomic_reduce_block;
        }
      } else {
        retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
      }
    } else if (atomic_available) {
      retval = atomic_reduce_block;
    }
#else
#error "Unknown or unsupported OS"
#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
       // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD ||
       // KMP_OS_SOLARIS || KMP_OS_WASI || KMP_OS_AIX

#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS ||       \
    KMP_ARCH_WASM || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32

#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||     \
    KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_HURD || KMP_OS_SOLARIS ||       \
    KMP_OS_WASI || KMP_OS_AIX

    // basic tuning

    if (atomic_available) {
      if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ???
        retval = atomic_reduce_block;
      }
    } // otherwise: use critical section

#elif KMP_OS_DARWIN

    int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
    if (atomic_available && (num_vars <= 3)) {
      retval = atomic_reduce_block;
    } else if (tree_available) {
      if ((reduce_size > (9 * sizeof(kmp_real64))) &&
          (reduce_size < (2000 * sizeof(kmp_real64)))) {
        retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
      }
    } // otherwise: use critical section

#else
#error "Unknown or unsupported OS"
#endif

#else
#error "Unknown or unsupported architecture"
#endif
  }

  // KMP_FORCE_REDUCTION

  // If the team is serialized (team_size == 1), ignore the forced reduction
  // method and stay with the unsynchronized method (empty_reduce_block)
  if (__kmp_force_reduction_method != reduction_method_not_defined &&
      team_size != 1) {

    PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;

    int atomic_available, tree_available;

    switch ((forced_retval = __kmp_force_reduction_method)) {
    case critical_reduce_block:
      KMP_ASSERT(lck); // lck should be != 0
      break;

    case atomic_reduce_block:
      atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
      if (!atomic_available) {
        KMP_WARNING(RedMethodNotSupported, "atomic");
        forced_retval = critical_reduce_block;
      }
      break;

    case tree_reduce_block:
      tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
      if (!tree_available) {
        KMP_WARNING(RedMethodNotSupported, "tree");
        forced_retval = critical_reduce_block;
      } else {
#if KMP_FAST_REDUCTION_BARRIER
        forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
#endif
      }
      break;

    default:
      KMP_ASSERT(0); // "unsupported method specified"
    }

    retval = forced_retval;
  }

  KA_TRACE(10, ("reduction method selected=%08x\n", retval));

#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}
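
// Decision sketch for the selection above (illustrative summary of one common
// path: 64-bit architecture, Linux-like OS, no KMP_FORCE_REDUCTION override):
//
//   team_size == 1                                  -> empty_reduce_block
//   tree method available, team_size <= cutoff
//     (4, or 8 with MIC) and atomic flag generated  -> atomic_reduce_block
//   tree method available, team_size > cutoff       -> TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER
//   only the atomic flag generated                  -> atomic_reduce_block
//   otherwise                                       -> critical_reduce_block
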
// this function is for testing set/get/determine reduce method
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}

// Soft pause sets up threads to ignore blocktime and just go to sleep.
// Spin-wait code checks __kmp_pause_status and reacts accordingly.
void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }

// Hard pause shuts down the runtime completely. Resume happens naturally when
// OpenMP is used subsequently.
void __kmp_hard_pause() {
  __kmp_pause_status = kmp_hard_paused;
  __kmp_internal_end_thread(-1);
}

// Soft resume sets __kmp_pause_status, and wakes up all threads.
void __kmp_resume_if_soft_paused() {
  if (__kmp_pause_status == kmp_soft_paused) {
    __kmp_pause_status = kmp_not_paused;

    for (int gtid = 1; gtid < __kmp_threads_capacity; ++gtid) {
      kmp_info_t *thread = __kmp_threads[gtid];
      if (thread) { // Wake it if sleeping
        kmp_flag_64<> fl(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                         thread);
        if (fl.is_sleeping())
          fl.resume(gtid);
        else if (__kmp_try_suspend_mx(thread)) { // got suspend lock
          __kmp_unlock_suspend_mx(thread); // unlock it; it won't sleep
        } else { // thread holds the lock and may sleep soon
          do { // until either the thread sleeps, or we can get the lock
            if (fl.is_sleeping()) {
              fl.resume(gtid);
              break;
            } else if (__kmp_try_suspend_mx(thread)) {
              __kmp_unlock_suspend_mx(thread);
              break;
            }
          } while (1);
        }
      }
    }
  }
}

// This function is called via __kmpc_pause_resource. Returns 0 if successful.
// TODO: add warning messages
int __kmp_pause_resource(kmp_pause_status_t level) {
  if (level == kmp_not_paused) { // requesting resume
    if (__kmp_pause_status == kmp_not_paused) {
      // error message about runtime not being paused, so can't resume
      return 1;
    } else {
      KMP_DEBUG_ASSERT(__kmp_pause_status == kmp_soft_paused ||
                       __kmp_pause_status == kmp_hard_paused);
      __kmp_pause_status = kmp_not_paused;
      return 0;
    }
  } else if (level == kmp_soft_paused) { // requesting soft pause
    if (__kmp_pause_status != kmp_not_paused) {
      // error message about already being paused
      return 1;
    } else {
      __kmp_soft_pause();
      return 0;
    }
  } else if (level == kmp_hard_paused) { // requesting hard pause
    if (__kmp_pause_status != kmp_not_paused) {
      // error message about already being paused
      return 1;
    } else {
      __kmp_hard_pause();
      return 0;
    }
  } else {
    // error message about invalid level
    return 1;
  }
}
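
// Illustrative user-level view of the pause machinery above (a sketch, not
// runtime code): applications reach __kmp_pause_resource() through
// __kmpc_pause_resource(), which backs the OpenMP 5.0 API, e.g.
//
//   if (omp_pause_resource_all(omp_pause_soft) != 0) {
//     // request rejected: already paused or invalid level
//   }
//   // ... the next parallel region resumes the runtime as needed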

void __kmp_omp_display_env(int verbose) {
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial == 0)
    __kmp_do_serial_initialize();
  __kmp_display_env_impl(!verbose, verbose);
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}

// The team size is changing, so distributed barrier must be modified
void __kmp_resize_dist_barrier(kmp_team_t *team, int old_nthreads,
                               int new_nthreads) {
  KMP_DEBUG_ASSERT(__kmp_barrier_release_pattern[bs_forkjoin_barrier] ==
                   bp_dist_bar);
  kmp_info_t **other_threads = team->t.t_threads;

  // We want all the workers to stop waiting on the barrier while we adjust the
  // size of the team.
  for (int f = 1; f < old_nthreads; ++f) {
    KMP_DEBUG_ASSERT(other_threads[f] != NULL);
    // Ignore threads that are already inactive or not present in the team
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 0) {
      // teams construct causes thread_limit to get passed in, and some of
      // those could be inactive; just ignore them
      continue;
    }
    // If the thread is still transitioning to the in_use state, wait for it
    if (team->t.t_threads[f]->th.th_used_in_team.load() == 3) {
      while (team->t.t_threads[f]->th.th_used_in_team.load() == 3)
        KMP_CPU_PAUSE();
    }
    // The thread should be in_use now
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 1);
    // Transition to unused state
    team->t.t_threads[f]->th.th_used_in_team.store(2);
    KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 2);
  }
  // Release all the workers
  team->t.b->go_release();

  KMP_MFENCE();

  // Workers should see transition status 2 and move to 0; but may need to be
  // woken up first
  int count = old_nthreads - 1;
  while (count > 0) {
    count = old_nthreads - 1;
    for (int f = 1; f < old_nthreads; ++f) {
      if (other_threads[f]->th.th_used_in_team.load() != 0) {
        if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up the workers
          kmp_atomic_flag_64<> *flag = (kmp_atomic_flag_64<> *)CCAST(
              void *, other_threads[f]->th.th_sleep_loc);
          __kmp_atomic_resume_64(other_threads[f]->th.th_info.ds.ds_gtid, flag);
        }
      } else {
        KMP_DEBUG_ASSERT(team->t.t_threads[f]->th.th_used_in_team.load() == 0);
        count--;
      }
    }
  }
  // Now update the barrier size
  team->t.b->update_num_threads(new_nthreads);
  team->t.b->go_reset();
}

void __kmp_add_threads_to_team(kmp_team_t *team, int new_nthreads) {
  // Add the threads back to the team
  KMP_DEBUG_ASSERT(team);
  // Threads were paused and pointed at th_used_in_team temporarily during a
  // resize of the team. We're going to set th_used_in_team to 3 to indicate to
  // the thread that it should transition itself back into the team. Then, if
  // blocktime isn't infinite, the thread could be sleeping, so we send a resume
  // to wake it up.
  for (int f = 1; f < new_nthreads; ++f) {
    KMP_DEBUG_ASSERT(team->t.t_threads[f]);
    KMP_COMPARE_AND_STORE_ACQ32(&(team->t.t_threads[f]->th.th_used_in_team), 0,
                                3);
    if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { // Wake up sleeping threads
      __kmp_resume_32(team->t.t_threads[f]->th.th_info.ds.ds_gtid,
                      (kmp_flag_32<false, false> *)NULL);
    }
  }
  // The threads should be transitioning to the team; when they are done, they
  // should have set th_used_in_team to 1. This loop forces master to wait until
  // all threads have moved into the team and are waiting in the barrier.
  int count = new_nthreads - 1;
  while (count > 0) {
    count = new_nthreads - 1;
    for (int f = 1; f < new_nthreads; ++f) {
      if (team->t.t_threads[f]->th.th_used_in_team.load() == 1) {
        count--;
      }
    }
  }
}
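
// Summary of the th_used_in_team protocol used by the two routines above
// (descriptive note): each worker's th_used_in_team acts as a small state
// machine,
//
//   0 = not part of the team               1 = active in the team
//   2 = leaving the team (set by resize)   3 = (re)joining (set by add via CAS)
//
// __kmp_resize_dist_barrier() moves workers 1 -> 2 and waits for them to reach
// 0, while __kmp_add_threads_to_team() moves them 0 -> 3 and waits for each
// worker to complete its own 3 -> 1 transition.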

// Globals and functions for hidden helper task
kmp_info_t **__kmp_hidden_helper_threads;
kmp_info_t *__kmp_hidden_helper_main_thread;
std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;
#if KMP_OS_LINUX
kmp_int32 __kmp_hidden_helper_threads_num = 8;
kmp_int32 __kmp_enable_hidden_helper = TRUE;
#else
kmp_int32 __kmp_hidden_helper_threads_num = 0;
kmp_int32 __kmp_enable_hidden_helper = FALSE;
#endif

namespace {
std::atomic<kmp_int32> __kmp_hit_hidden_helper_threads_num;

void __kmp_hidden_helper_wrapper_fn(int *gtid, int *, ...) {
  // This is an explicit synchronization on all hidden helper threads, in case
  // a regular thread pushes a hidden helper task to one of them before that
  // thread has been awakened even once since being released by the main
  // thread after creating the team.
  KMP_ATOMIC_INC(&__kmp_hit_hidden_helper_threads_num);
  while (KMP_ATOMIC_LD_ACQ(&__kmp_hit_hidden_helper_threads_num) !=
         __kmp_hidden_helper_threads_num)
    ;

  // If main thread, then wait for signal
  if (__kmpc_master(nullptr, *gtid)) {
    // First, unset the initial state and release the initial thread
    TCW_4(__kmp_init_hidden_helper_threads, FALSE);
    __kmp_hidden_helper_initz_release();
    __kmp_hidden_helper_main_thread_wait();
    // Now wake up all worker threads
    for (int i = 1; i < __kmp_hit_hidden_helper_threads_num; ++i) {
      __kmp_hidden_helper_worker_thread_signal();
    }
  }
}
} // namespace

void __kmp_hidden_helper_threads_initz_routine() {
  // Create a new root for hidden helper team/threads
  const int gtid = __kmp_register_root(TRUE);
  __kmp_hidden_helper_main_thread = __kmp_threads[gtid];
  __kmp_hidden_helper_threads = &__kmp_threads[gtid];
  __kmp_hidden_helper_main_thread->th.th_set_nproc =
      __kmp_hidden_helper_threads_num;

  KMP_ATOMIC_ST_REL(&__kmp_hit_hidden_helper_threads_num, 0);

  __kmpc_fork_call(nullptr, 0, __kmp_hidden_helper_wrapper_fn);

  // Set the initialization flag to FALSE
  TCW_SYNC_4(__kmp_init_hidden_helper, FALSE);

  __kmp_hidden_helper_threads_deinitz_release();
}

/* Nesting Mode:
   Set via KMP_NESTING_MODE, which takes an integer.
   Note: we skip duplicate topology levels, and skip levels with only
   one entity.
   KMP_NESTING_MODE=0 is the default, and doesn't use nesting mode.
   KMP_NESTING_MODE=1 sets as many nesting levels as there are distinct levels
   in the topology, and initializes the number of threads at each of those
   levels to the number of entities at each level, respectively, below the
   entity at the parent level.
   KMP_NESTING_MODE=N, where N>1, attempts to create up to N nesting levels,
   but starts with nesting OFF -- max-active-levels-var is 1 -- and requires
   the user to turn nesting on explicitly. This is an even more experimental
   option to this experimental feature, and may change or go away in the
   future.
*/
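
// Illustrative example for the description above: on a hypothetical machine
// whose detected topology is 2 sockets x 8 cores x 2 hw-threads, running with
//
//   KMP_NESTING_MODE=1
//
// seeds roughly three nesting levels with nthreads-var values of 2, 8 and 2
// (duplicate and single-entity levels are skipped) and turns nesting on, so
// nested parallel regions pick up those counts without per-level
// OMP_NUM_THREADS settings. The exact numbers depend on the topology actually
// detected; see __kmp_set_nesting_mode_threads() below.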

// Allocate space to store nesting levels
void __kmp_init_nesting_mode() {
  int levels = KMP_HW_LAST;
  __kmp_nesting_mode_nlevels = levels;
  __kmp_nesting_nth_level = (int *)KMP_INTERNAL_MALLOC(levels * sizeof(int));
  for (int i = 0; i < levels; ++i)
    __kmp_nesting_nth_level[i] = 0;
  if (__kmp_nested_nth.size < levels) {
    __kmp_nested_nth.nth =
        (int *)KMP_INTERNAL_REALLOC(__kmp_nested_nth.nth, levels * sizeof(int));
    __kmp_nested_nth.size = levels;
  }
}

// Set # threads for top levels of nesting; must be called after topology set
void __kmp_set_nesting_mode_threads() {
  kmp_info_t *thread = __kmp_threads[__kmp_entry_gtid()];

  if (__kmp_nesting_mode == 1)
    __kmp_nesting_mode_nlevels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
  else if (__kmp_nesting_mode > 1)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;

  if (__kmp_topology) { // use topology info
    int loc, hw_level;
    for (loc = 0, hw_level = 0; hw_level < __kmp_topology->get_depth() &&
                                loc < __kmp_nesting_mode_nlevels;
         loc++, hw_level++) {
      __kmp_nesting_nth_level[loc] = __kmp_topology->get_ratio(hw_level);
      if (__kmp_nesting_nth_level[loc] == 1)
        loc--;
    }
    // Make sure all cores are used
    if (__kmp_nesting_mode > 1 && loc > 1) {
      int core_level = __kmp_topology->get_level(KMP_HW_CORE);
      int num_cores = __kmp_topology->get_count(core_level);
      int upper_levels = 1;
      for (int level = 0; level < loc - 1; ++level)
        upper_levels *= __kmp_nesting_nth_level[level];
      if (upper_levels * __kmp_nesting_nth_level[loc - 1] < num_cores)
        __kmp_nesting_nth_level[loc - 1] =
            num_cores / __kmp_nesting_nth_level[loc - 2];
    }
    __kmp_nesting_mode_nlevels = loc;
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  } else { // no topology info available; provide a reasonable guesstimation
    if (__kmp_avail_proc >= 4) {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc / 2;
      __kmp_nesting_nth_level[1] = 2;
      __kmp_nesting_mode_nlevels = 2;
    } else {
      __kmp_nesting_nth_level[0] = __kmp_avail_proc;
      __kmp_nesting_mode_nlevels = 1;
    }
    __kmp_nested_nth.used = __kmp_nesting_mode_nlevels;
  }
  for (int i = 0; i < __kmp_nesting_mode_nlevels; ++i) {
    __kmp_nested_nth.nth[i] = __kmp_nesting_nth_level[i];
  }
  set__nproc(thread, __kmp_nesting_nth_level[0]);
  if (__kmp_nesting_mode > 1 && __kmp_nesting_mode_nlevels > __kmp_nesting_mode)
    __kmp_nesting_mode_nlevels = __kmp_nesting_mode;
  if (get__max_active_levels(thread) > 1) {
    // if max levels was set, set nesting mode levels to same
    __kmp_nesting_mode_nlevels = get__max_active_levels(thread);
  }
  if (__kmp_nesting_mode == 1) // turn on nesting for this case only
    set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}

// Empty symbols to export (see exports_so.txt) when feature is disabled
extern "C" {
#if !KMP_STATS_ENABLED
void __kmp_reset_stats() {}
#endif
#if !USE_DEBUGGER
int __kmp_omp_debug_struct_info = FALSE;
int __kmp_debugging = FALSE;
#endif
#if !USE_ITT_BUILD || !USE_ITT_NOTIFY
void __kmp_itt_fini_ittlib() {}
void __kmp_itt_init_ittlib() {}
#endif
}

// end of file