Path: blob/main/contrib/llvm-project/openmp/runtime/src/kmp.h
35258 views
/*! \file */1/*2* kmp.h -- KPTS runtime header file.3*/45//===----------------------------------------------------------------------===//6//7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.8// See https://llvm.org/LICENSE.txt for license information.9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception10//11//===----------------------------------------------------------------------===//1213#ifndef KMP_H14#define KMP_H1516#include "kmp_config.h"1718/* #define BUILD_PARALLEL_ORDERED 1 */1920/* This fix replaces gettimeofday with clock_gettime for better scalability on21the Altix. Requires user code to be linked with -lrt. */22//#define FIX_SGI_CLOCK2324/* Defines for OpenMP 3.0 tasking and auto scheduling */2526#ifndef KMP_STATIC_STEAL_ENABLED27#define KMP_STATIC_STEAL_ENABLED 128#endif29#define KMP_WEIGHTED_ITERATIONS_SUPPORTED \30(KMP_AFFINITY_SUPPORTED && KMP_STATIC_STEAL_ENABLED && \31(KMP_ARCH_X86 || KMP_ARCH_X86_64))3233#define TASK_CURRENT_NOT_QUEUED 034#define TASK_CURRENT_QUEUED 13536#ifdef BUILD_TIED_TASK_STACK37#define TASK_STACK_EMPTY 0 // entries when the stack is empty38#define TASK_STACK_BLOCK_BITS 5 // Used in TASK_STACK_SIZE and TASK_STACK_MASK39// Number of entries in each task stack array40#define TASK_STACK_BLOCK_SIZE (1 << TASK_STACK_BLOCK_BITS)41// Mask for determining index into stack block42#define TASK_STACK_INDEX_MASK (TASK_STACK_BLOCK_SIZE - 1)43#endif // BUILD_TIED_TASK_STACK4445#define TASK_NOT_PUSHED 146#define TASK_SUCCESSFULLY_PUSHED 047#define TASK_TIED 148#define TASK_UNTIED 049#define TASK_EXPLICIT 150#define TASK_IMPLICIT 051#define TASK_PROXY 152#define TASK_FULL 053#define TASK_DETACHABLE 154#define TASK_UNDETACHABLE 05556#define KMP_CANCEL_THREADS57#define KMP_THREAD_ATTR5859// Android does not have pthread_cancel. Undefine KMP_CANCEL_THREADS if being60// built on Android61#if defined(__ANDROID__)62#undef KMP_CANCEL_THREADS63#endif6465// Some WASI targets (e.g., wasm32-wasi-threads) do not support thread66// cancellation.67#if KMP_OS_WASI68#undef KMP_CANCEL_THREADS69#endif7071#if !KMP_OS_WASI72#include <signal.h>73#endif74#include <stdarg.h>75#include <stddef.h>76#include <stdio.h>77#include <stdlib.h>78#include <string.h>79#include <limits>80#include <type_traits>81/* include <ctype.h> don't use; problems with /MD on Windows* OS NT due to bad82Microsoft library. Some macros provided below to replace these functions */83#ifndef __ABSOFT_WIN84#include <sys/types.h>85#endif86#include <limits.h>87#include <time.h>8889#include <errno.h>9091#include "kmp_os.h"9293#include "kmp_safe_c_api.h"9495#if KMP_STATS_ENABLED96class kmp_stats_list;97#endif9899#if KMP_USE_HIER_SCHED100// Only include hierarchical scheduling if affinity is supported101#undef KMP_USE_HIER_SCHED102#define KMP_USE_HIER_SCHED KMP_AFFINITY_SUPPORTED103#endif104105// OMPD_SKIP_HWLOC used in libompd/omp-icv.cpp to avoid OMPD depending on hwloc106#if KMP_USE_HWLOC && KMP_AFFINITY_SUPPORTED && !defined(OMPD_SKIP_HWLOC)107#include "hwloc.h"108#ifndef HWLOC_OBJ_NUMANODE109#define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE110#endif111#ifndef HWLOC_OBJ_PACKAGE112#define HWLOC_OBJ_PACKAGE HWLOC_OBJ_SOCKET113#endif114#endif115116#if KMP_ARCH_X86 || KMP_ARCH_X86_64117#include <xmmintrin.h>118#endif119120// The below has to be defined before including "kmp_barrier.h".121#define KMP_INTERNAL_MALLOC(sz) malloc(sz)122#define KMP_INTERNAL_FREE(p) free(p)123#define KMP_INTERNAL_REALLOC(p, sz) realloc((p), (sz))124#define KMP_INTERNAL_CALLOC(n, sz) calloc((n), (sz))125126#include "kmp_debug.h"127#include "kmp_lock.h"128#include "kmp_version.h"129#include "kmp_barrier.h"130#if USE_DEBUGGER131#include "kmp_debugger.h"132#endif133#include "kmp_i18n.h"134135#define KMP_HANDLE_SIGNALS ((KMP_OS_UNIX && !KMP_OS_WASI) || KMP_OS_WINDOWS)136137#include "kmp_wrapper_malloc.h"138#if KMP_OS_UNIX139#include <unistd.h>140#if !defined NSIG && defined _NSIG141#define NSIG _NSIG142#endif143#endif144145#if KMP_OS_LINUX146#pragma weak clock_gettime147#endif148149#if OMPT_SUPPORT150#include "ompt-internal.h"151#endif152153#if OMPD_SUPPORT154#include "ompd-specific.h"155#endif156157#ifndef UNLIKELY158#define UNLIKELY(x) (x)159#endif160161// Affinity format function162#include "kmp_str.h"163164// 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64.165// 3 - fast allocation using sync, non-sync free lists of any size, non-self166// free lists of limited size.167#ifndef USE_FAST_MEMORY168#define USE_FAST_MEMORY 3169#endif170171#ifndef KMP_NESTED_HOT_TEAMS172#define KMP_NESTED_HOT_TEAMS 0173#define USE_NESTED_HOT_ARG(x)174#else175#if KMP_NESTED_HOT_TEAMS176#define USE_NESTED_HOT_ARG(x) , x177#else178#define USE_NESTED_HOT_ARG(x)179#endif180#endif181182// Assume using BGET compare_exchange instruction instead of lock by default.183#ifndef USE_CMP_XCHG_FOR_BGET184#define USE_CMP_XCHG_FOR_BGET 1185#endif186187// Test to see if queuing lock is better than bootstrap lock for bget188// #ifndef USE_QUEUING_LOCK_FOR_BGET189// #define USE_QUEUING_LOCK_FOR_BGET190// #endif191192#define KMP_NSEC_PER_SEC 1000000000L193#define KMP_USEC_PER_SEC 1000000L194#define KMP_NSEC_PER_USEC 1000L195196/*!197@ingroup BASIC_TYPES198@{199*/200201/*!202Values for bit flags used in the ident_t to describe the fields.203*/204enum {205/*! Use trampoline for internal microtasks */206KMP_IDENT_IMB = 0x01,207/*! Use c-style ident structure */208KMP_IDENT_KMPC = 0x02,209/* 0x04 is no longer used */210/*! Entry point generated by auto-parallelization */211KMP_IDENT_AUTOPAR = 0x08,212/*! Compiler generates atomic reduction option for kmpc_reduce* */213KMP_IDENT_ATOMIC_REDUCE = 0x10,214/*! To mark a 'barrier' directive in user code */215KMP_IDENT_BARRIER_EXPL = 0x20,216/*! To Mark implicit barriers. */217KMP_IDENT_BARRIER_IMPL = 0x0040,218KMP_IDENT_BARRIER_IMPL_MASK = 0x01C0,219KMP_IDENT_BARRIER_IMPL_FOR = 0x0040,220KMP_IDENT_BARRIER_IMPL_SECTIONS = 0x00C0,221222KMP_IDENT_BARRIER_IMPL_SINGLE = 0x0140,223KMP_IDENT_BARRIER_IMPL_WORKSHARE = 0x01C0,224225/*! To mark a static loop in OMPT callbacks */226KMP_IDENT_WORK_LOOP = 0x200,227/*! To mark a sections directive in OMPT callbacks */228KMP_IDENT_WORK_SECTIONS = 0x400,229/*! To mark a distribute construct in OMPT callbacks */230KMP_IDENT_WORK_DISTRIBUTE = 0x800,231/*! Atomic hint; bottom four bits as omp_sync_hint_t. Top four reserved and232not currently used. If one day we need more bits, then we can use233an invalid combination of hints to mean that another, larger field234should be used in a different flag. */235KMP_IDENT_ATOMIC_HINT_MASK = 0xFF0000,236KMP_IDENT_ATOMIC_HINT_UNCONTENDED = 0x010000,237KMP_IDENT_ATOMIC_HINT_CONTENDED = 0x020000,238KMP_IDENT_ATOMIC_HINT_NONSPECULATIVE = 0x040000,239KMP_IDENT_ATOMIC_HINT_SPECULATIVE = 0x080000,240KMP_IDENT_OPENMP_SPEC_VERSION_MASK = 0xFF000000241};242243/*!244* The ident structure that describes a source location.245*/246typedef struct ident {247kmp_int32 reserved_1; /**< might be used in Fortran; see above */248kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC249identifies this union member */250kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */251#if USE_ITT_BUILD252/* but currently used for storing region-specific ITT */253/* contextual information. */254#endif /* USE_ITT_BUILD */255kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */256char const *psource; /**< String describing the source location.257The string is composed of semi-colon separated fields258which describe the source file, the function and a pair259of line numbers that delimit the construct. */260// Returns the OpenMP version in form major*10+minor (e.g., 50 for 5.0)261kmp_int32 get_openmp_version() {262return (((flags & KMP_IDENT_OPENMP_SPEC_VERSION_MASK) >> 24) & 0xFF);263}264} ident_t;265/*!266@}267*/268269// Some forward declarations.270typedef union kmp_team kmp_team_t;271typedef struct kmp_taskdata kmp_taskdata_t;272typedef union kmp_task_team kmp_task_team_t;273typedef union kmp_team kmp_team_p;274typedef union kmp_info kmp_info_p;275typedef union kmp_root kmp_root_p;276277template <bool C = false, bool S = true> class kmp_flag_32;278template <bool C = false, bool S = true> class kmp_flag_64;279template <bool C = false, bool S = true> class kmp_atomic_flag_64;280class kmp_flag_oncore;281282#ifdef __cplusplus283extern "C" {284#endif285286/* ------------------------------------------------------------------------ */287288/* Pack two 32-bit signed integers into a 64-bit signed integer */289/* ToDo: Fix word ordering for big-endian machines. */290#define KMP_PACK_64(HIGH_32, LOW_32) \291((kmp_int64)((((kmp_uint64)(HIGH_32)) << 32) | (kmp_uint64)(LOW_32)))292293// Generic string manipulation macros. Assume that _x is of type char *294#define SKIP_WS(_x) \295{ \296while (*(_x) == ' ' || *(_x) == '\t') \297(_x)++; \298}299#define SKIP_DIGITS(_x) \300{ \301while (*(_x) >= '0' && *(_x) <= '9') \302(_x)++; \303}304#define SKIP_TOKEN(_x) \305{ \306while ((*(_x) >= '0' && *(_x) <= '9') || (*(_x) >= 'a' && *(_x) <= 'z') || \307(*(_x) >= 'A' && *(_x) <= 'Z') || *(_x) == '_') \308(_x)++; \309}310#define SKIP_TO(_x, _c) \311{ \312while (*(_x) != '\0' && *(_x) != (_c)) \313(_x)++; \314}315316/* ------------------------------------------------------------------------ */317318#define KMP_MAX(x, y) ((x) > (y) ? (x) : (y))319#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))320321/* ------------------------------------------------------------------------ */322/* Enumeration types */323324enum kmp_state_timer {325ts_stop,326ts_start,327ts_pause,328329ts_last_state330};331332enum dynamic_mode {333dynamic_default,334#ifdef USE_LOAD_BALANCE335dynamic_load_balance,336#endif /* USE_LOAD_BALANCE */337dynamic_random,338dynamic_thread_limit,339dynamic_max340};341342/* external schedule constants, duplicate enum omp_sched in omp.h in order to343* not include it here */344#ifndef KMP_SCHED_TYPE_DEFINED345#define KMP_SCHED_TYPE_DEFINED346typedef enum kmp_sched {347kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check348// Note: need to adjust __kmp_sch_map global array in case enum is changed349kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33)350kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35)351kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36)352kmp_sched_auto = 4, // mapped to kmp_sch_auto (38)353kmp_sched_upper_std = 5, // upper bound for standard schedules354kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules355kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39)356#if KMP_STATIC_STEAL_ENABLED357kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44)358#endif359kmp_sched_upper,360kmp_sched_default = kmp_sched_static, // default scheduling361kmp_sched_monotonic = 0x80000000362} kmp_sched_t;363#endif364365/*!366@ingroup WORK_SHARING367* Describes the loop schedule to be used for a parallel for loop.368*/369enum sched_type : kmp_int32 {370kmp_sch_lower = 32, /**< lower bound for unordered values */371kmp_sch_static_chunked = 33,372kmp_sch_static = 34, /**< static unspecialized */373kmp_sch_dynamic_chunked = 35,374kmp_sch_guided_chunked = 36, /**< guided unspecialized */375kmp_sch_runtime = 37,376kmp_sch_auto = 38, /**< auto */377kmp_sch_trapezoidal = 39,378379/* accessible only through KMP_SCHEDULE environment variable */380kmp_sch_static_greedy = 40,381kmp_sch_static_balanced = 41,382/* accessible only through KMP_SCHEDULE environment variable */383kmp_sch_guided_iterative_chunked = 42,384kmp_sch_guided_analytical_chunked = 43,385/* accessible only through KMP_SCHEDULE environment variable */386kmp_sch_static_steal = 44,387388/* static with chunk adjustment (e.g., simd) */389kmp_sch_static_balanced_chunked = 45,390kmp_sch_guided_simd = 46, /**< guided with chunk adjustment */391kmp_sch_runtime_simd = 47, /**< runtime with chunk adjustment */392393/* accessible only through KMP_SCHEDULE environment variable */394kmp_sch_upper, /**< upper bound for unordered values */395396kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */397kmp_ord_static_chunked = 65,398kmp_ord_static = 66, /**< ordered static unspecialized */399kmp_ord_dynamic_chunked = 67,400kmp_ord_guided_chunked = 68,401kmp_ord_runtime = 69,402kmp_ord_auto = 70, /**< ordered auto */403kmp_ord_trapezoidal = 71,404kmp_ord_upper, /**< upper bound for ordered values */405406/* Schedules for Distribute construct */407kmp_distribute_static_chunked = 91, /**< distribute static chunked */408kmp_distribute_static = 92, /**< distribute static unspecialized */409410/* For the "nomerge" versions, kmp_dispatch_next*() will always return a411single iteration/chunk, even if the loop is serialized. For the schedule412types listed above, the entire iteration vector is returned if the loop is413serialized. This doesn't work for gcc/gcomp sections. */414kmp_nm_lower = 160, /**< lower bound for nomerge values */415416kmp_nm_static_chunked =417(kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower),418kmp_nm_static = 162, /**< static unspecialized */419kmp_nm_dynamic_chunked = 163,420kmp_nm_guided_chunked = 164, /**< guided unspecialized */421kmp_nm_runtime = 165,422kmp_nm_auto = 166, /**< auto */423kmp_nm_trapezoidal = 167,424425/* accessible only through KMP_SCHEDULE environment variable */426kmp_nm_static_greedy = 168,427kmp_nm_static_balanced = 169,428/* accessible only through KMP_SCHEDULE environment variable */429kmp_nm_guided_iterative_chunked = 170,430kmp_nm_guided_analytical_chunked = 171,431kmp_nm_static_steal =432172, /* accessible only through OMP_SCHEDULE environment variable */433434kmp_nm_ord_static_chunked = 193,435kmp_nm_ord_static = 194, /**< ordered static unspecialized */436kmp_nm_ord_dynamic_chunked = 195,437kmp_nm_ord_guided_chunked = 196,438kmp_nm_ord_runtime = 197,439kmp_nm_ord_auto = 198, /**< auto */440kmp_nm_ord_trapezoidal = 199,441kmp_nm_upper, /**< upper bound for nomerge values */442443/* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. Since444we need to distinguish the three possible cases (no modifier, monotonic445modifier, nonmonotonic modifier), we need separate bits for each modifier.446The absence of monotonic does not imply nonmonotonic, especially since 4.5447says that the behaviour of the "no modifier" case is implementation defined448in 4.5, but will become "nonmonotonic" in 5.0.449450Since we're passing a full 32 bit value, we can use a couple of high bits451for these flags; out of paranoia we avoid the sign bit.452453These modifiers can be or-ed into non-static schedules by the compiler to454pass the additional information. They will be stripped early in the455processing in __kmp_dispatch_init when setting up schedules, so most of the456code won't ever see schedules with these bits set. */457kmp_sch_modifier_monotonic =458(1 << 29), /**< Set if the monotonic schedule modifier was present */459kmp_sch_modifier_nonmonotonic =460(1 << 30), /**< Set if the nonmonotonic schedule modifier was present */461462#define SCHEDULE_WITHOUT_MODIFIERS(s) \463(enum sched_type)( \464(s) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic))465#define SCHEDULE_HAS_MONOTONIC(s) (((s)&kmp_sch_modifier_monotonic) != 0)466#define SCHEDULE_HAS_NONMONOTONIC(s) (((s)&kmp_sch_modifier_nonmonotonic) != 0)467#define SCHEDULE_HAS_NO_MODIFIERS(s) \468(((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0)469#define SCHEDULE_GET_MODIFIERS(s) \470((enum sched_type)( \471(s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)))472#define SCHEDULE_SET_MODIFIERS(s, m) \473(s = (enum sched_type)((kmp_int32)s | (kmp_int32)m))474#define SCHEDULE_NONMONOTONIC 0475#define SCHEDULE_MONOTONIC 1476477kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */478};479480// Apply modifiers on internal kind to standard kind481static inline void482__kmp_sched_apply_mods_stdkind(kmp_sched_t *kind,483enum sched_type internal_kind) {484if (SCHEDULE_HAS_MONOTONIC(internal_kind)) {485*kind = (kmp_sched_t)((int)*kind | (int)kmp_sched_monotonic);486}487}488489// Apply modifiers on standard kind to internal kind490static inline void491__kmp_sched_apply_mods_intkind(kmp_sched_t kind,492enum sched_type *internal_kind) {493if ((int)kind & (int)kmp_sched_monotonic) {494*internal_kind = (enum sched_type)((int)*internal_kind |495(int)kmp_sch_modifier_monotonic);496}497}498499// Get standard schedule without modifiers500static inline kmp_sched_t __kmp_sched_without_mods(kmp_sched_t kind) {501return (kmp_sched_t)((int)kind & ~((int)kmp_sched_monotonic));502}503504/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */505typedef union kmp_r_sched {506struct {507enum sched_type r_sched_type;508int chunk;509};510kmp_int64 sched;511} kmp_r_sched_t;512513extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our514// internal schedule types515516enum library_type {517library_none,518library_serial,519library_turnaround,520library_throughput521};522523#if KMP_OS_LINUX524enum clock_function_type {525clock_function_gettimeofday,526clock_function_clock_gettime527};528#endif /* KMP_OS_LINUX */529530#if KMP_MIC_SUPPORTED531enum mic_type { non_mic, mic1, mic2, mic3, dummy };532#endif533534// OpenMP 3.1 - Nested num threads array535typedef struct kmp_nested_nthreads_t {536int *nth;537int size;538int used;539} kmp_nested_nthreads_t;540541extern kmp_nested_nthreads_t __kmp_nested_nth;542543/* -- fast reduction stuff ------------------------------------------------ */544545#undef KMP_FAST_REDUCTION_BARRIER546#define KMP_FAST_REDUCTION_BARRIER 1547548#undef KMP_FAST_REDUCTION_CORE_DUO549#if KMP_ARCH_X86 || KMP_ARCH_X86_64550#define KMP_FAST_REDUCTION_CORE_DUO 1551#endif552553enum _reduction_method {554reduction_method_not_defined = 0,555critical_reduce_block = (1 << 8),556atomic_reduce_block = (2 << 8),557tree_reduce_block = (3 << 8),558empty_reduce_block = (4 << 8)559};560561// Description of the packed_reduction_method variable:562// The packed_reduction_method variable consists of two enum types variables563// that are packed together into 0-th byte and 1-st byte:564// 0: (packed_reduction_method & 0x000000FF) is a 'enum barrier_type' value of565// barrier that will be used in fast reduction: bs_plain_barrier or566// bs_reduction_barrier567// 1: (packed_reduction_method & 0x0000FF00) is a reduction method that will568// be used in fast reduction;569// Reduction method is of 'enum _reduction_method' type and it's defined the way570// so that the bits of 0-th byte are empty, so no need to execute a shift571// instruction while packing/unpacking572573#if KMP_FAST_REDUCTION_BARRIER574#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \575((reduction_method) | (barrier_type))576577#define UNPACK_REDUCTION_METHOD(packed_reduction_method) \578((enum _reduction_method)((packed_reduction_method) & (0x0000FF00)))579580#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \581((enum barrier_type)((packed_reduction_method) & (0x000000FF)))582#else583#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \584(reduction_method)585586#define UNPACK_REDUCTION_METHOD(packed_reduction_method) \587(packed_reduction_method)588589#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) (bs_plain_barrier)590#endif591592#define TEST_REDUCTION_METHOD(packed_reduction_method, which_reduction_block) \593((UNPACK_REDUCTION_METHOD(packed_reduction_method)) == \594(which_reduction_block))595596#if KMP_FAST_REDUCTION_BARRIER597#define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \598(PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_reduction_barrier))599600#define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \601(PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_plain_barrier))602#endif603604typedef int PACKED_REDUCTION_METHOD_T;605606/* -- end of fast reduction stuff ----------------------------------------- */607608#if KMP_OS_WINDOWS609#define USE_CBLKDATA610#if KMP_MSVC_COMPAT611#pragma warning(push)612#pragma warning(disable : 271 310)613#endif614#include <windows.h>615#if KMP_MSVC_COMPAT616#pragma warning(pop)617#endif618#endif619620#if KMP_OS_UNIX621#if !KMP_OS_WASI622#include <dlfcn.h>623#endif624#include <pthread.h>625#endif626627enum kmp_hw_t : int {628KMP_HW_UNKNOWN = -1,629KMP_HW_SOCKET = 0,630KMP_HW_PROC_GROUP,631KMP_HW_NUMA,632KMP_HW_DIE,633KMP_HW_LLC,634KMP_HW_L3,635KMP_HW_TILE,636KMP_HW_MODULE,637KMP_HW_L2,638KMP_HW_L1,639KMP_HW_CORE,640KMP_HW_THREAD,641KMP_HW_LAST642};643644typedef enum kmp_hw_core_type_t {645KMP_HW_CORE_TYPE_UNKNOWN = 0x0,646#if KMP_ARCH_X86 || KMP_ARCH_X86_64647KMP_HW_CORE_TYPE_ATOM = 0x20,648KMP_HW_CORE_TYPE_CORE = 0x40,649KMP_HW_MAX_NUM_CORE_TYPES = 3,650#else651KMP_HW_MAX_NUM_CORE_TYPES = 1,652#endif653} kmp_hw_core_type_t;654655#define KMP_HW_MAX_NUM_CORE_EFFS 8656657#define KMP_DEBUG_ASSERT_VALID_HW_TYPE(type) \658KMP_DEBUG_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)659#define KMP_ASSERT_VALID_HW_TYPE(type) \660KMP_ASSERT(type >= (kmp_hw_t)0 && type < KMP_HW_LAST)661662#define KMP_FOREACH_HW_TYPE(type) \663for (kmp_hw_t type = (kmp_hw_t)0; type < KMP_HW_LAST; \664type = (kmp_hw_t)((int)type + 1))665666const char *__kmp_hw_get_keyword(kmp_hw_t type, bool plural = false);667const char *__kmp_hw_get_catalog_string(kmp_hw_t type, bool plural = false);668const char *__kmp_hw_get_core_type_string(kmp_hw_core_type_t type);669670/* Only Linux* OS and Windows* OS support thread affinity. */671#if KMP_AFFINITY_SUPPORTED672673// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).674#if KMP_OS_WINDOWS675#if _MSC_VER < 1600 && KMP_MSVC_COMPAT676typedef struct GROUP_AFFINITY {677KAFFINITY Mask;678WORD Group;679WORD Reserved[3];680} GROUP_AFFINITY;681#endif /* _MSC_VER < 1600 */682#if KMP_GROUP_AFFINITY683extern int __kmp_num_proc_groups;684#else685static const int __kmp_num_proc_groups = 1;686#endif /* KMP_GROUP_AFFINITY */687typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD);688extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount;689690typedef WORD (*kmp_GetActiveProcessorGroupCount_t)(void);691extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount;692693typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *);694extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity;695696typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *,697GROUP_AFFINITY *);698extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;699#endif /* KMP_OS_WINDOWS */700701#if KMP_USE_HWLOC && !defined(OMPD_SKIP_HWLOC)702extern hwloc_topology_t __kmp_hwloc_topology;703extern int __kmp_hwloc_error;704#endif705706extern size_t __kmp_affin_mask_size;707#define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0)708#define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0)709#define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)710#define KMP_CPU_SET_ITERATE(i, mask) \711for (i = (mask)->begin(); (int)i != (mask)->end(); i = (mask)->next(i))712#define KMP_CPU_SET(i, mask) (mask)->set(i)713#define KMP_CPU_ISSET(i, mask) (mask)->is_set(i)714#define KMP_CPU_CLR(i, mask) (mask)->clear(i)715#define KMP_CPU_ZERO(mask) (mask)->zero()716#define KMP_CPU_ISEMPTY(mask) (mask)->empty()717#define KMP_CPU_COPY(dest, src) (dest)->copy(src)718#define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src)719#define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not()720#define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src)721#define KMP_CPU_EQUAL(dest, src) (dest)->is_equal(src)722#define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask())723#define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr)724#define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr)725#define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr)726#define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr)727#define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr)728#define KMP_CPU_INDEX(arr, i) __kmp_affinity_dispatch->index_mask_array(arr, i)729#define KMP_CPU_ALLOC_ARRAY(arr, n) \730(arr = __kmp_affinity_dispatch->allocate_mask_array(n))731#define KMP_CPU_FREE_ARRAY(arr, n) \732__kmp_affinity_dispatch->deallocate_mask_array(arr)733#define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n)734#define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n)735#define __kmp_get_system_affinity(mask, abort_bool) \736(mask)->get_system_affinity(abort_bool)737#define __kmp_set_system_affinity(mask, abort_bool) \738(mask)->set_system_affinity(abort_bool)739#define __kmp_get_proc_group(mask) (mask)->get_proc_group()740741class KMPAffinity {742public:743class Mask {744public:745void *operator new(size_t n);746void operator delete(void *p);747void *operator new[](size_t n);748void operator delete[](void *p);749virtual ~Mask() {}750// Set bit i to 1751virtual void set(int i) {}752// Return bit i753virtual bool is_set(int i) const { return false; }754// Set bit i to 0755virtual void clear(int i) {}756// Zero out entire mask757virtual void zero() {}758// Check whether mask is empty759virtual bool empty() const { return true; }760// Copy src into this mask761virtual void copy(const Mask *src) {}762// this &= rhs763virtual void bitwise_and(const Mask *rhs) {}764// this |= rhs765virtual void bitwise_or(const Mask *rhs) {}766// this = ~this767virtual void bitwise_not() {}768// this == rhs769virtual bool is_equal(const Mask *rhs) const { return false; }770// API for iterating over an affinity mask771// for (int i = mask->begin(); i != mask->end(); i = mask->next(i))772virtual int begin() const { return 0; }773virtual int end() const { return 0; }774virtual int next(int previous) const { return 0; }775#if KMP_OS_WINDOWS776virtual int set_process_affinity(bool abort_on_error) const { return -1; }777#endif778// Set the system's affinity to this affinity mask's value779virtual int set_system_affinity(bool abort_on_error) const { return -1; }780// Set this affinity mask to the current system affinity781virtual int get_system_affinity(bool abort_on_error) { return -1; }782// Only 1 DWORD in the mask should have any procs set.783// Return the appropriate index, or -1 for an invalid mask.784virtual int get_proc_group() const { return -1; }785int get_max_cpu() const {786int cpu;787int max_cpu = -1;788KMP_CPU_SET_ITERATE(cpu, this) {789if (cpu > max_cpu)790max_cpu = cpu;791}792return max_cpu;793}794};795void *operator new(size_t n);796void operator delete(void *p);797// Need virtual destructor798virtual ~KMPAffinity() = default;799// Determine if affinity is capable800virtual void determine_capable(const char *env_var) {}801// Bind the current thread to os proc802virtual void bind_thread(int proc) {}803// Factory functions to allocate/deallocate a mask804virtual Mask *allocate_mask() { return nullptr; }805virtual void deallocate_mask(Mask *m) {}806virtual Mask *allocate_mask_array(int num) { return nullptr; }807virtual void deallocate_mask_array(Mask *m) {}808virtual Mask *index_mask_array(Mask *m, int index) { return nullptr; }809static void pick_api();810static void destroy_api();811enum api_type {812NATIVE_OS813#if KMP_USE_HWLOC814,815HWLOC816#endif817};818virtual api_type get_api_type() const {819KMP_ASSERT(0);820return NATIVE_OS;821}822823private:824static bool picked_api;825};826827typedef KMPAffinity::Mask kmp_affin_mask_t;828extern KMPAffinity *__kmp_affinity_dispatch;829830#ifndef KMP_OS_AIX831class kmp_affinity_raii_t {832kmp_affin_mask_t *mask;833bool restored;834835public:836kmp_affinity_raii_t(const kmp_affin_mask_t *new_mask = nullptr)837: mask(nullptr), restored(false) {838if (KMP_AFFINITY_CAPABLE()) {839KMP_CPU_ALLOC(mask);840KMP_ASSERT(mask != NULL);841__kmp_get_system_affinity(mask, /*abort_on_error=*/true);842if (new_mask)843__kmp_set_system_affinity(new_mask, /*abort_on_error=*/true);844}845}846void restore() {847if (mask && KMP_AFFINITY_CAPABLE() && !restored) {848__kmp_set_system_affinity(mask, /*abort_on_error=*/true);849KMP_CPU_FREE(mask);850}851restored = true;852}853~kmp_affinity_raii_t() { restore(); }854};855#endif // !KMP_OS_AIX856857// Declare local char buffers with this size for printing debug and info858// messages, using __kmp_affinity_print_mask().859#define KMP_AFFIN_MASK_PRINT_LEN 1024860861enum affinity_type {862affinity_none = 0,863affinity_physical,864affinity_logical,865affinity_compact,866affinity_scatter,867affinity_explicit,868affinity_balanced,869affinity_disabled, // not used outsize the env var parser870affinity_default871};872873enum affinity_top_method {874affinity_top_method_all = 0, // try all (supported) methods, in order875#if KMP_ARCH_X86 || KMP_ARCH_X86_64876affinity_top_method_apicid,877affinity_top_method_x2apicid,878affinity_top_method_x2apicid_1f,879#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */880affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too881#if KMP_GROUP_AFFINITY882affinity_top_method_group,883#endif /* KMP_GROUP_AFFINITY */884affinity_top_method_flat,885#if KMP_USE_HWLOC886affinity_top_method_hwloc,887#endif888affinity_top_method_default889};890891#define affinity_respect_mask_default (2)892893typedef struct kmp_affinity_flags_t {894unsigned dups : 1;895unsigned verbose : 1;896unsigned warnings : 1;897unsigned respect : 2;898unsigned reset : 1;899unsigned initialized : 1;900unsigned core_types_gran : 1;901unsigned core_effs_gran : 1;902unsigned omp_places : 1;903unsigned reserved : 22;904} kmp_affinity_flags_t;905KMP_BUILD_ASSERT(sizeof(kmp_affinity_flags_t) == 4);906907typedef struct kmp_affinity_ids_t {908int os_id;909int ids[KMP_HW_LAST];910} kmp_affinity_ids_t;911912typedef struct kmp_affinity_attrs_t {913int core_type : 8;914int core_eff : 8;915unsigned valid : 1;916unsigned reserved : 15;917} kmp_affinity_attrs_t;918#define KMP_AFFINITY_ATTRS_UNKNOWN \919{ KMP_HW_CORE_TYPE_UNKNOWN, kmp_hw_attr_t::UNKNOWN_CORE_EFF, 0, 0 }920921typedef struct kmp_affinity_t {922char *proclist;923enum affinity_type type;924kmp_hw_t gran;925int gran_levels;926kmp_affinity_attrs_t core_attr_gran;927int compact;928int offset;929kmp_affinity_flags_t flags;930unsigned num_masks;931kmp_affin_mask_t *masks;932kmp_affinity_ids_t *ids;933kmp_affinity_attrs_t *attrs;934unsigned num_os_id_masks;935kmp_affin_mask_t *os_id_masks;936const char *env_var;937} kmp_affinity_t;938939#define KMP_AFFINITY_INIT(env) \940{ \941nullptr, affinity_default, KMP_HW_UNKNOWN, -1, KMP_AFFINITY_ATTRS_UNKNOWN, \9420, 0, \943{TRUE, FALSE, TRUE, affinity_respect_mask_default, FALSE, FALSE, \944FALSE, FALSE, FALSE}, \9450, nullptr, nullptr, nullptr, 0, nullptr, env \946}947948extern enum affinity_top_method __kmp_affinity_top_method;949extern kmp_affinity_t __kmp_affinity;950extern kmp_affinity_t __kmp_hh_affinity;951extern kmp_affinity_t *__kmp_affinities[2];952953extern void __kmp_affinity_bind_thread(int which);954955extern kmp_affin_mask_t *__kmp_affin_fullMask;956extern kmp_affin_mask_t *__kmp_affin_origMask;957extern char *__kmp_cpuinfo_file;958959#if KMP_WEIGHTED_ITERATIONS_SUPPORTED960extern int __kmp_first_osid_with_ecore;961#endif962963#endif /* KMP_AFFINITY_SUPPORTED */964965// This needs to be kept in sync with the values in omp.h !!!966typedef enum kmp_proc_bind_t {967proc_bind_false = 0,968proc_bind_true,969proc_bind_primary,970proc_bind_close,971proc_bind_spread,972proc_bind_intel, // use KMP_AFFINITY interface973proc_bind_default974} kmp_proc_bind_t;975976typedef struct kmp_nested_proc_bind_t {977kmp_proc_bind_t *bind_types;978int size;979int used;980} kmp_nested_proc_bind_t;981982extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;983extern kmp_proc_bind_t __kmp_teams_proc_bind;984985extern int __kmp_display_affinity;986extern char *__kmp_affinity_format;987static const size_t KMP_AFFINITY_FORMAT_SIZE = 512;988#if OMPT_SUPPORT989extern int __kmp_tool;990extern char *__kmp_tool_libraries;991#endif // OMPT_SUPPORT992993#if KMP_AFFINITY_SUPPORTED994#define KMP_PLACE_ALL (-1)995#define KMP_PLACE_UNDEFINED (-2)996// Is KMP_AFFINITY is being used instead of OMP_PROC_BIND/OMP_PLACES?997#define KMP_AFFINITY_NON_PROC_BIND \998((__kmp_nested_proc_bind.bind_types[0] == proc_bind_false || \999__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) && \1000(__kmp_affinity.num_masks > 0 || __kmp_affinity.type == affinity_balanced))1001#endif /* KMP_AFFINITY_SUPPORTED */10021003extern int __kmp_affinity_num_places;10041005typedef enum kmp_cancel_kind_t {1006cancel_noreq = 0,1007cancel_parallel = 1,1008cancel_loop = 2,1009cancel_sections = 3,1010cancel_taskgroup = 41011} kmp_cancel_kind_t;10121013// KMP_HW_SUBSET support:1014typedef struct kmp_hws_item {1015int num;1016int offset;1017} kmp_hws_item_t;10181019extern kmp_hws_item_t __kmp_hws_socket;1020extern kmp_hws_item_t __kmp_hws_die;1021extern kmp_hws_item_t __kmp_hws_node;1022extern kmp_hws_item_t __kmp_hws_tile;1023extern kmp_hws_item_t __kmp_hws_core;1024extern kmp_hws_item_t __kmp_hws_proc;1025extern int __kmp_hws_requested;1026extern int __kmp_hws_abs_flag; // absolute or per-item number requested10271028/* ------------------------------------------------------------------------ */10291030#define KMP_PAD(type, sz) \1031(sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1))10321033// We need to avoid using -1 as a GTID as +1 is added to the gtid1034// when storing it in a lock, and the value 0 is reserved.1035#define KMP_GTID_DNE (-2) /* Does not exist */1036#define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */1037#define KMP_GTID_MONITOR (-4) /* Monitor thread ID */1038#define KMP_GTID_UNKNOWN (-5) /* Is not known */1039#define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */10401041/* OpenMP 5.0 Memory Management support */10421043#ifndef __OMP_H1044// Duplicate type definitions from omp.h1045typedef uintptr_t omp_uintptr_t;10461047typedef enum {1048omp_atk_sync_hint = 1,1049omp_atk_alignment = 2,1050omp_atk_access = 3,1051omp_atk_pool_size = 4,1052omp_atk_fallback = 5,1053omp_atk_fb_data = 6,1054omp_atk_pinned = 7,1055omp_atk_partition = 81056} omp_alloctrait_key_t;10571058typedef enum {1059omp_atv_false = 0,1060omp_atv_true = 1,1061omp_atv_contended = 3,1062omp_atv_uncontended = 4,1063omp_atv_serialized = 5,1064omp_atv_sequential = omp_atv_serialized, // (deprecated)1065omp_atv_private = 6,1066omp_atv_all = 7,1067omp_atv_thread = 8,1068omp_atv_pteam = 9,1069omp_atv_cgroup = 10,1070omp_atv_default_mem_fb = 11,1071omp_atv_null_fb = 12,1072omp_atv_abort_fb = 13,1073omp_atv_allocator_fb = 14,1074omp_atv_environment = 15,1075omp_atv_nearest = 16,1076omp_atv_blocked = 17,1077omp_atv_interleaved = 181078} omp_alloctrait_value_t;1079#define omp_atv_default ((omp_uintptr_t)-1)10801081typedef void *omp_memspace_handle_t;1082extern omp_memspace_handle_t const omp_default_mem_space;1083extern omp_memspace_handle_t const omp_large_cap_mem_space;1084extern omp_memspace_handle_t const omp_const_mem_space;1085extern omp_memspace_handle_t const omp_high_bw_mem_space;1086extern omp_memspace_handle_t const omp_low_lat_mem_space;1087extern omp_memspace_handle_t const llvm_omp_target_host_mem_space;1088extern omp_memspace_handle_t const llvm_omp_target_shared_mem_space;1089extern omp_memspace_handle_t const llvm_omp_target_device_mem_space;10901091typedef struct {1092omp_alloctrait_key_t key;1093omp_uintptr_t value;1094} omp_alloctrait_t;10951096typedef void *omp_allocator_handle_t;1097extern omp_allocator_handle_t const omp_null_allocator;1098extern omp_allocator_handle_t const omp_default_mem_alloc;1099extern omp_allocator_handle_t const omp_large_cap_mem_alloc;1100extern omp_allocator_handle_t const omp_const_mem_alloc;1101extern omp_allocator_handle_t const omp_high_bw_mem_alloc;1102extern omp_allocator_handle_t const omp_low_lat_mem_alloc;1103extern omp_allocator_handle_t const omp_cgroup_mem_alloc;1104extern omp_allocator_handle_t const omp_pteam_mem_alloc;1105extern omp_allocator_handle_t const omp_thread_mem_alloc;1106extern omp_allocator_handle_t const llvm_omp_target_host_mem_alloc;1107extern omp_allocator_handle_t const llvm_omp_target_shared_mem_alloc;1108extern omp_allocator_handle_t const llvm_omp_target_device_mem_alloc;1109extern omp_allocator_handle_t const kmp_max_mem_alloc;1110extern omp_allocator_handle_t __kmp_def_allocator;11111112// end of duplicate type definitions from omp.h1113#endif11141115extern int __kmp_memkind_available;11161117typedef omp_memspace_handle_t kmp_memspace_t; // placeholder11181119typedef struct kmp_allocator_t {1120omp_memspace_handle_t memspace;1121void **memkind; // pointer to memkind1122size_t alignment;1123omp_alloctrait_value_t fb;1124kmp_allocator_t *fb_data;1125kmp_uint64 pool_size;1126kmp_uint64 pool_used;1127bool pinned;1128} kmp_allocator_t;11291130extern omp_allocator_handle_t __kmpc_init_allocator(int gtid,1131omp_memspace_handle_t,1132int ntraits,1133omp_alloctrait_t traits[]);1134extern void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al);1135extern void __kmpc_set_default_allocator(int gtid, omp_allocator_handle_t al);1136extern omp_allocator_handle_t __kmpc_get_default_allocator(int gtid);1137// external interfaces, may be used by compiler1138extern void *__kmpc_alloc(int gtid, size_t sz, omp_allocator_handle_t al);1139extern void *__kmpc_aligned_alloc(int gtid, size_t align, size_t sz,1140omp_allocator_handle_t al);1141extern void *__kmpc_calloc(int gtid, size_t nmemb, size_t sz,1142omp_allocator_handle_t al);1143extern void *__kmpc_realloc(int gtid, void *ptr, size_t sz,1144omp_allocator_handle_t al,1145omp_allocator_handle_t free_al);1146extern void __kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);1147// internal interfaces, contain real implementation1148extern void *__kmp_alloc(int gtid, size_t align, size_t sz,1149omp_allocator_handle_t al);1150extern void *__kmp_calloc(int gtid, size_t align, size_t nmemb, size_t sz,1151omp_allocator_handle_t al);1152extern void *__kmp_realloc(int gtid, void *ptr, size_t sz,1153omp_allocator_handle_t al,1154omp_allocator_handle_t free_al);1155extern void ___kmpc_free(int gtid, void *ptr, omp_allocator_handle_t al);11561157extern void __kmp_init_memkind();1158extern void __kmp_fini_memkind();1159extern void __kmp_init_target_mem();11601161/* ------------------------------------------------------------------------ */11621163#if ENABLE_LIBOMPTARGET1164extern void __kmp_init_target_task();1165#endif11661167/* ------------------------------------------------------------------------ */11681169#define KMP_UINT64_MAX \1170(~((kmp_uint64)1 << ((sizeof(kmp_uint64) * (1 << 3)) - 1)))11711172#define KMP_MIN_NTH 111731174#ifndef KMP_MAX_NTH1175#if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX1176#define KMP_MAX_NTH PTHREAD_THREADS_MAX1177#else1178#ifdef __ve__1179// VE's pthread supports only up to 64 threads per a VE process.1180// Please check p. 14 of following documentation for more details.1181// https://sxauroratsubasa.sakura.ne.jp/documents/veos/en/VEOS_high_level_design.pdf1182#define KMP_MAX_NTH 641183#else1184#define KMP_MAX_NTH INT_MAX1185#endif1186#endif1187#endif /* KMP_MAX_NTH */11881189#ifdef PTHREAD_STACK_MIN1190#define KMP_MIN_STKSIZE ((size_t)PTHREAD_STACK_MIN)1191#else1192#define KMP_MIN_STKSIZE ((size_t)(32 * 1024))1193#endif11941195#if KMP_OS_AIX && KMP_ARCH_PPC1196#define KMP_MAX_STKSIZE 0x10000000 /* 256Mb max size on 32-bit AIX */1197#else1198#define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))1199#endif12001201#if KMP_ARCH_X861202#define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024))1203#elif KMP_ARCH_X86_641204#define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))1205#define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024))1206#elif KMP_ARCH_VE1207// Minimum stack size for pthread for VE is 4MB.1208// https://www.hpc.nec/documents/veos/en/glibc/Difference_Points_glibc.htm1209#define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))1210#elif KMP_OS_AIX1211// The default stack size for worker threads on AIX is 4MB.1212#define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024))1213#else1214#define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024))1215#endif12161217#define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t)(1024 * 1024))1218#define KMP_MIN_MALLOC_POOL_INCR ((size_t)(4 * 1024))1219#define KMP_MAX_MALLOC_POOL_INCR \1220(~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1)))12211222#define KMP_MIN_STKOFFSET (0)1223#define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE1224#if KMP_OS_DARWIN1225#define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET1226#else1227#define KMP_DEFAULT_STKOFFSET CACHE_LINE1228#endif12291230#define KMP_MIN_STKPADDING (0)1231#define KMP_MAX_STKPADDING (2 * 1024 * 1024)12321233#define KMP_BLOCKTIME_MULTIPLIER \1234(1000000) /* number of blocktime units per second */1235#define KMP_MIN_BLOCKTIME (0)1236#define KMP_MAX_BLOCKTIME \1237(INT_MAX) /* Must be this for "infinite" setting the work */12381239/* __kmp_blocktime is in microseconds */1240#define KMP_DEFAULT_BLOCKTIME (__kmp_is_hybrid_cpu() ? (0) : (200000))12411242#if KMP_USE_MONITOR1243#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024))1244#define KMP_MIN_MONITOR_WAKEUPS (1) // min times monitor wakes up per second1245#define KMP_MAX_MONITOR_WAKEUPS (1000) // max times monitor can wake up per sec12461247/* Calculate new number of monitor wakeups for a specific block time based on1248previous monitor_wakeups. Only allow increasing number of wakeups */1249#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \1250(((blocktime) == KMP_MAX_BLOCKTIME) ? (monitor_wakeups) \1251: ((blocktime) == KMP_MIN_BLOCKTIME) ? KMP_MAX_MONITOR_WAKEUPS \1252: ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) \1253? (monitor_wakeups) \1254: (KMP_BLOCKTIME_MULTIPLIER) / (blocktime))12551256/* Calculate number of intervals for a specific block time based on1257monitor_wakeups */1258#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \1259(((blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1) / \1260(KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)))1261#else1262#define KMP_BLOCKTIME(team, tid) \1263(get__bt_set(team, tid) ? get__blocktime(team, tid) : __kmp_dflt_blocktime)1264#if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64)1265// HW TSC is used to reduce overhead (clock tick instead of nanosecond).1266extern kmp_uint64 __kmp_ticks_per_msec;1267extern kmp_uint64 __kmp_ticks_per_usec;1268#if KMP_COMPILER_ICC || KMP_COMPILER_ICX1269#define KMP_NOW() ((kmp_uint64)_rdtsc())1270#else1271#define KMP_NOW() __kmp_hardware_timestamp()1272#endif1273#define KMP_BLOCKTIME_INTERVAL(team, tid) \1274((kmp_uint64)KMP_BLOCKTIME(team, tid) * __kmp_ticks_per_usec)1275#define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW())1276#else1277// System time is retrieved sporadically while blocking.1278extern kmp_uint64 __kmp_now_nsec();1279#define KMP_NOW() __kmp_now_nsec()1280#define KMP_BLOCKTIME_INTERVAL(team, tid) \1281((kmp_uint64)KMP_BLOCKTIME(team, tid) * (kmp_uint64)KMP_NSEC_PER_USEC)1282#define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW())1283#endif1284#endif // KMP_USE_MONITOR12851286#define KMP_MIN_STATSCOLS 401287#define KMP_MAX_STATSCOLS 40961288#define KMP_DEFAULT_STATSCOLS 8012891290#define KMP_MIN_INTERVAL 01291#define KMP_MAX_INTERVAL (INT_MAX - 1)1292#define KMP_DEFAULT_INTERVAL 012931294#define KMP_MIN_CHUNK 11295#define KMP_MAX_CHUNK (INT_MAX - 1)1296#define KMP_DEFAULT_CHUNK 112971298#define KMP_MIN_DISP_NUM_BUFF 11299#define KMP_DFLT_DISP_NUM_BUFF 71300#define KMP_MAX_DISP_NUM_BUFF 409613011302#define KMP_MAX_ORDERED 813031304#define KMP_MAX_FIELDS 3213051306#define KMP_MAX_BRANCH_BITS 3113071308#define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX13091310#define KMP_MAX_DEFAULT_DEVICE_LIMIT INT_MAX13111312#define KMP_MAX_TASK_PRIORITY_LIMIT INT_MAX13131314/* Minimum number of threads before switch to TLS gtid (experimentally1315determined) */1316/* josh TODO: what about OS X* tuning? */1317#if KMP_ARCH_X86 || KMP_ARCH_X86_641318#define KMP_TLS_GTID_MIN 51319#else1320#define KMP_TLS_GTID_MIN INT_MAX1321#endif13221323#define KMP_MASTER_TID(tid) (0 == (tid))1324#define KMP_WORKER_TID(tid) (0 != (tid))13251326#define KMP_MASTER_GTID(gtid) (0 == __kmp_tid_from_gtid((gtid)))1327#define KMP_WORKER_GTID(gtid) (0 != __kmp_tid_from_gtid((gtid)))1328#define KMP_INITIAL_GTID(gtid) (0 == (gtid))13291330#ifndef TRUE1331#define FALSE 01332#define TRUE (!FALSE)1333#endif13341335/* NOTE: all of the following constants must be even */13361337#if KMP_OS_WINDOWS1338#define KMP_INIT_WAIT 64U /* initial number of spin-tests */1339#define KMP_NEXT_WAIT 32U /* susequent number of spin-tests */1340#elif KMP_OS_LINUX1341#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */1342#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */1343#elif KMP_OS_DARWIN1344/* TODO: tune for KMP_OS_DARWIN */1345#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */1346#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */1347#elif KMP_OS_DRAGONFLY1348/* TODO: tune for KMP_OS_DRAGONFLY */1349#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */1350#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */1351#elif KMP_OS_FREEBSD1352/* TODO: tune for KMP_OS_FREEBSD */1353#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */1354#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */1355#elif KMP_OS_NETBSD1356/* TODO: tune for KMP_OS_NETBSD */1357#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */1358#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */1359#elif KMP_OS_OPENBSD1360/* TODO: tune for KMP_OS_OPENBSD */1361#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */1362#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */1363#elif KMP_OS_HURD1364/* TODO: tune for KMP_OS_HURD */1365#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */1366#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */1367#elif KMP_OS_SOLARIS1368/* TODO: tune for KMP_OS_SOLARIS */1369#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */1370#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */1371#elif KMP_OS_WASI1372/* TODO: tune for KMP_OS_WASI */1373#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */1374#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */1375#elif KMP_OS_AIX1376/* TODO: tune for KMP_OS_AIX */1377#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */1378#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */1379#endif13801381#if KMP_ARCH_X86 || KMP_ARCH_X86_641382typedef struct kmp_cpuid {1383kmp_uint32 eax;1384kmp_uint32 ebx;1385kmp_uint32 ecx;1386kmp_uint32 edx;1387} kmp_cpuid_t;13881389typedef struct kmp_cpuinfo_flags_t {1390unsigned sse2 : 1; // 0 if SSE2 instructions are not supported, 1 otherwise.1391unsigned rtm : 1; // 0 if RTM instructions are not supported, 1 otherwise.1392unsigned hybrid : 1;1393unsigned reserved : 29; // Ensure size of 32 bits1394} kmp_cpuinfo_flags_t;13951396typedef struct kmp_cpuinfo {1397int initialized; // If 0, other fields are not initialized.1398int signature; // CPUID(1).EAX1399int family; // CPUID(1).EAX[27:20]+CPUID(1).EAX[11:8] (Extended Family+Family)1400int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended1401// Model << 4 ) + Model)1402int stepping; // CPUID(1).EAX[3:0] ( Stepping )1403kmp_cpuinfo_flags_t flags;1404int apic_id;1405kmp_uint64 frequency; // Nominal CPU frequency in Hz.1406char name[3 * sizeof(kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004)1407} kmp_cpuinfo_t;14081409extern void __kmp_query_cpuid(kmp_cpuinfo_t *p);14101411#if KMP_OS_UNIX1412// subleaf is only needed for cache and topology discovery and can be set to1413// zero in most cases1414static inline void __kmp_x86_cpuid(int leaf, int subleaf, struct kmp_cpuid *p) {1415__asm__ __volatile__("cpuid"1416: "=a"(p->eax), "=b"(p->ebx), "=c"(p->ecx), "=d"(p->edx)1417: "a"(leaf), "c"(subleaf));1418}1419// Load p into FPU control word1420static inline void __kmp_load_x87_fpu_control_word(const kmp_int16 *p) {1421__asm__ __volatile__("fldcw %0" : : "m"(*p));1422}1423// Store FPU control word into p1424static inline void __kmp_store_x87_fpu_control_word(kmp_int16 *p) {1425__asm__ __volatile__("fstcw %0" : "=m"(*p));1426}1427static inline void __kmp_clear_x87_fpu_status_word() {1428#if KMP_MIC1429// 32-bit protected mode x87 FPU state1430struct x87_fpu_state {1431unsigned cw;1432unsigned sw;1433unsigned tw;1434unsigned fip;1435unsigned fips;1436unsigned fdp;1437unsigned fds;1438};1439struct x87_fpu_state fpu_state = {0, 0, 0, 0, 0, 0, 0};1440__asm__ __volatile__("fstenv %0\n\t" // store FP env1441"andw $0x7f00, %1\n\t" // clear 0-7,15 bits of FP SW1442"fldenv %0\n\t" // load FP env back1443: "+m"(fpu_state), "+m"(fpu_state.sw));1444#else1445__asm__ __volatile__("fnclex");1446#endif // KMP_MIC1447}1448#if __SSE__1449static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }1450static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }1451#else1452static inline void __kmp_load_mxcsr(const kmp_uint32 *p) {}1453static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = 0; }1454#endif1455#else1456// Windows still has these as external functions in assembly file1457extern void __kmp_x86_cpuid(int mode, int mode2, struct kmp_cpuid *p);1458extern void __kmp_load_x87_fpu_control_word(const kmp_int16 *p);1459extern void __kmp_store_x87_fpu_control_word(kmp_int16 *p);1460extern void __kmp_clear_x87_fpu_status_word();1461static inline void __kmp_load_mxcsr(const kmp_uint32 *p) { _mm_setcsr(*p); }1462static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); }1463#endif // KMP_OS_UNIX14641465#define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */14661467// User-level Monitor/Mwait1468#if KMP_HAVE_UMWAIT1469// We always try for UMWAIT first1470#if KMP_HAVE_WAITPKG_INTRINSICS1471#if KMP_HAVE_IMMINTRIN_H1472#include <immintrin.h>1473#elif KMP_HAVE_INTRIN_H1474#include <intrin.h>1475#endif1476#endif // KMP_HAVE_WAITPKG_INTRINSICS14771478KMP_ATTRIBUTE_TARGET_WAITPKG1479static inline int __kmp_tpause(uint32_t hint, uint64_t counter) {1480#if !KMP_HAVE_WAITPKG_INTRINSICS1481uint32_t timeHi = uint32_t(counter >> 32);1482uint32_t timeLo = uint32_t(counter & 0xffffffff);1483char flag;1484__asm__ volatile("#tpause\n.byte 0x66, 0x0F, 0xAE, 0xF1\n"1485"setb %0"1486// The "=q" restraint means any register accessible as rl1487// in 32-bit mode: a, b, c, and d;1488// in 64-bit mode: any integer register1489: "=q"(flag)1490: "a"(timeLo), "d"(timeHi), "c"(hint)1491:);1492return flag;1493#else1494return _tpause(hint, counter);1495#endif1496}1497KMP_ATTRIBUTE_TARGET_WAITPKG1498static inline void __kmp_umonitor(void *cacheline) {1499#if !KMP_HAVE_WAITPKG_INTRINSICS1500__asm__ volatile("# umonitor\n.byte 0xF3, 0x0F, 0xAE, 0x01 "1501:1502: "a"(cacheline)1503:);1504#else1505_umonitor(cacheline);1506#endif1507}1508KMP_ATTRIBUTE_TARGET_WAITPKG1509static inline int __kmp_umwait(uint32_t hint, uint64_t counter) {1510#if !KMP_HAVE_WAITPKG_INTRINSICS1511uint32_t timeHi = uint32_t(counter >> 32);1512uint32_t timeLo = uint32_t(counter & 0xffffffff);1513char flag;1514__asm__ volatile("#umwait\n.byte 0xF2, 0x0F, 0xAE, 0xF1\n"1515"setb %0"1516// The "=q" restraint means any register accessible as rl1517// in 32-bit mode: a, b, c, and d;1518// in 64-bit mode: any integer register1519: "=q"(flag)1520: "a"(timeLo), "d"(timeHi), "c"(hint)1521:);1522return flag;1523#else1524return _umwait(hint, counter);1525#endif1526}1527#elif KMP_HAVE_MWAIT1528#if KMP_OS_UNIX1529#include <pmmintrin.h>1530#else1531#include <intrin.h>1532#endif1533#if KMP_OS_UNIX1534__attribute__((target("sse3")))1535#endif1536static inline void1537__kmp_mm_monitor(void *cacheline, unsigned extensions, unsigned hints) {1538_mm_monitor(cacheline, extensions, hints);1539}1540#if KMP_OS_UNIX1541__attribute__((target("sse3")))1542#endif1543static inline void1544__kmp_mm_mwait(unsigned extensions, unsigned hints) {1545_mm_mwait(extensions, hints);1546}1547#endif // KMP_HAVE_UMWAIT15481549#if KMP_ARCH_X861550extern void __kmp_x86_pause(void);1551#elif KMP_MIC1552// Performance testing on KNC (C0QS-7120 P/A/X/D, 61-core, 16 GB Memory) showed1553// regression after removal of extra PAUSE from spin loops. Changing1554// the delay from 100 to 300 showed even better performance than double PAUSE1555// on Spec OMP2001 and LCPC tasking tests, no regressions on EPCC.1556static inline void __kmp_x86_pause(void) { _mm_delay_32(300); }1557#else1558static inline void __kmp_x86_pause(void) { _mm_pause(); }1559#endif1560#define KMP_CPU_PAUSE() __kmp_x86_pause()1561#elif KMP_ARCH_PPC641562#define KMP_PPC64_PRI_LOW() __asm__ volatile("or 1, 1, 1")1563#define KMP_PPC64_PRI_MED() __asm__ volatile("or 2, 2, 2")1564#define KMP_PPC64_PRI_LOC_MB() __asm__ volatile("" : : : "memory")1565#define KMP_CPU_PAUSE() \1566do { \1567KMP_PPC64_PRI_LOW(); \1568KMP_PPC64_PRI_MED(); \1569KMP_PPC64_PRI_LOC_MB(); \1570} while (0)1571#else1572#define KMP_CPU_PAUSE() /* nothing to do */1573#endif15741575#define KMP_INIT_YIELD(count) \1576{ (count) = __kmp_yield_init; }15771578#define KMP_INIT_BACKOFF(time) \1579{ (time) = __kmp_pause_init; }15801581#define KMP_OVERSUBSCRIBED \1582(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc))15831584#define KMP_TRY_YIELD \1585((__kmp_use_yield == 1) || (__kmp_use_yield == 2 && (KMP_OVERSUBSCRIBED)))15861587#define KMP_TRY_YIELD_OVERSUB \1588((__kmp_use_yield == 1 || __kmp_use_yield == 2) && (KMP_OVERSUBSCRIBED))15891590#define KMP_YIELD(cond) \1591{ \1592KMP_CPU_PAUSE(); \1593if ((cond) && (KMP_TRY_YIELD)) \1594__kmp_yield(); \1595}15961597#define KMP_YIELD_OVERSUB() \1598{ \1599KMP_CPU_PAUSE(); \1600if ((KMP_TRY_YIELD_OVERSUB)) \1601__kmp_yield(); \1602}16031604// Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround,1605// there should be no yielding since initial value from KMP_INIT_YIELD() is odd.1606#define KMP_YIELD_SPIN(count) \1607{ \1608KMP_CPU_PAUSE(); \1609if (KMP_TRY_YIELD) { \1610(count) -= 2; \1611if (!(count)) { \1612__kmp_yield(); \1613(count) = __kmp_yield_next; \1614} \1615} \1616}16171618// If TPAUSE is available & enabled, use it. If oversubscribed, use the slower1619// (C0.2) state, which improves performance of other SMT threads on the same1620// core, otherwise, use the fast (C0.1) default state, or whatever the user has1621// requested. Uses a timed TPAUSE, and exponential backoff. If TPAUSE isn't1622// available, fall back to the regular CPU pause and yield combination.1623#if KMP_HAVE_UMWAIT1624#define KMP_TPAUSE_MAX_MASK ((kmp_uint64)0xFFFF)1625#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time) \1626{ \1627if (__kmp_tpause_enabled) { \1628if (KMP_OVERSUBSCRIBED) { \1629__kmp_tpause(0, (time)); \1630} else { \1631__kmp_tpause(__kmp_tpause_hint, (time)); \1632} \1633(time) = (time << 1 | 1) & KMP_TPAUSE_MAX_MASK; \1634} else { \1635KMP_CPU_PAUSE(); \1636if ((KMP_TRY_YIELD_OVERSUB)) { \1637__kmp_yield(); \1638} else if (__kmp_use_yield == 1) { \1639(count) -= 2; \1640if (!(count)) { \1641__kmp_yield(); \1642(count) = __kmp_yield_next; \1643} \1644} \1645} \1646}1647#else1648#define KMP_YIELD_OVERSUB_ELSE_SPIN(count, time) \1649{ \1650KMP_CPU_PAUSE(); \1651if ((KMP_TRY_YIELD_OVERSUB)) \1652__kmp_yield(); \1653else if (__kmp_use_yield == 1) { \1654(count) -= 2; \1655if (!(count)) { \1656__kmp_yield(); \1657(count) = __kmp_yield_next; \1658} \1659} \1660}1661#endif // KMP_HAVE_UMWAIT16621663/* ------------------------------------------------------------------------ */1664/* Support datatypes for the orphaned construct nesting checks. */1665/* ------------------------------------------------------------------------ */16661667/* When adding to this enum, add its corresponding string in cons_text_c[]1668* array in kmp_error.cpp */1669enum cons_type {1670ct_none,1671ct_parallel,1672ct_pdo,1673ct_pdo_ordered,1674ct_psections,1675ct_psingle,1676ct_critical,1677ct_ordered_in_parallel,1678ct_ordered_in_pdo,1679ct_master,1680ct_reduce,1681ct_barrier,1682ct_masked1683};16841685#define IS_CONS_TYPE_ORDERED(ct) ((ct) == ct_pdo_ordered)16861687struct cons_data {1688ident_t const *ident;1689enum cons_type type;1690int prev;1691kmp_user_lock_p1692name; /* address exclusively for critical section name comparison */1693};16941695struct cons_header {1696int p_top, w_top, s_top;1697int stack_size, stack_top;1698struct cons_data *stack_data;1699};17001701struct kmp_region_info {1702char *text;1703int offset[KMP_MAX_FIELDS];1704int length[KMP_MAX_FIELDS];1705};17061707/* ---------------------------------------------------------------------- */1708/* ---------------------------------------------------------------------- */17091710#if KMP_OS_WINDOWS1711typedef HANDLE kmp_thread_t;1712typedef DWORD kmp_key_t;1713#endif /* KMP_OS_WINDOWS */17141715#if KMP_OS_UNIX1716typedef pthread_t kmp_thread_t;1717typedef pthread_key_t kmp_key_t;1718#endif17191720extern kmp_key_t __kmp_gtid_threadprivate_key;17211722typedef struct kmp_sys_info {1723long maxrss; /* the maximum resident set size utilized (in kilobytes) */1724long minflt; /* the number of page faults serviced without any I/O */1725long majflt; /* the number of page faults serviced that required I/O */1726long nswap; /* the number of times a process was "swapped" out of memory */1727long inblock; /* the number of times the file system had to perform input */1728long oublock; /* the number of times the file system had to perform output */1729long nvcsw; /* the number of times a context switch was voluntarily */1730long nivcsw; /* the number of times a context switch was forced */1731} kmp_sys_info_t;17321733#if USE_ITT_BUILD1734// We cannot include "kmp_itt.h" due to circular dependency. Declare the only1735// required type here. Later we will check the type meets requirements.1736typedef int kmp_itt_mark_t;1737#define KMP_ITT_DEBUG 01738#endif /* USE_ITT_BUILD */17391740typedef kmp_int32 kmp_critical_name[8];17411742/*!1743@ingroup PARALLEL1744The type for a microtask which gets passed to @ref __kmpc_fork_call().1745The arguments to the outlined function are1746@param global_tid the global thread identity of the thread executing the1747function.1748@param bound_tid the local identity of the thread executing the function1749@param ... pointers to shared variables accessed by the function.1750*/1751typedef void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...);1752typedef void (*kmpc_micro_bound)(kmp_int32 *bound_tid, kmp_int32 *bound_nth,1753...);17541755/*!1756@ingroup THREADPRIVATE1757@{1758*/1759/* ---------------------------------------------------------------------------1760*/1761/* Threadprivate initialization/finalization function declarations */17621763/* for non-array objects: __kmpc_threadprivate_register() */17641765/*!1766Pointer to the constructor function.1767The first argument is the <tt>this</tt> pointer1768*/1769typedef void *(*kmpc_ctor)(void *);17701771/*!1772Pointer to the destructor function.1773The first argument is the <tt>this</tt> pointer1774*/1775typedef void (*kmpc_dtor)(1776void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel1777compiler */1778/*!1779Pointer to an alternate constructor.1780The first argument is the <tt>this</tt> pointer.1781*/1782typedef void *(*kmpc_cctor)(void *, void *);17831784/* for array objects: __kmpc_threadprivate_register_vec() */1785/* First arg: "this" pointer */1786/* Last arg: number of array elements */1787/*!1788Array constructor.1789First argument is the <tt>this</tt> pointer1790Second argument the number of array elements.1791*/1792typedef void *(*kmpc_ctor_vec)(void *, size_t);1793/*!1794Pointer to the array destructor function.1795The first argument is the <tt>this</tt> pointer1796Second argument the number of array elements.1797*/1798typedef void (*kmpc_dtor_vec)(void *, size_t);1799/*!1800Array constructor.1801First argument is the <tt>this</tt> pointer1802Third argument the number of array elements.1803*/1804typedef void *(*kmpc_cctor_vec)(void *, void *,1805size_t); /* function unused by compiler */18061807/*!1808@}1809*/18101811/* keeps tracked of threadprivate cache allocations for cleanup later */1812typedef struct kmp_cached_addr {1813void **addr; /* address of allocated cache */1814void ***compiler_cache; /* pointer to compiler's cache */1815void *data; /* pointer to global data */1816struct kmp_cached_addr *next; /* pointer to next cached address */1817} kmp_cached_addr_t;18181819struct private_data {1820struct private_data *next; /* The next descriptor in the list */1821void *data; /* The data buffer for this descriptor */1822int more; /* The repeat count for this descriptor */1823size_t size; /* The data size for this descriptor */1824};18251826struct private_common {1827struct private_common *next;1828struct private_common *link;1829void *gbl_addr;1830void *par_addr; /* par_addr == gbl_addr for PRIMARY thread */1831size_t cmn_size;1832};18331834struct shared_common {1835struct shared_common *next;1836struct private_data *pod_init;1837void *obj_init;1838void *gbl_addr;1839union {1840kmpc_ctor ctor;1841kmpc_ctor_vec ctorv;1842} ct;1843union {1844kmpc_cctor cctor;1845kmpc_cctor_vec cctorv;1846} cct;1847union {1848kmpc_dtor dtor;1849kmpc_dtor_vec dtorv;1850} dt;1851size_t vec_len;1852int is_vec;1853size_t cmn_size;1854};18551856#define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */1857#define KMP_HASH_TABLE_SIZE \1858(1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */1859#define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */1860#define KMP_HASH(x) \1861((((kmp_uintptr_t)x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE - 1))18621863struct common_table {1864struct private_common *data[KMP_HASH_TABLE_SIZE];1865};18661867struct shared_table {1868struct shared_common *data[KMP_HASH_TABLE_SIZE];1869};18701871/* ------------------------------------------------------------------------ */18721873#if KMP_USE_HIER_SCHED1874// Shared barrier data that exists inside a single unit of the scheduling1875// hierarchy1876typedef struct kmp_hier_private_bdata_t {1877kmp_int32 num_active;1878kmp_uint64 index;1879kmp_uint64 wait_val[2];1880} kmp_hier_private_bdata_t;1881#endif18821883typedef struct kmp_sched_flags {1884unsigned ordered : 1;1885unsigned nomerge : 1;1886unsigned contains_last : 1;1887unsigned use_hier : 1; // Used in KMP_USE_HIER_SCHED code1888unsigned use_hybrid : 1; // Used in KMP_WEIGHTED_ITERATIONS_SUPPORTED code1889unsigned unused : 27;1890} kmp_sched_flags_t;18911892KMP_BUILD_ASSERT(sizeof(kmp_sched_flags_t) == 4);18931894#if KMP_STATIC_STEAL_ENABLED1895typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {1896kmp_int32 count;1897kmp_int32 ub;1898/* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */1899kmp_int32 lb;1900kmp_int32 st;1901kmp_int32 tc;1902kmp_lock_t *steal_lock; // lock used for chunk stealing19031904kmp_uint32 ordered_lower;1905kmp_uint32 ordered_upper;19061907// KMP_ALIGN(32) ensures (if the KMP_ALIGN macro is turned on)1908// a) parm3 is properly aligned and1909// b) all parm1-4 are on the same cache line.1910// Because of parm1-4 are used together, performance seems to be better1911// if they are on the same cache line (not measured though).19121913struct KMP_ALIGN(32) {1914kmp_int32 parm1;1915kmp_int32 parm2;1916kmp_int32 parm3;1917kmp_int32 parm4;1918};19191920#if KMP_WEIGHTED_ITERATIONS_SUPPORTED1921kmp_uint32 pchunks;1922kmp_uint32 num_procs_with_pcore;1923kmp_int32 first_thread_with_ecore;1924#endif1925#if KMP_OS_WINDOWS1926kmp_int32 last_upper;1927#endif /* KMP_OS_WINDOWS */1928} dispatch_private_info32_t;19291930#if CACHE_LINE <= 1281931KMP_BUILD_ASSERT(sizeof(dispatch_private_info32_t) <= 128);1932#endif19331934typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {1935kmp_int64 count; // current chunk number for static & static-steal scheduling1936kmp_int64 ub; /* upper-bound */1937/* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */1938kmp_int64 lb; /* lower-bound */1939kmp_int64 st; /* stride */1940kmp_int64 tc; /* trip count (number of iterations) */1941kmp_lock_t *steal_lock; // lock used for chunk stealing19421943kmp_uint64 ordered_lower;1944kmp_uint64 ordered_upper;1945/* parm[1-4] are used in different ways by different scheduling algorithms */19461947// KMP_ALIGN(32) ensures ( if the KMP_ALIGN macro is turned on )1948// a) parm3 is properly aligned and1949// b) all parm1-4 are in the same cache line.1950// Because of parm1-4 are used together, performance seems to be better1951// if they are in the same line (not measured though).1952struct KMP_ALIGN(32) {1953kmp_int64 parm1;1954kmp_int64 parm2;1955kmp_int64 parm3;1956kmp_int64 parm4;1957};19581959#if KMP_WEIGHTED_ITERATIONS_SUPPORTED1960kmp_uint64 pchunks;1961kmp_uint64 num_procs_with_pcore;1962kmp_int64 first_thread_with_ecore;1963#endif19641965#if KMP_OS_WINDOWS1966kmp_int64 last_upper;1967#endif /* KMP_OS_WINDOWS */1968} dispatch_private_info64_t;19691970#if CACHE_LINE <= 1281971KMP_BUILD_ASSERT(sizeof(dispatch_private_info64_t) <= 128);1972#endif19731974#else /* KMP_STATIC_STEAL_ENABLED */1975typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {1976kmp_int32 lb;1977kmp_int32 ub;1978kmp_int32 st;1979kmp_int32 tc;19801981kmp_int32 parm1;1982kmp_int32 parm2;1983kmp_int32 parm3;1984kmp_int32 parm4;19851986kmp_int32 count;19871988kmp_uint32 ordered_lower;1989kmp_uint32 ordered_upper;1990#if KMP_OS_WINDOWS1991kmp_int32 last_upper;1992#endif /* KMP_OS_WINDOWS */1993} dispatch_private_info32_t;19941995typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {1996kmp_int64 lb; /* lower-bound */1997kmp_int64 ub; /* upper-bound */1998kmp_int64 st; /* stride */1999kmp_int64 tc; /* trip count (number of iterations) */20002001/* parm[1-4] are used in different ways by different scheduling algorithms */2002kmp_int64 parm1;2003kmp_int64 parm2;2004kmp_int64 parm3;2005kmp_int64 parm4;20062007kmp_int64 count; /* current chunk number for static scheduling */20082009kmp_uint64 ordered_lower;2010kmp_uint64 ordered_upper;2011#if KMP_OS_WINDOWS2012kmp_int64 last_upper;2013#endif /* KMP_OS_WINDOWS */2014} dispatch_private_info64_t;2015#endif /* KMP_STATIC_STEAL_ENABLED */20162017typedef struct KMP_ALIGN_CACHE dispatch_private_info {2018union private_info {2019dispatch_private_info32_t p32;2020dispatch_private_info64_t p64;2021} u;2022enum sched_type schedule; /* scheduling algorithm */2023kmp_sched_flags_t flags; /* flags (e.g., ordered, nomerge, etc.) */2024std::atomic<kmp_uint32> steal_flag; // static_steal only, state of a buffer2025kmp_int32 ordered_bumped;2026// Stack of buffers for nest of serial regions2027struct dispatch_private_info *next;2028kmp_int32 type_size; /* the size of types in private_info */2029#if KMP_USE_HIER_SCHED2030kmp_int32 hier_id;2031void *parent; /* hierarchical scheduling parent pointer */2032#endif2033enum cons_type pushed_ws;2034} dispatch_private_info_t;20352036typedef struct dispatch_shared_info32 {2037/* chunk index under dynamic, number of idle threads under static-steal;2038iteration index otherwise */2039volatile kmp_uint32 iteration;2040volatile kmp_int32 num_done;2041volatile kmp_uint32 ordered_iteration;2042// Dummy to retain the structure size after making ordered_iteration scalar2043kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 1];2044} dispatch_shared_info32_t;20452046typedef struct dispatch_shared_info64 {2047/* chunk index under dynamic, number of idle threads under static-steal;2048iteration index otherwise */2049volatile kmp_uint64 iteration;2050volatile kmp_int64 num_done;2051volatile kmp_uint64 ordered_iteration;2052// Dummy to retain the structure size after making ordered_iteration scalar2053kmp_int64 ordered_dummy[KMP_MAX_ORDERED - 3];2054} dispatch_shared_info64_t;20552056typedef struct dispatch_shared_info {2057union shared_info {2058dispatch_shared_info32_t s32;2059dispatch_shared_info64_t s64;2060} u;2061volatile kmp_uint32 buffer_index;2062volatile kmp_int32 doacross_buf_idx; // teamwise index2063volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1)2064kmp_int32 doacross_num_done; // count finished threads2065#if KMP_USE_HIER_SCHED2066void *hier;2067#endif2068#if KMP_USE_HWLOC2069// When linking with libhwloc, the ORDERED EPCC test slows down on big2070// machines (> 48 cores). Performance analysis showed that a cache thrash2071// was occurring and this padding helps alleviate the problem.2072char padding[64];2073#endif2074} dispatch_shared_info_t;20752076typedef struct kmp_disp {2077/* Vector for ORDERED SECTION */2078void (*th_deo_fcn)(int *gtid, int *cid, ident_t *);2079/* Vector for END ORDERED SECTION */2080void (*th_dxo_fcn)(int *gtid, int *cid, ident_t *);20812082dispatch_shared_info_t *th_dispatch_sh_current;2083dispatch_private_info_t *th_dispatch_pr_current;20842085dispatch_private_info_t *th_disp_buffer;2086kmp_uint32 th_disp_index;2087kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index2088volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags2089kmp_int64 *th_doacross_info; // info on loop bounds2090#if KMP_USE_INTERNODE_ALIGNMENT2091char more_padding[INTERNODE_CACHE_LINE];2092#endif2093} kmp_disp_t;20942095/* ------------------------------------------------------------------------ */2096/* Barrier stuff */20972098/* constants for barrier state update */2099#define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */2100#define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */2101#define KMP_BARRIER_UNUSED_BIT 1 // bit that must never be set for valid state2102#define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */21032104#define KMP_BARRIER_SLEEP_STATE (1 << KMP_BARRIER_SLEEP_BIT)2105#define KMP_BARRIER_UNUSED_STATE (1 << KMP_BARRIER_UNUSED_BIT)2106#define KMP_BARRIER_STATE_BUMP (1 << KMP_BARRIER_BUMP_BIT)21072108#if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT)2109#error "Barrier sleep bit must be smaller than barrier bump bit"2110#endif2111#if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT)2112#error "Barrier unused bit must be smaller than barrier bump bit"2113#endif21142115// Constants for release barrier wait state: currently, hierarchical only2116#define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep2117#define KMP_BARRIER_OWN_FLAG \21181 // Normal state; worker waiting on own b_go flag in release2119#define KMP_BARRIER_PARENT_FLAG \21202 // Special state; worker waiting on parent's b_go flag in release2121#define KMP_BARRIER_SWITCH_TO_OWN_FLAG \21223 // Special state; tells worker to shift from parent to own b_go2123#define KMP_BARRIER_SWITCHING \21244 // Special state; worker resets appropriate flag on wake-up21252126#define KMP_NOT_SAFE_TO_REAP \21270 // Thread th_reap_state: not safe to reap (tasking)2128#define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking)21292130// The flag_type describes the storage used for the flag.2131enum flag_type {2132flag32, /**< atomic 32 bit flags */2133flag64, /**< 64 bit flags */2134atomic_flag64, /**< atomic 64 bit flags */2135flag_oncore, /**< special 64-bit flag for on-core barrier (hierarchical) */2136flag_unset2137};21382139enum barrier_type {2140bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction2141barriers if enabled) */2142bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */2143#if KMP_FAST_REDUCTION_BARRIER2144bs_reduction_barrier, /* 2, All barriers that are used in reduction */2145#endif // KMP_FAST_REDUCTION_BARRIER2146bs_last_barrier /* Just a placeholder to mark the end */2147};21482149// to work with reduction barriers just like with plain barriers2150#if !KMP_FAST_REDUCTION_BARRIER2151#define bs_reduction_barrier bs_plain_barrier2152#endif // KMP_FAST_REDUCTION_BARRIER21532154typedef enum kmp_bar_pat { /* Barrier communication patterns */2155bp_linear_bar =21560, /* Single level (degenerate) tree */2157bp_tree_bar =21581, /* Balanced tree with branching factor 2^n */2159bp_hyper_bar = 2, /* Hypercube-embedded tree with min2160branching factor 2^n */2161bp_hierarchical_bar = 3, /* Machine hierarchy tree */2162bp_dist_bar = 4, /* Distributed barrier */2163bp_last_bar /* Placeholder to mark the end */2164} kmp_bar_pat_e;21652166#define KMP_BARRIER_ICV_PUSH 121672168/* Record for holding the values of the internal controls stack records */2169typedef struct kmp_internal_control {2170int serial_nesting_level; /* corresponds to the value of the2171th_team_serialized field */2172kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per2173thread) */2174kmp_int82175bt_set; /* internal control for whether blocktime is explicitly set */2176int blocktime; /* internal control for blocktime */2177#if KMP_USE_MONITOR2178int bt_intervals; /* internal control for blocktime intervals */2179#endif2180int nproc; /* internal control for #threads for next parallel region (per2181thread) */2182int thread_limit; /* internal control for thread-limit-var */2183int task_thread_limit; /* internal control for thread-limit-var of a task*/2184int max_active_levels; /* internal control for max_active_levels */2185kmp_r_sched_t2186sched; /* internal control for runtime schedule {sched,chunk} pair */2187kmp_proc_bind_t proc_bind; /* internal control for affinity */2188kmp_int32 default_device; /* internal control for default device */2189struct kmp_internal_control *next;2190} kmp_internal_control_t;21912192static inline void copy_icvs(kmp_internal_control_t *dst,2193kmp_internal_control_t *src) {2194*dst = *src;2195}21962197/* Thread barrier needs volatile barrier fields */2198typedef struct KMP_ALIGN_CACHE kmp_bstate {2199// th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all2200// uses of it). It is not explicitly aligned below, because we *don't* want2201// it to be padded -- instead, we fit b_go into the same cache line with2202// th_fixed_icvs, enabling NGO cache lines stores in the hierarchical barrier.2203kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread2204// Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with2205// same NGO store2206volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical)2207KMP_ALIGN_CACHE volatile kmp_uint642208b_arrived; // STATE => task reached synch point.2209kmp_uint32 *skip_per_level;2210kmp_uint32 my_level;2211kmp_int32 parent_tid;2212kmp_int32 old_tid;2213kmp_uint32 depth;2214struct kmp_bstate *parent_bar;2215kmp_team_t *team;2216kmp_uint64 leaf_state;2217kmp_uint32 nproc;2218kmp_uint8 base_leaf_kids;2219kmp_uint8 leaf_kids;2220kmp_uint8 offset;2221kmp_uint8 wait_flag;2222kmp_uint8 use_oncore_barrier;2223#if USE_DEBUGGER2224// The following field is intended for the debugger solely. Only the worker2225// thread itself accesses this field: the worker increases it by 1 when it2226// arrives to a barrier.2227KMP_ALIGN_CACHE kmp_uint b_worker_arrived;2228#endif /* USE_DEBUGGER */2229} kmp_bstate_t;22302231union KMP_ALIGN_CACHE kmp_barrier_union {2232double b_align; /* use worst case alignment */2233char b_pad[KMP_PAD(kmp_bstate_t, CACHE_LINE)];2234kmp_bstate_t bb;2235};22362237typedef union kmp_barrier_union kmp_balign_t;22382239/* Team barrier needs only non-volatile arrived counter */2240union KMP_ALIGN_CACHE kmp_barrier_team_union {2241double b_align; /* use worst case alignment */2242char b_pad[CACHE_LINE];2243struct {2244kmp_uint64 b_arrived; /* STATE => task reached synch point. */2245#if USE_DEBUGGER2246// The following two fields are indended for the debugger solely. Only2247// primary thread of the team accesses these fields: the first one is2248// increased by 1 when the primary thread arrives to a barrier, the second2249// one is increased by one when all the threads arrived.2250kmp_uint b_master_arrived;2251kmp_uint b_team_arrived;2252#endif2253};2254};22552256typedef union kmp_barrier_team_union kmp_balign_team_t;22572258/* Padding for Linux* OS pthreads condition variables and mutexes used to signal2259threads when a condition changes. This is to workaround an NPTL bug where2260padding was added to pthread_cond_t which caused the initialization routine2261to write outside of the structure if compiled on pre-NPTL threads. */2262#if KMP_OS_WINDOWS2263typedef struct kmp_win32_mutex {2264/* The Lock */2265CRITICAL_SECTION cs;2266} kmp_win32_mutex_t;22672268typedef struct kmp_win32_cond {2269/* Count of the number of waiters. */2270int waiters_count_;22712272/* Serialize access to <waiters_count_> */2273kmp_win32_mutex_t waiters_count_lock_;22742275/* Number of threads to release via a <cond_broadcast> or a <cond_signal> */2276int release_count_;22772278/* Keeps track of the current "generation" so that we don't allow */2279/* one thread to steal all the "releases" from the broadcast. */2280int wait_generation_count_;22812282/* A manual-reset event that's used to block and release waiting threads. */2283HANDLE event_;2284} kmp_win32_cond_t;2285#endif22862287#if KMP_OS_UNIX22882289union KMP_ALIGN_CACHE kmp_cond_union {2290double c_align;2291char c_pad[CACHE_LINE];2292pthread_cond_t c_cond;2293};22942295typedef union kmp_cond_union kmp_cond_align_t;22962297union KMP_ALIGN_CACHE kmp_mutex_union {2298double m_align;2299char m_pad[CACHE_LINE];2300pthread_mutex_t m_mutex;2301};23022303typedef union kmp_mutex_union kmp_mutex_align_t;23042305#endif /* KMP_OS_UNIX */23062307typedef struct kmp_desc_base {2308void *ds_stackbase;2309size_t ds_stacksize;2310int ds_stackgrow;2311kmp_thread_t ds_thread;2312volatile int ds_tid;2313int ds_gtid;2314#if KMP_OS_WINDOWS2315volatile int ds_alive;2316DWORD ds_thread_id;2317/* ds_thread keeps thread handle on Windows* OS. It is enough for RTL purposes.2318However, debugger support (libomp_db) cannot work with handles, because they2319uncomparable. For example, debugger requests info about thread with handle h.2320h is valid within debugger process, and meaningless within debugee process.2321Even if h is duped by call to DuplicateHandle(), so the result h' is valid2322within debugee process, but it is a *new* handle which does *not* equal to2323any other handle in debugee... The only way to compare handles is convert2324them to system-wide ids. GetThreadId() function is available only in2325Longhorn and Server 2003. :-( In contrast, GetCurrentThreadId() is available2326on all Windows* OS flavours (including Windows* 95). Thus, we have to get2327thread id by call to GetCurrentThreadId() from within the thread and save it2328to let libomp_db identify threads. */2329#endif /* KMP_OS_WINDOWS */2330} kmp_desc_base_t;23312332typedef union KMP_ALIGN_CACHE kmp_desc {2333double ds_align; /* use worst case alignment */2334char ds_pad[KMP_PAD(kmp_desc_base_t, CACHE_LINE)];2335kmp_desc_base_t ds;2336} kmp_desc_t;23372338typedef struct kmp_local {2339volatile int this_construct; /* count of single's encountered by thread */2340void *reduce_data;2341#if KMP_USE_BGET2342void *bget_data;2343void *bget_list;2344#if !USE_CMP_XCHG_FOR_BGET2345#ifdef USE_QUEUING_LOCK_FOR_BGET2346kmp_lock_t bget_lock; /* Lock for accessing bget free list */2347#else2348kmp_bootstrap_lock_t bget_lock; // Lock for accessing bget free list. Must be2349// bootstrap lock so we can use it at library2350// shutdown.2351#endif /* USE_LOCK_FOR_BGET */2352#endif /* ! USE_CMP_XCHG_FOR_BGET */2353#endif /* KMP_USE_BGET */23542355PACKED_REDUCTION_METHOD_T2356packed_reduction_method; /* stored by __kmpc_reduce*(), used by2357__kmpc_end_reduce*() */23582359} kmp_local_t;23602361#define KMP_CHECK_UPDATE(a, b) \2362if ((a) != (b)) \2363(a) = (b)2364#define KMP_CHECK_UPDATE_SYNC(a, b) \2365if ((a) != (b)) \2366TCW_SYNC_PTR((a), (b))23672368#define get__blocktime(xteam, xtid) \2369((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime)2370#define get__bt_set(xteam, xtid) \2371((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set)2372#if KMP_USE_MONITOR2373#define get__bt_intervals(xteam, xtid) \2374((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals)2375#endif23762377#define get__dynamic_2(xteam, xtid) \2378((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic)2379#define get__nproc_2(xteam, xtid) \2380((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc)2381#define get__sched_2(xteam, xtid) \2382((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched)23832384#define set__blocktime_team(xteam, xtid, xval) \2385(((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) = \2386(xval))23872388#if KMP_USE_MONITOR2389#define set__bt_intervals_team(xteam, xtid, xval) \2390(((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) = \2391(xval))2392#endif23932394#define set__bt_set_team(xteam, xtid, xval) \2395(((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval))23962397#define set__dynamic(xthread, xval) \2398(((xthread)->th.th_current_task->td_icvs.dynamic) = (xval))2399#define get__dynamic(xthread) \2400(((xthread)->th.th_current_task->td_icvs.dynamic) ? (FTN_TRUE) : (FTN_FALSE))24012402#define set__nproc(xthread, xval) \2403(((xthread)->th.th_current_task->td_icvs.nproc) = (xval))24042405#define set__thread_limit(xthread, xval) \2406(((xthread)->th.th_current_task->td_icvs.thread_limit) = (xval))24072408#define set__max_active_levels(xthread, xval) \2409(((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval))24102411#define get__max_active_levels(xthread) \2412((xthread)->th.th_current_task->td_icvs.max_active_levels)24132414#define set__sched(xthread, xval) \2415(((xthread)->th.th_current_task->td_icvs.sched) = (xval))24162417#define set__proc_bind(xthread, xval) \2418(((xthread)->th.th_current_task->td_icvs.proc_bind) = (xval))2419#define get__proc_bind(xthread) \2420((xthread)->th.th_current_task->td_icvs.proc_bind)24212422// OpenMP tasking data structures24232424typedef enum kmp_tasking_mode {2425tskm_immediate_exec = 0,2426tskm_extra_barrier = 1,2427tskm_task_teams = 2,2428tskm_max = 22429} kmp_tasking_mode_t;24302431extern kmp_tasking_mode_t2432__kmp_tasking_mode; /* determines how/when to execute tasks */2433extern int __kmp_task_stealing_constraint;2434extern int __kmp_enable_task_throttling;2435extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if2436// specified, defaults to 0 otherwise2437// Set via OMP_MAX_TASK_PRIORITY if specified, defaults to 0 otherwise2438extern kmp_int32 __kmp_max_task_priority;2439// Set via KMP_TASKLOOP_MIN_TASKS if specified, defaults to 0 otherwise2440extern kmp_uint64 __kmp_taskloop_min_tasks;24412442/* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with2443taskdata first */2444#define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *)task) - 1)2445#define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *)(taskdata + 1)24462447// The tt_found_tasks flag is a signal to all threads in the team that tasks2448// were spawned and queued since the previous barrier release.2449#define KMP_TASKING_ENABLED(task_team) \2450(TRUE == TCR_SYNC_4((task_team)->tt.tt_found_tasks))2451/*!2452@ingroup BASIC_TYPES2453@{2454*/24552456/*!2457*/2458typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *);24592460typedef union kmp_cmplrdata {2461kmp_int32 priority; /**< priority specified by user for the task */2462kmp_routine_entry_t2463destructors; /* pointer to function to invoke deconstructors of2464firstprivate C++ objects */2465/* future data */2466} kmp_cmplrdata_t;24672468/* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */2469/*!2470*/2471typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */2472void *shareds; /**< pointer to block of pointers to shared vars */2473kmp_routine_entry_t2474routine; /**< pointer to routine to call for executing task */2475kmp_int32 part_id; /**< part id for the task */2476kmp_cmplrdata_t2477data1; /* Two known optional additions: destructors and priority */2478kmp_cmplrdata_t data2; /* Process destructors first, priority second */2479/* future data */2480/* private vars */2481} kmp_task_t;24822483/*!2484@}2485*/24862487typedef struct kmp_taskgroup {2488std::atomic<kmp_int32> count; // number of allocated and incomplete tasks2489std::atomic<kmp_int32>2490cancel_request; // request for cancellation of this taskgroup2491struct kmp_taskgroup *parent; // parent taskgroup2492// Block of data to perform task reduction2493void *reduce_data; // reduction related info2494kmp_int32 reduce_num_data; // number of data items to reduce2495uintptr_t *gomp_data; // gomp reduction data2496} kmp_taskgroup_t;24972498// forward declarations2499typedef union kmp_depnode kmp_depnode_t;2500typedef struct kmp_depnode_list kmp_depnode_list_t;2501typedef struct kmp_dephash_entry kmp_dephash_entry_t;25022503// macros for checking dep flag as an integer2504#define KMP_DEP_IN 0x12505#define KMP_DEP_OUT 0x22506#define KMP_DEP_INOUT 0x32507#define KMP_DEP_MTX 0x42508#define KMP_DEP_SET 0x82509#define KMP_DEP_ALL 0x802510// Compiler sends us this info. Note: some test cases contain an explicit copy2511// of this struct and should be in sync with any changes here.2512typedef struct kmp_depend_info {2513kmp_intptr_t base_addr;2514size_t len;2515union {2516kmp_uint8 flag; // flag as an unsigned char2517struct { // flag as a set of 8 bits2518#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)2519/* Same fields as in the #else branch, but in reverse order */2520unsigned all : 1;2521unsigned unused : 3;2522unsigned set : 1;2523unsigned mtx : 1;2524unsigned out : 1;2525unsigned in : 1;2526#else2527unsigned in : 1;2528unsigned out : 1;2529unsigned mtx : 1;2530unsigned set : 1;2531unsigned unused : 3;2532unsigned all : 1;2533#endif2534} flags;2535};2536} kmp_depend_info_t;25372538// Internal structures to work with task dependencies:2539struct kmp_depnode_list {2540kmp_depnode_t *node;2541kmp_depnode_list_t *next;2542};25432544// Max number of mutexinoutset dependencies per node2545#define MAX_MTX_DEPS 425462547typedef struct kmp_base_depnode {2548kmp_depnode_list_t *successors; /* used under lock */2549kmp_task_t *task; /* non-NULL if depnode is active, used under lock */2550kmp_lock_t *mtx_locks[MAX_MTX_DEPS]; /* lock mutexinoutset dependent tasks */2551kmp_int32 mtx_num_locks; /* number of locks in mtx_locks array */2552kmp_lock_t lock; /* guards shared fields: task, successors */2553#if KMP_SUPPORT_GRAPH_OUTPUT2554kmp_uint32 id;2555#endif2556std::atomic<kmp_int32> npredecessors;2557std::atomic<kmp_int32> nrefs;2558} kmp_base_depnode_t;25592560union KMP_ALIGN_CACHE kmp_depnode {2561double dn_align; /* use worst case alignment */2562char dn_pad[KMP_PAD(kmp_base_depnode_t, CACHE_LINE)];2563kmp_base_depnode_t dn;2564};25652566struct kmp_dephash_entry {2567kmp_intptr_t addr;2568kmp_depnode_t *last_out;2569kmp_depnode_list_t *last_set;2570kmp_depnode_list_t *prev_set;2571kmp_uint8 last_flag;2572kmp_lock_t *mtx_lock; /* is referenced by depnodes w/mutexinoutset dep */2573kmp_dephash_entry_t *next_in_bucket;2574};25752576typedef struct kmp_dephash {2577kmp_dephash_entry_t **buckets;2578size_t size;2579kmp_depnode_t *last_all;2580size_t generation;2581kmp_uint32 nelements;2582kmp_uint32 nconflicts;2583} kmp_dephash_t;25842585typedef struct kmp_task_affinity_info {2586kmp_intptr_t base_addr;2587size_t len;2588struct {2589bool flag1 : 1;2590bool flag2 : 1;2591kmp_int32 reserved : 30;2592} flags;2593} kmp_task_affinity_info_t;25942595typedef enum kmp_event_type_t {2596KMP_EVENT_UNINITIALIZED = 0,2597KMP_EVENT_ALLOW_COMPLETION = 12598} kmp_event_type_t;25992600typedef struct {2601kmp_event_type_t type;2602kmp_tas_lock_t lock;2603union {2604kmp_task_t *task;2605} ed;2606} kmp_event_t;26072608#if OMPX_TASKGRAPH2609// Initial number of allocated nodes while recording2610#define INIT_MAPSIZE 5026112612typedef struct kmp_taskgraph_flags { /*This needs to be exactly 32 bits */2613unsigned nowait : 1;2614unsigned re_record : 1;2615unsigned reserved : 30;2616} kmp_taskgraph_flags_t;26172618/// Represents a TDG node2619typedef struct kmp_node_info {2620kmp_task_t *task; // Pointer to the actual task2621kmp_int32 *successors; // Array of the succesors ids2622kmp_int32 nsuccessors; // Number of succesors of the node2623std::atomic<kmp_int32>2624npredecessors_counter; // Number of predessors on the fly2625kmp_int32 npredecessors; // Total number of predecessors2626kmp_int32 successors_size; // Number of allocated succesors ids2627kmp_taskdata_t *parent_task; // Parent implicit task2628} kmp_node_info_t;26292630/// Represent a TDG's current status2631typedef enum kmp_tdg_status {2632KMP_TDG_NONE = 0,2633KMP_TDG_RECORDING = 1,2634KMP_TDG_READY = 22635} kmp_tdg_status_t;26362637/// Structure that contains a TDG2638typedef struct kmp_tdg_info {2639kmp_int32 tdg_id; // Unique idenfifier of the TDG2640kmp_taskgraph_flags_t tdg_flags; // Flags related to a TDG2641kmp_int32 map_size; // Number of allocated TDG nodes2642kmp_int32 num_roots; // Number of roots tasks int the TDG2643kmp_int32 *root_tasks; // Array of tasks identifiers that are roots2644kmp_node_info_t *record_map; // Array of TDG nodes2645kmp_tdg_status_t tdg_status =2646KMP_TDG_NONE; // Status of the TDG (recording, ready...)2647std::atomic<kmp_int32> num_tasks; // Number of TDG nodes2648kmp_bootstrap_lock_t2649graph_lock; // Protect graph attributes when updated via taskloop_recur2650// Taskloop reduction related2651void *rec_taskred_data; // Data to pass to __kmpc_task_reduction_init or2652// __kmpc_taskred_init2653kmp_int32 rec_num_taskred;2654} kmp_tdg_info_t;26552656extern int __kmp_tdg_dot;2657extern kmp_int32 __kmp_max_tdgs;2658extern kmp_tdg_info_t **__kmp_global_tdgs;2659extern kmp_int32 __kmp_curr_tdg_idx;2660extern kmp_int32 __kmp_successors_size;2661extern std::atomic<kmp_int32> __kmp_tdg_task_id;2662extern kmp_int32 __kmp_num_tdg;2663#endif26642665#ifdef BUILD_TIED_TASK_STACK26662667/* Tied Task stack definitions */2668typedef struct kmp_stack_block {2669kmp_taskdata_t *sb_block[TASK_STACK_BLOCK_SIZE];2670struct kmp_stack_block *sb_next;2671struct kmp_stack_block *sb_prev;2672} kmp_stack_block_t;26732674typedef struct kmp_task_stack {2675kmp_stack_block_t ts_first_block; // first block of stack entries2676kmp_taskdata_t **ts_top; // pointer to the top of stack2677kmp_int32 ts_entries; // number of entries on the stack2678} kmp_task_stack_t;26792680#endif // BUILD_TIED_TASK_STACK26812682typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */2683#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)2684/* Same fields as in the #else branch, but in reverse order */2685#if OMPX_TASKGRAPH2686unsigned reserved31 : 5;2687unsigned onced : 1;2688#else2689unsigned reserved31 : 6;2690#endif2691unsigned target : 1;2692unsigned native : 1;2693unsigned freed : 1;2694unsigned complete : 1;2695unsigned executing : 1;2696unsigned started : 1;2697unsigned team_serial : 1;2698unsigned tasking_ser : 1;2699unsigned task_serial : 1;2700unsigned tasktype : 1;2701unsigned reserved : 8;2702unsigned hidden_helper : 1;2703unsigned detachable : 1;2704unsigned priority_specified : 1;2705unsigned proxy : 1;2706unsigned destructors_thunk : 1;2707unsigned merged_if0 : 1;2708unsigned final : 1;2709unsigned tiedness : 1;2710#else2711/* Compiler flags */ /* Total compiler flags must be 16 bits */2712unsigned tiedness : 1; /* task is either tied (1) or untied (0) */2713unsigned final : 1; /* task is final(1) so execute immediately */2714unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if02715code path */2716unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to2717invoke destructors from the runtime */2718unsigned proxy : 1; /* task is a proxy task (it will be executed outside the2719context of the RTL) */2720unsigned priority_specified : 1; /* set if the compiler provides priority2721setting for the task */2722unsigned detachable : 1; /* 1 == can detach */2723unsigned hidden_helper : 1; /* 1 == hidden helper task */2724unsigned reserved : 8; /* reserved for compiler use */27252726/* Library flags */ /* Total library flags must be 16 bits */2727unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */2728unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)2729unsigned tasking_ser : 1; // all tasks in team are either executed immediately2730// (1) or may be deferred (0)2731unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel2732// (0) [>= 2 threads]2733/* If either team_serial or tasking_ser is set, task team may be NULL */2734/* Task State Flags: */2735unsigned started : 1; /* 1==started, 0==not started */2736unsigned executing : 1; /* 1==executing, 0==not executing */2737unsigned complete : 1; /* 1==complete, 0==not complete */2738unsigned freed : 1; /* 1==freed, 0==allocated */2739unsigned native : 1; /* 1==gcc-compiled task, 0==intel */2740unsigned target : 1;2741#if OMPX_TASKGRAPH2742unsigned onced : 1; /* 1==ran once already, 0==never ran, record & replay purposes */2743unsigned reserved31 : 5; /* reserved for library use */2744#else2745unsigned reserved31 : 6; /* reserved for library use */2746#endif2747#endif2748} kmp_tasking_flags_t;27492750typedef struct kmp_target_data {2751void *async_handle; // libomptarget async handle for task completion query2752} kmp_target_data_t;27532754struct kmp_taskdata { /* aligned during dynamic allocation */2755kmp_int32 td_task_id; /* id, assigned by debugger */2756kmp_tasking_flags_t td_flags; /* task flags */2757kmp_team_t *td_team; /* team for this task */2758kmp_info_p *td_alloc_thread; /* thread that allocated data structures */2759/* Currently not used except for perhaps IDB */2760kmp_taskdata_t *td_parent; /* parent task */2761kmp_int32 td_level; /* task nesting level */2762std::atomic<kmp_int32> td_untied_count; // untied task active parts counter2763ident_t *td_ident; /* task identifier */2764// Taskwait data.2765ident_t *td_taskwait_ident;2766kmp_uint32 td_taskwait_counter;2767kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */2768KMP_ALIGN_CACHE kmp_internal_control_t2769td_icvs; /* Internal control variables for the task */2770KMP_ALIGN_CACHE std::atomic<kmp_int32>2771td_allocated_child_tasks; /* Child tasks (+ current task) not yet2772deallocated */2773std::atomic<kmp_int32>2774td_incomplete_child_tasks; /* Child tasks not yet complete */2775kmp_taskgroup_t2776*td_taskgroup; // Each task keeps pointer to its current taskgroup2777kmp_dephash_t2778*td_dephash; // Dependencies for children tasks are tracked from here2779kmp_depnode_t2780*td_depnode; // Pointer to graph node if this task has dependencies2781kmp_task_team_t *td_task_team;2782size_t td_size_alloc; // Size of task structure, including shareds etc.2783#if defined(KMP_GOMP_COMPAT)2784// 4 or 8 byte integers for the loop bounds in GOMP_taskloop2785kmp_int32 td_size_loop_bounds;2786#endif2787kmp_taskdata_t *td_last_tied; // keep tied task for task scheduling constraint2788#if defined(KMP_GOMP_COMPAT)2789// GOMP sends in a copy function for copy constructors2790void (*td_copy_func)(void *, void *);2791#endif2792kmp_event_t td_allow_completion_event;2793#if OMPT_SUPPORT2794ompt_task_info_t ompt_task_info;2795#endif2796#if OMPX_TASKGRAPH2797bool is_taskgraph = 0; // whether the task is within a TDG2798kmp_tdg_info_t *tdg; // used to associate task with a TDG2799#endif2800kmp_target_data_t td_target_data;2801}; // struct kmp_taskdata28022803// Make sure padding above worked2804KMP_BUILD_ASSERT(sizeof(kmp_taskdata_t) % sizeof(void *) == 0);28052806// Data for task team but per thread2807typedef struct kmp_base_thread_data {2808kmp_info_p *td_thr; // Pointer back to thread info2809// Used only in __kmp_execute_tasks_template, maybe not avail until task is2810// queued?2811kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque2812kmp_taskdata_t *2813*td_deque; // Deque of tasks encountered by td_thr, dynamically allocated2814kmp_int32 td_deque_size; // Size of deck2815kmp_uint32 td_deque_head; // Head of deque (will wrap)2816kmp_uint32 td_deque_tail; // Tail of deque (will wrap)2817kmp_int32 td_deque_ntasks; // Number of tasks in deque2818// GEH: shouldn't this be volatile since used in while-spin?2819kmp_int32 td_deque_last_stolen; // Thread number of last successful steal2820#ifdef BUILD_TIED_TASK_STACK2821kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task2822// scheduling constraint2823#endif // BUILD_TIED_TASK_STACK2824} kmp_base_thread_data_t;28252826#define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE2827#define INITIAL_TASK_DEQUE_SIZE (1 << TASK_DEQUE_BITS)28282829#define TASK_DEQUE_SIZE(td) ((td).td_deque_size)2830#define TASK_DEQUE_MASK(td) ((td).td_deque_size - 1)28312832typedef union KMP_ALIGN_CACHE kmp_thread_data {2833kmp_base_thread_data_t td;2834double td_align; /* use worst case alignment */2835char td_pad[KMP_PAD(kmp_base_thread_data_t, CACHE_LINE)];2836} kmp_thread_data_t;28372838typedef struct kmp_task_pri {2839kmp_thread_data_t td;2840kmp_int32 priority;2841kmp_task_pri *next;2842} kmp_task_pri_t;28432844// Data for task teams which are used when tasking is enabled for the team2845typedef struct kmp_base_task_team {2846kmp_bootstrap_lock_t2847tt_threads_lock; /* Lock used to allocate per-thread part of task team */2848/* must be bootstrap lock since used at library shutdown*/28492850// TODO: check performance vs kmp_tas_lock_t2851kmp_bootstrap_lock_t tt_task_pri_lock; /* Lock to access priority tasks */2852kmp_task_pri_t *tt_task_pri_list;28532854kmp_task_team_t *tt_next; /* For linking the task team free list */2855kmp_thread_data_t2856*tt_threads_data; /* Array of per-thread structures for task team */2857/* Data survives task team deallocation */2858kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while2859executing this team? */2860/* TRUE means tt_threads_data is set up and initialized */2861kmp_int32 tt_nproc; /* #threads in team */2862kmp_int32 tt_max_threads; // # entries allocated for threads_data array2863kmp_int32 tt_found_proxy_tasks; // found proxy tasks since last barrier2864kmp_int32 tt_untied_task_encountered;2865std::atomic<kmp_int32> tt_num_task_pri; // number of priority tasks enqueued2866// There is hidden helper thread encountered in this task team so that we must2867// wait when waiting on task team2868kmp_int32 tt_hidden_helper_task_encountered;28692870KMP_ALIGN_CACHE2871std::atomic<kmp_int32> tt_unfinished_threads; /* #threads still active */28722873KMP_ALIGN_CACHE2874volatile kmp_uint322875tt_active; /* is the team still actively executing tasks */2876} kmp_base_task_team_t;28772878union KMP_ALIGN_CACHE kmp_task_team {2879kmp_base_task_team_t tt;2880double tt_align; /* use worst case alignment */2881char tt_pad[KMP_PAD(kmp_base_task_team_t, CACHE_LINE)];2882};28832884typedef struct kmp_task_team_list_t {2885kmp_task_team_t *task_team;2886kmp_task_team_list_t *next;2887} kmp_task_team_list_t;28882889#if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)2890// Free lists keep same-size free memory slots for fast memory allocation2891// routines2892typedef struct kmp_free_list {2893void *th_free_list_self; // Self-allocated tasks free list2894void *th_free_list_sync; // Self-allocated tasks stolen/returned by other2895// threads2896void *th_free_list_other; // Non-self free list (to be returned to owner's2897// sync list)2898} kmp_free_list_t;2899#endif2900#if KMP_NESTED_HOT_TEAMS2901// Hot teams array keeps hot teams and their sizes for given thread. Hot teams2902// are not put in teams pool, and they don't put threads in threads pool.2903typedef struct kmp_hot_team_ptr {2904kmp_team_p *hot_team; // pointer to hot_team of given nesting level2905kmp_int32 hot_team_nth; // number of threads allocated for the hot_team2906} kmp_hot_team_ptr_t;2907#endif2908typedef struct kmp_teams_size {2909kmp_int32 nteams; // number of teams in a league2910kmp_int32 nth; // number of threads in each team of the league2911} kmp_teams_size_t;29122913// This struct stores a thread that acts as a "root" for a contention2914// group. Contention groups are rooted at kmp_root threads, but also at2915// each primary thread of each team created in the teams construct.2916// This struct therefore also stores a thread_limit associated with2917// that contention group, and a counter to track the number of threads2918// active in that contention group. Each thread has a list of these: CG2919// root threads have an entry in their list in which cg_root refers to2920// the thread itself, whereas other workers in the CG will have a2921// single entry where cg_root is same as the entry containing their CG2922// root. When a thread encounters a teams construct, it will add a new2923// entry to the front of its list, because it now roots a new CG.2924typedef struct kmp_cg_root {2925kmp_info_p *cg_root; // "root" thread for a contention group2926// The CG root's limit comes from OMP_THREAD_LIMIT for root threads, or2927// thread_limit clause for teams primary threads2928kmp_int32 cg_thread_limit;2929kmp_int32 cg_nthreads; // Count of active threads in CG rooted at cg_root2930struct kmp_cg_root *up; // pointer to higher level CG root in list2931} kmp_cg_root_t;29322933// OpenMP thread data structures29342935typedef struct KMP_ALIGN_CACHE kmp_base_info {2936/* Start with the readonly data which is cache aligned and padded. This is2937written before the thread starts working by the primary thread. Uber2938masters may update themselves later. Usage does not consider serialized2939regions. */2940kmp_desc_t th_info;2941kmp_team_p *th_team; /* team we belong to */2942kmp_root_p *th_root; /* pointer to root of task hierarchy */2943kmp_info_p *th_next_pool; /* next available thread in the pool */2944kmp_disp_t *th_dispatch; /* thread's dispatch data */2945int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */29462947/* The following are cached from the team info structure */2948/* TODO use these in more places as determined to be needed via profiling */2949int th_team_nproc; /* number of threads in a team */2950kmp_info_p *th_team_master; /* the team's primary thread */2951int th_team_serialized; /* team is serialized */2952microtask_t th_teams_microtask; /* save entry address for teams construct */2953int th_teams_level; /* save initial level of teams construct */2954/* it is 0 on device but may be any on host */29552956/* The blocktime info is copied from the team struct to the thread struct */2957/* at the start of a barrier, and the values stored in the team are used */2958/* at points in the code where the team struct is no longer guaranteed */2959/* to exist (from the POV of worker threads). */2960#if KMP_USE_MONITOR2961int th_team_bt_intervals;2962int th_team_bt_set;2963#else2964kmp_uint64 th_team_bt_intervals;2965#endif29662967#if KMP_AFFINITY_SUPPORTED2968kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */2969kmp_affinity_ids_t th_topology_ids; /* thread's current topology ids */2970kmp_affinity_attrs_t th_topology_attrs; /* thread's current topology attrs */2971#endif2972omp_allocator_handle_t th_def_allocator; /* default allocator */2973/* The data set by the primary thread at reinit, then R/W by the worker */2974KMP_ALIGN_CACHE int2975th_set_nproc; /* if > 0, then only use this request for the next fork */2976int *th_set_nested_nth;2977bool th_nt_strict; // num_threads clause has strict modifier2978ident_t *th_nt_loc; // loc for strict modifier2979int th_nt_sev; // error severity for strict modifier2980const char *th_nt_msg; // error message for strict modifier2981int th_set_nested_nth_sz;2982#if KMP_NESTED_HOT_TEAMS2983kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */2984#endif2985kmp_proc_bind_t2986th_set_proc_bind; /* if != proc_bind_default, use request for next fork */2987kmp_teams_size_t2988th_teams_size; /* number of teams/threads in teams construct */2989#if KMP_AFFINITY_SUPPORTED2990int th_current_place; /* place currently bound to */2991int th_new_place; /* place to bind to in par reg */2992int th_first_place; /* first place in partition */2993int th_last_place; /* last place in partition */2994#endif2995int th_prev_level; /* previous level for affinity format */2996int th_prev_num_threads; /* previous num_threads for affinity format */2997#if USE_ITT_BUILD2998kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */2999kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */3000kmp_uint64 th_frame_time; /* frame timestamp */3001#endif /* USE_ITT_BUILD */3002kmp_local_t th_local;3003struct private_common *th_pri_head;30043005/* Now the data only used by the worker (after initial allocation) */3006/* TODO the first serial team should actually be stored in the info_t3007structure. this will help reduce initial allocation overhead */3008KMP_ALIGN_CACHE kmp_team_p3009*th_serial_team; /*serialized team held in reserve*/30103011#if OMPT_SUPPORT3012ompt_thread_info_t ompt_thread_info;3013#endif30143015/* The following are also read by the primary thread during reinit */3016struct common_table *th_pri_common;30173018volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */3019/* while awaiting queuing lock acquire */30203021volatile void *th_sleep_loc; // this points at a kmp_flag<T>3022flag_type th_sleep_loc_type; // enum type of flag stored in th_sleep_loc30233024ident_t *th_ident;3025unsigned th_x; // Random number generator data3026unsigned th_a; // Random number generator data30273028/* Tasking-related data for the thread */3029kmp_task_team_t *th_task_team; // Task team struct3030kmp_taskdata_t *th_current_task; // Innermost Task being executed3031kmp_uint8 th_task_state; // alternating 0/1 for task team identification3032kmp_uint32 th_reap_state; // Non-zero indicates thread is not3033// tasking, thus safe to reap30343035/* More stuff for keeping track of active/sleeping threads (this part is3036written by the worker thread) */3037kmp_uint8 th_active_in_pool; // included in count of #active threads in pool3038int th_active; // ! sleeping; 32 bits for TCR/TCW3039std::atomic<kmp_uint32> th_used_in_team; // Flag indicating use in team3040// 0 = not used in team; 1 = used in team;3041// 2 = transitioning to not used in team; 3 = transitioning to used in team3042struct cons_header *th_cons; // used for consistency check3043#if KMP_USE_HIER_SCHED3044// used for hierarchical scheduling3045kmp_hier_private_bdata_t *th_hier_bar_data;3046#endif30473048/* Add the syncronizing data which is cache aligned and padded. */3049KMP_ALIGN_CACHE kmp_balign_t th_bar[bs_last_barrier];30503051KMP_ALIGN_CACHE volatile kmp_int323052th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */30533054#if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5)3055#define NUM_LISTS 43056kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory3057// allocation routines3058#endif30593060#if KMP_OS_WINDOWS3061kmp_win32_cond_t th_suspend_cv;3062kmp_win32_mutex_t th_suspend_mx;3063std::atomic<int> th_suspend_init;3064#endif3065#if KMP_OS_UNIX3066kmp_cond_align_t th_suspend_cv;3067kmp_mutex_align_t th_suspend_mx;3068std::atomic<int> th_suspend_init_count;3069#endif30703071#if USE_ITT_BUILD3072kmp_itt_mark_t th_itt_mark_single;3073// alignment ???3074#endif /* USE_ITT_BUILD */3075#if KMP_STATS_ENABLED3076kmp_stats_list *th_stats;3077#endif3078#if KMP_OS_UNIX3079std::atomic<bool> th_blocking;3080#endif3081kmp_cg_root_t *th_cg_roots; // list of cg_roots associated with this thread3082} kmp_base_info_t;30833084typedef union KMP_ALIGN_CACHE kmp_info {3085double th_align; /* use worst case alignment */3086char th_pad[KMP_PAD(kmp_base_info_t, CACHE_LINE)];3087kmp_base_info_t th;3088} kmp_info_t;30893090// OpenMP thread team data structures30913092typedef struct kmp_base_data {3093volatile kmp_uint32 t_value;3094} kmp_base_data_t;30953096typedef union KMP_ALIGN_CACHE kmp_sleep_team {3097double dt_align; /* use worst case alignment */3098char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];3099kmp_base_data_t dt;3100} kmp_sleep_team_t;31013102typedef union KMP_ALIGN_CACHE kmp_ordered_team {3103double dt_align; /* use worst case alignment */3104char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];3105kmp_base_data_t dt;3106} kmp_ordered_team_t;31073108typedef int (*launch_t)(int gtid);31093110/* Minimum number of ARGV entries to malloc if necessary */3111#define KMP_MIN_MALLOC_ARGV_ENTRIES 10031123113// Set up how many argv pointers will fit in cache lines containing3114// t_inline_argv. Historically, we have supported at least 96 bytes. Using a3115// larger value for more space between the primary write/worker read section and3116// read/write by all section seems to buy more performance on EPCC PARALLEL.3117#if KMP_ARCH_X86 || KMP_ARCH_X86_643118#define KMP_INLINE_ARGV_BYTES \3119(4 * CACHE_LINE - \3120((3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + \3121sizeof(kmp_int16) + sizeof(kmp_uint32)) % \3122CACHE_LINE))3123#else3124#define KMP_INLINE_ARGV_BYTES \3125(2 * CACHE_LINE - ((3 * KMP_PTR_SKIP + 2 * sizeof(int)) % CACHE_LINE))3126#endif3127#define KMP_INLINE_ARGV_ENTRIES (int)(KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP)31283129typedef struct KMP_ALIGN_CACHE kmp_base_team {3130// Synchronization Data3131// ---------------------------------------------------------------------------3132KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered;3133kmp_balign_team_t t_bar[bs_last_barrier];3134std::atomic<int> t_construct; // count of single directive encountered by team3135char pad[sizeof(kmp_lock_t)]; // padding to maintain performance on big iron31363137// [0] - parallel / [1] - worksharing task reduction data shared by taskgroups3138std::atomic<void *> t_tg_reduce_data[2]; // to support task modifier3139std::atomic<int> t_tg_fini_counter[2]; // sync end of task reductions31403141// Primary thread only3142// ---------------------------------------------------------------------------3143KMP_ALIGN_CACHE int t_master_tid; // tid of primary thread in parent team3144int t_master_this_cons; // "this_construct" single counter of primary thread3145// in parent team3146ident_t *t_ident; // if volatile, have to change too much other crud to3147// volatile too3148kmp_team_p *t_parent; // parent team3149kmp_team_p *t_next_pool; // next free team in the team pool3150kmp_disp_t *t_dispatch; // thread's dispatch data3151kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 23152kmp_proc_bind_t t_proc_bind; // bind type for par region3153int t_primary_task_state; // primary thread's task state saved3154#if USE_ITT_BUILD3155kmp_uint64 t_region_time; // region begin timestamp3156#endif /* USE_ITT_BUILD */31573158// Primary thread write, workers read3159// --------------------------------------------------------------------------3160KMP_ALIGN_CACHE void **t_argv;3161int t_argc;3162int t_nproc; // number of threads in team3163microtask_t t_pkfn;3164launch_t t_invoke; // procedure to launch the microtask31653166#if OMPT_SUPPORT3167ompt_team_info_t ompt_team_info;3168ompt_lw_taskteam_t *ompt_serialized_team_info;3169#endif31703171#if KMP_ARCH_X86 || KMP_ARCH_X86_643172kmp_int8 t_fp_control_saved;3173kmp_int8 t_pad2b;3174kmp_int16 t_x87_fpu_control_word; // FP control regs3175kmp_uint32 t_mxcsr;3176#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */31773178void *t_inline_argv[KMP_INLINE_ARGV_ENTRIES];31793180KMP_ALIGN_CACHE kmp_info_t **t_threads;3181kmp_taskdata_t3182*t_implicit_task_taskdata; // Taskdata for the thread's implicit task3183int t_level; // nested parallel level31843185KMP_ALIGN_CACHE int t_max_argc;3186int t_max_nproc; // max threads this team can handle (dynamically expandable)3187int t_serialized; // levels deep of serialized teams3188dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system3189int t_id; // team's id, assigned by debugger.3190int t_active_level; // nested active parallel level3191kmp_r_sched_t t_sched; // run-time schedule for the team3192#if KMP_AFFINITY_SUPPORTED3193int t_first_place; // first & last place in parent thread's partition.3194int t_last_place; // Restore these values to primary thread after par region.3195#endif // KMP_AFFINITY_SUPPORTED3196int t_display_affinity;3197int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via3198// omp_set_num_threads() call3199omp_allocator_handle_t t_def_allocator; /* default allocator */32003201// Read/write by workers as well3202#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)3203// Using CACHE_LINE=64 reduces memory footprint, but causes a big perf3204// regression of epcc 'parallel' and 'barrier' on fxe256lin01. This extra3205// padding serves to fix the performance of epcc 'parallel' and 'barrier' when3206// CACHE_LINE=64. TODO: investigate more and get rid if this padding.3207char dummy_padding[1024];3208#endif3209// Internal control stack for additional nested teams.3210KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top;3211// for SERIALIZED teams nested 2 or more levels deep3212// typed flag to store request state of cancellation3213std::atomic<kmp_int32> t_cancel_request;3214int t_master_active; // save on fork, restore on join3215void *t_copypriv_data; // team specific pointer to copyprivate data array3216#if KMP_OS_WINDOWS3217std::atomic<kmp_uint32> t_copyin_counter;3218#endif3219#if USE_ITT_BUILD3220void *t_stack_id; // team specific stack stitching id (for ittnotify)3221#endif /* USE_ITT_BUILD */3222distributedBarrier *b; // Distributed barrier data associated with team3223kmp_nested_nthreads_t *t_nested_nth;3224} kmp_base_team_t;32253226// Assert that the list structure fits and aligns within3227// the double task team pointer3228KMP_BUILD_ASSERT(sizeof(kmp_task_team_t *[2]) == sizeof(kmp_task_team_list_t));3229KMP_BUILD_ASSERT(alignof(kmp_task_team_t *[2]) ==3230alignof(kmp_task_team_list_t));32313232union KMP_ALIGN_CACHE kmp_team {3233kmp_base_team_t t;3234double t_align; /* use worst case alignment */3235char t_pad[KMP_PAD(kmp_base_team_t, CACHE_LINE)];3236};32373238typedef union KMP_ALIGN_CACHE kmp_time_global {3239double dt_align; /* use worst case alignment */3240char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)];3241kmp_base_data_t dt;3242} kmp_time_global_t;32433244typedef struct kmp_base_global {3245/* cache-aligned */3246kmp_time_global_t g_time;32473248/* non cache-aligned */3249volatile int g_abort;3250volatile int g_done;32513252int g_dynamic;3253enum dynamic_mode g_dynamic_mode;3254} kmp_base_global_t;32553256typedef union KMP_ALIGN_CACHE kmp_global {3257kmp_base_global_t g;3258double g_align; /* use worst case alignment */3259char g_pad[KMP_PAD(kmp_base_global_t, CACHE_LINE)];3260} kmp_global_t;32613262typedef struct kmp_base_root {3263// TODO: GEH - combine r_active with r_in_parallel then r_active ==3264// (r_in_parallel>= 0)3265// TODO: GEH - then replace r_active with t_active_levels if we can to reduce3266// the synch overhead or keeping r_active3267volatile int r_active; /* TRUE if some region in a nest has > 1 thread */3268// keeps a count of active parallel regions per root3269std::atomic<int> r_in_parallel;3270// GEH: This is misnamed, should be r_active_levels3271kmp_team_t *r_root_team;3272kmp_team_t *r_hot_team;3273kmp_info_t *r_uber_thread;3274kmp_lock_t r_begin_lock;3275volatile int r_begin;3276int r_blocktime; /* blocktime for this root and descendants */3277#if KMP_AFFINITY_SUPPORTED3278int r_affinity_assigned;3279#endif // KMP_AFFINITY_SUPPORTED3280} kmp_base_root_t;32813282typedef union KMP_ALIGN_CACHE kmp_root {3283kmp_base_root_t r;3284double r_align; /* use worst case alignment */3285char r_pad[KMP_PAD(kmp_base_root_t, CACHE_LINE)];3286} kmp_root_t;32873288struct fortran_inx_info {3289kmp_int32 data;3290};32913292// This list type exists to hold old __kmp_threads arrays so that3293// old references to them may complete while reallocation takes place when3294// expanding the array. The items in this list are kept alive until library3295// shutdown.3296typedef struct kmp_old_threads_list_t {3297kmp_info_t **threads;3298struct kmp_old_threads_list_t *next;3299} kmp_old_threads_list_t;33003301/* ------------------------------------------------------------------------ */33023303extern int __kmp_settings;3304extern int __kmp_duplicate_library_ok;3305#if USE_ITT_BUILD3306extern int __kmp_forkjoin_frames;3307extern int __kmp_forkjoin_frames_mode;3308#endif3309extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method;3310extern int __kmp_determ_red;33113312#ifdef KMP_DEBUG3313extern int kmp_a_debug;3314extern int kmp_b_debug;3315extern int kmp_c_debug;3316extern int kmp_d_debug;3317extern int kmp_e_debug;3318extern int kmp_f_debug;3319#endif /* KMP_DEBUG */33203321/* For debug information logging using rotating buffer */3322#define KMP_DEBUG_BUF_LINES_INIT 5123323#define KMP_DEBUG_BUF_LINES_MIN 133243325#define KMP_DEBUG_BUF_CHARS_INIT 1283326#define KMP_DEBUG_BUF_CHARS_MIN 233273328extern int3329__kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */3330extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */3331extern int3332__kmp_debug_buf_chars; /* How many characters allowed per line in buffer */3333extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer3334entry pointer */33353336extern char *__kmp_debug_buffer; /* Debug buffer itself */3337extern std::atomic<int> __kmp_debug_count; /* Counter for number of lines3338printed in buffer so far */3339extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase3340recommended in warnings */3341/* end rotating debug buffer */33423343#ifdef KMP_DEBUG3344extern int __kmp_par_range; /* +1 => only go par for constructs in range */33453346#define KMP_PAR_RANGE_ROUTINE_LEN 10243347extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN];3348#define KMP_PAR_RANGE_FILENAME_LEN 10243349extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN];3350extern int __kmp_par_range_lb;3351extern int __kmp_par_range_ub;3352#endif33533354/* For printing out dynamic storage map for threads and teams */3355extern int3356__kmp_storage_map; /* True means print storage map for threads and teams */3357extern int __kmp_storage_map_verbose; /* True means storage map includes3358placement info */3359extern int __kmp_storage_map_verbose_specified;33603361#if KMP_ARCH_X86 || KMP_ARCH_X86_643362extern kmp_cpuinfo_t __kmp_cpuinfo;3363static inline bool __kmp_is_hybrid_cpu() { return __kmp_cpuinfo.flags.hybrid; }3364#elif KMP_OS_DARWIN && KMP_ARCH_AARCH643365static inline bool __kmp_is_hybrid_cpu() { return true; }3366#else3367static inline bool __kmp_is_hybrid_cpu() { return false; }3368#endif33693370extern volatile int __kmp_init_serial;3371extern volatile int __kmp_init_gtid;3372extern volatile int __kmp_init_common;3373extern volatile int __kmp_need_register_serial;3374extern volatile int __kmp_init_middle;3375extern volatile int __kmp_init_parallel;3376#if KMP_USE_MONITOR3377extern volatile int __kmp_init_monitor;3378#endif3379extern volatile int __kmp_init_user_locks;3380extern volatile int __kmp_init_hidden_helper_threads;3381extern int __kmp_init_counter;3382extern int __kmp_root_counter;3383extern int __kmp_version;33843385/* list of address of allocated caches for commons */3386extern kmp_cached_addr_t *__kmp_threadpriv_cache_list;33873388/* Barrier algorithm types and options */3389extern kmp_uint32 __kmp_barrier_gather_bb_dflt;3390extern kmp_uint32 __kmp_barrier_release_bb_dflt;3391extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt;3392extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt;3393extern kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier];3394extern kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier];3395extern kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier];3396extern kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier];3397extern char const *__kmp_barrier_branch_bit_env_name[bs_last_barrier];3398extern char const *__kmp_barrier_pattern_env_name[bs_last_barrier];3399extern char const *__kmp_barrier_type_name[bs_last_barrier];3400extern char const *__kmp_barrier_pattern_name[bp_last_bar];34013402/* Global Locks */3403extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */3404extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */3405extern kmp_bootstrap_lock_t __kmp_task_team_lock;3406extern kmp_bootstrap_lock_t3407__kmp_exit_lock; /* exit() is not always thread-safe */3408#if KMP_USE_MONITOR3409extern kmp_bootstrap_lock_t3410__kmp_monitor_lock; /* control monitor thread creation */3411#endif3412extern kmp_bootstrap_lock_t3413__kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and3414__kmp_threads expansion to co-exist */34153416extern kmp_lock_t __kmp_global_lock; /* control OS/global access */3417extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */3418extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */34193420extern enum library_type __kmp_library;34213422extern enum sched_type __kmp_sched; /* default runtime scheduling */3423extern enum sched_type __kmp_static; /* default static scheduling method */3424extern enum sched_type __kmp_guided; /* default guided scheduling method */3425extern enum sched_type __kmp_auto; /* default auto scheduling method */3426extern int __kmp_chunk; /* default runtime chunk size */3427extern int __kmp_force_monotonic; /* whether monotonic scheduling forced */34283429extern size_t __kmp_stksize; /* stack size per thread */3430#if KMP_USE_MONITOR3431extern size_t __kmp_monitor_stksize; /* stack size for monitor thread */3432#endif3433extern size_t __kmp_stkoffset; /* stack offset per thread */3434extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */34353436extern size_t3437__kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */3438extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */3439extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */3440extern int __kmp_env_checks; /* was KMP_CHECKS specified? */3441extern int __kmp_env_consistency_check; // was KMP_CONSISTENCY_CHECK specified?3442extern int __kmp_generate_warnings; /* should we issue warnings? */3443extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */34443445#ifdef DEBUG_SUSPEND3446extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */3447#endif34483449extern kmp_int32 __kmp_use_yield;3450extern kmp_int32 __kmp_use_yield_exp_set;3451extern kmp_uint32 __kmp_yield_init;3452extern kmp_uint32 __kmp_yield_next;3453extern kmp_uint64 __kmp_pause_init;34543455/* ------------------------------------------------------------------------- */3456extern int __kmp_allThreadsSpecified;34573458extern size_t __kmp_align_alloc;3459/* following data protected by initialization routines */3460extern int __kmp_xproc; /* number of processors in the system */3461extern int __kmp_avail_proc; /* number of processors available to the process */3462extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */3463extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */3464// maximum total number of concurrently-existing threads on device3465extern int __kmp_max_nth;3466// maximum total number of concurrently-existing threads in a contention group3467extern int __kmp_cg_max_nth;3468extern int __kmp_task_max_nth; // max threads used in a task3469extern int __kmp_teams_max_nth; // max threads used in a teams construct3470extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and3471__kmp_root */3472extern int __kmp_dflt_team_nth; /* default number of threads in a parallel3473region a la OMP_NUM_THREADS */3474extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial3475initialization */3476extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is3477used (fixed) */3478extern int __kmp_tp_cached; /* whether threadprivate cache has been created3479(__kmpc_threadprivate_cached()) */3480extern int __kmp_dflt_blocktime; /* number of microseconds to wait before3481blocking (env setting) */3482extern char __kmp_blocktime_units; /* 'm' or 'u' to note units specified */3483extern bool __kmp_wpolicy_passive; /* explicitly set passive wait policy */34843485// Convert raw blocktime from ms to us if needed.3486static inline void __kmp_aux_convert_blocktime(int *bt) {3487if (__kmp_blocktime_units == 'm') {3488if (*bt > INT_MAX / 1000) {3489*bt = INT_MAX / 1000;3490KMP_INFORM(MaxValueUsing, "kmp_set_blocktime(ms)", bt);3491}3492*bt = *bt * 1000;3493}3494}34953496#if KMP_USE_MONITOR3497extern int3498__kmp_monitor_wakeups; /* number of times monitor wakes up per second */3499extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before3500blocking */3501#endif3502#ifdef KMP_ADJUST_BLOCKTIME3503extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */3504#endif /* KMP_ADJUST_BLOCKTIME */3505#ifdef KMP_DFLT_NTH_CORES3506extern int __kmp_ncores; /* Total number of cores for threads placement */3507#endif3508/* Number of millisecs to delay on abort for Intel(R) VTune(TM) tools */3509extern int __kmp_abort_delay;35103511extern int __kmp_need_register_atfork_specified;3512extern int __kmp_need_register_atfork; /* At initialization, call pthread_atfork3513to install fork handler */3514extern int __kmp_gtid_mode; /* Method of getting gtid, values:35150 - not set, will be set at runtime35161 - using stack search35172 - dynamic TLS (pthread_getspecific(Linux* OS/OS3518X*) or TlsGetValue(Windows* OS))35193 - static TLS (__declspec(thread) __kmp_gtid),3520Linux* OS .so only. */3521extern int3522__kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */3523#ifdef KMP_TDATA_GTID3524extern KMP_THREAD_LOCAL int __kmp_gtid;3525#endif3526extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */3527extern int __kmp_foreign_tp; // If true, separate TP var for each foreign thread3528#if KMP_ARCH_X86 || KMP_ARCH_X86_643529extern int __kmp_inherit_fp_control; // copy fp creg(s) parent->workers at fork3530extern kmp_int16 __kmp_init_x87_fpu_control_word; // init thread's FP ctrl reg3531extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxscr */3532#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */35333534// max_active_levels for nested parallelism enabled by default via3535// OMP_MAX_ACTIVE_LEVELS, OMP_NESTED, OMP_NUM_THREADS, and OMP_PROC_BIND3536extern int __kmp_dflt_max_active_levels;3537// Indicates whether value of __kmp_dflt_max_active_levels was already3538// explicitly set by OMP_MAX_ACTIVE_LEVELS or OMP_NESTED=false3539extern bool __kmp_dflt_max_active_levels_set;3540extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in3541concurrent execution per team */3542#if KMP_NESTED_HOT_TEAMS3543extern int __kmp_hot_teams_mode;3544extern int __kmp_hot_teams_max_level;3545#endif35463547#if KMP_OS_LINUX3548extern enum clock_function_type __kmp_clock_function;3549extern int __kmp_clock_function_param;3550#endif /* KMP_OS_LINUX */35513552#if KMP_MIC_SUPPORTED3553extern enum mic_type __kmp_mic_type;3554#endif35553556#ifdef USE_LOAD_BALANCE3557extern double __kmp_load_balance_interval; // load balance algorithm interval3558#endif /* USE_LOAD_BALANCE */35593560#if KMP_USE_ADAPTIVE_LOCKS35613562// Parameters for the speculative lock backoff system.3563struct kmp_adaptive_backoff_params_t {3564// Number of soft retries before it counts as a hard retry.3565kmp_uint32 max_soft_retries;3566// Badness is a bit mask : 0,1,3,7,15,... on each hard failure we move one to3567// the right3568kmp_uint32 max_badness;3569};35703571extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params;35723573#if KMP_DEBUG_ADAPTIVE_LOCKS3574extern const char *__kmp_speculative_statsfile;3575#endif35763577#endif // KMP_USE_ADAPTIVE_LOCKS35783579extern int __kmp_display_env; /* TRUE or FALSE */3580extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */3581extern int __kmp_omp_cancellation; /* TRUE or FALSE */3582extern int __kmp_nteams;3583extern int __kmp_teams_thread_limit;35843585/* ------------------------------------------------------------------------- */35863587/* the following are protected by the fork/join lock */3588/* write: lock read: anytime */3589extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */3590/* Holds old arrays of __kmp_threads until library shutdown */3591extern kmp_old_threads_list_t *__kmp_old_threads_list;3592/* read/write: lock */3593extern volatile kmp_team_t *__kmp_team_pool;3594extern volatile kmp_info_t *__kmp_thread_pool;3595extern kmp_info_t *__kmp_thread_pool_insert_pt;35963597// total num threads reachable from some root thread including all root threads3598extern volatile int __kmp_nth;3599/* total number of threads reachable from some root thread including all root3600threads, and those in the thread pool */3601extern volatile int __kmp_all_nth;3602extern std::atomic<int> __kmp_thread_pool_active_nth;36033604extern kmp_root_t **__kmp_root; /* root of thread hierarchy */3605/* end data protected by fork/join lock */3606/* ------------------------------------------------------------------------- */36073608#define __kmp_get_gtid() __kmp_get_global_thread_id()3609#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg()3610#define __kmp_get_tid() (__kmp_tid_from_gtid(__kmp_get_gtid()))3611#define __kmp_get_team() (__kmp_threads[(__kmp_get_gtid())]->th.th_team)3612#define __kmp_get_thread() (__kmp_thread_from_gtid(__kmp_get_gtid()))36133614// AT: Which way is correct?3615// AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc;3616// AT: 2. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc;3617#define __kmp_get_team_num_threads(gtid) \3618(__kmp_threads[(gtid)]->th.th_team->t.t_nproc)36193620static inline bool KMP_UBER_GTID(int gtid) {3621KMP_DEBUG_ASSERT(gtid >= KMP_GTID_MIN);3622KMP_DEBUG_ASSERT(gtid < __kmp_threads_capacity);3623return (gtid >= 0 && __kmp_root[gtid] && __kmp_threads[gtid] &&3624__kmp_threads[gtid] == __kmp_root[gtid]->r.r_uber_thread);3625}36263627static inline int __kmp_tid_from_gtid(int gtid) {3628KMP_DEBUG_ASSERT(gtid >= 0);3629return __kmp_threads[gtid]->th.th_info.ds.ds_tid;3630}36313632static inline int __kmp_gtid_from_tid(int tid, const kmp_team_t *team) {3633KMP_DEBUG_ASSERT(tid >= 0 && team);3634return team->t.t_threads[tid]->th.th_info.ds.ds_gtid;3635}36363637static inline int __kmp_gtid_from_thread(const kmp_info_t *thr) {3638KMP_DEBUG_ASSERT(thr);3639return thr->th.th_info.ds.ds_gtid;3640}36413642static inline kmp_info_t *__kmp_thread_from_gtid(int gtid) {3643KMP_DEBUG_ASSERT(gtid >= 0);3644return __kmp_threads[gtid];3645}36463647static inline kmp_team_t *__kmp_team_from_gtid(int gtid) {3648KMP_DEBUG_ASSERT(gtid >= 0);3649return __kmp_threads[gtid]->th.th_team;3650}36513652static inline void __kmp_assert_valid_gtid(kmp_int32 gtid) {3653if (UNLIKELY(gtid < 0 || gtid >= __kmp_threads_capacity))3654KMP_FATAL(ThreadIdentInvalid);3655}36563657#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT3658extern int __kmp_user_level_mwait; // TRUE or FALSE; from KMP_USER_LEVEL_MWAIT3659extern int __kmp_umwait_enabled; // Runtime check if user-level mwait enabled3660extern int __kmp_mwait_enabled; // Runtime check if ring3 mwait is enabled3661extern int __kmp_mwait_hints; // Hints to pass in to mwait3662#endif36633664#if KMP_HAVE_UMWAIT3665extern int __kmp_waitpkg_enabled; // Runtime check if waitpkg exists3666extern int __kmp_tpause_state; // 0 (default), 1=C0.1, 2=C0.2; from KMP_TPAUSE3667extern int __kmp_tpause_hint; // 1=C0.1 (default), 0=C0.2; from KMP_TPAUSE3668extern int __kmp_tpause_enabled; // 0 (default), 1 (KMP_TPAUSE is non-zero)3669#endif36703671/* ------------------------------------------------------------------------- */36723673extern kmp_global_t __kmp_global; /* global status */36743675extern kmp_info_t __kmp_monitor;3676// For Debugging Support Library3677extern std::atomic<kmp_int32> __kmp_team_counter;3678// For Debugging Support Library3679extern std::atomic<kmp_int32> __kmp_task_counter;36803681#if USE_DEBUGGER3682#define _KMP_GEN_ID(counter) \3683(__kmp_debugging ? KMP_ATOMIC_INC(&counter) + 1 : ~0)3684#else3685#define _KMP_GEN_ID(counter) (~0)3686#endif /* USE_DEBUGGER */36873688#define KMP_GEN_TASK_ID() _KMP_GEN_ID(__kmp_task_counter)3689#define KMP_GEN_TEAM_ID() _KMP_GEN_ID(__kmp_team_counter)36903691/* ------------------------------------------------------------------------ */36923693extern void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2,3694size_t size, char const *format, ...);36953696extern void __kmp_serial_initialize(void);3697extern void __kmp_middle_initialize(void);3698extern void __kmp_parallel_initialize(void);36993700extern void __kmp_internal_begin(void);3701extern void __kmp_internal_end_library(int gtid);3702extern void __kmp_internal_end_thread(int gtid);3703extern void __kmp_internal_end_atexit(void);3704extern void __kmp_internal_end_dtor(void);3705extern void __kmp_internal_end_dest(void *);37063707extern int __kmp_register_root(int initial_thread);3708extern void __kmp_unregister_root(int gtid);3709extern void __kmp_unregister_library(void); // called by __kmp_internal_end()37103711extern int __kmp_ignore_mppbeg(void);3712extern int __kmp_ignore_mppend(void);37133714extern int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws);3715extern void __kmp_exit_single(int gtid);37163717extern void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);3718extern void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref);37193720#ifdef USE_LOAD_BALANCE3721extern int __kmp_get_load_balance(int);3722#endif37233724extern int __kmp_get_global_thread_id(void);3725extern int __kmp_get_global_thread_id_reg(void);3726extern void __kmp_exit_thread(int exit_status);3727extern void __kmp_abort(char const *format, ...);3728extern void __kmp_abort_thread(void);3729KMP_NORETURN extern void __kmp_abort_process(void);3730extern void __kmp_warn(char const *format, ...);37313732extern void __kmp_set_num_threads(int new_nth, int gtid);37333734extern bool __kmp_detect_shm();3735extern bool __kmp_detect_tmp();37363737// Returns current thread (pointer to kmp_info_t). Current thread *must* be3738// registered.3739static inline kmp_info_t *__kmp_entry_thread() {3740int gtid = __kmp_entry_gtid();37413742return __kmp_threads[gtid];3743}37443745extern void __kmp_set_max_active_levels(int gtid, int new_max_active_levels);3746extern int __kmp_get_max_active_levels(int gtid);3747extern int __kmp_get_ancestor_thread_num(int gtid, int level);3748extern int __kmp_get_team_size(int gtid, int level);3749extern void __kmp_set_schedule(int gtid, kmp_sched_t new_sched, int chunk);3750extern void __kmp_get_schedule(int gtid, kmp_sched_t *sched, int *chunk);37513752extern unsigned short __kmp_get_random(kmp_info_t *thread);3753extern void __kmp_init_random(kmp_info_t *thread);37543755extern kmp_r_sched_t __kmp_get_schedule_global(void);3756extern void __kmp_adjust_num_threads(int new_nproc);3757extern void __kmp_check_stksize(size_t *val);37583759extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL);3760extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL);3761extern void ___kmp_free(void *ptr KMP_SRC_LOC_DECL);3762#define __kmp_allocate(size) ___kmp_allocate((size)KMP_SRC_LOC_CURR)3763#define __kmp_page_allocate(size) ___kmp_page_allocate((size)KMP_SRC_LOC_CURR)3764#define __kmp_free(ptr) ___kmp_free((ptr)KMP_SRC_LOC_CURR)37653766#if USE_FAST_MEMORY3767extern void *___kmp_fast_allocate(kmp_info_t *this_thr,3768size_t size KMP_SRC_LOC_DECL);3769extern void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL);3770extern void __kmp_free_fast_memory(kmp_info_t *this_thr);3771extern void __kmp_initialize_fast_memory(kmp_info_t *this_thr);3772#define __kmp_fast_allocate(this_thr, size) \3773___kmp_fast_allocate((this_thr), (size)KMP_SRC_LOC_CURR)3774#define __kmp_fast_free(this_thr, ptr) \3775___kmp_fast_free((this_thr), (ptr)KMP_SRC_LOC_CURR)3776#endif37773778extern void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL);3779extern void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem,3780size_t elsize KMP_SRC_LOC_DECL);3781extern void *___kmp_thread_realloc(kmp_info_t *th, void *ptr,3782size_t size KMP_SRC_LOC_DECL);3783extern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL);3784#define __kmp_thread_malloc(th, size) \3785___kmp_thread_malloc((th), (size)KMP_SRC_LOC_CURR)3786#define __kmp_thread_calloc(th, nelem, elsize) \3787___kmp_thread_calloc((th), (nelem), (elsize)KMP_SRC_LOC_CURR)3788#define __kmp_thread_realloc(th, ptr, size) \3789___kmp_thread_realloc((th), (ptr), (size)KMP_SRC_LOC_CURR)3790#define __kmp_thread_free(th, ptr) \3791___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR)37923793extern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads);3794extern void __kmp_push_num_threads_list(ident_t *loc, int gtid,3795kmp_uint32 list_length,3796int *num_threads_list);3797extern void __kmp_set_strict_num_threads(ident_t *loc, int gtid, int sev,3798const char *msg);37993800extern void __kmp_push_proc_bind(ident_t *loc, int gtid,3801kmp_proc_bind_t proc_bind);3802extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams,3803int num_threads);3804extern void __kmp_push_num_teams_51(ident_t *loc, int gtid, int num_teams_lb,3805int num_teams_ub, int num_threads);38063807extern void __kmp_yield();38083809extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid,3810enum sched_type schedule, kmp_int32 lb,3811kmp_int32 ub, kmp_int32 st, kmp_int32 chunk);3812extern void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,3813enum sched_type schedule, kmp_uint32 lb,3814kmp_uint32 ub, kmp_int32 st,3815kmp_int32 chunk);3816extern void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid,3817enum sched_type schedule, kmp_int64 lb,3818kmp_int64 ub, kmp_int64 st, kmp_int64 chunk);3819extern void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,3820enum sched_type schedule, kmp_uint64 lb,3821kmp_uint64 ub, kmp_int64 st,3822kmp_int64 chunk);38233824extern int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid,3825kmp_int32 *p_last, kmp_int32 *p_lb,3826kmp_int32 *p_ub, kmp_int32 *p_st);3827extern int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid,3828kmp_int32 *p_last, kmp_uint32 *p_lb,3829kmp_uint32 *p_ub, kmp_int32 *p_st);3830extern int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid,3831kmp_int32 *p_last, kmp_int64 *p_lb,3832kmp_int64 *p_ub, kmp_int64 *p_st);3833extern int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid,3834kmp_int32 *p_last, kmp_uint64 *p_lb,3835kmp_uint64 *p_ub, kmp_int64 *p_st);38363837extern void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid);3838extern void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid);3839extern void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid);3840extern void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid);38413842extern void __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 gtid);38433844#ifdef KMP_GOMP_COMPAT38453846extern void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid,3847enum sched_type schedule, kmp_int32 lb,3848kmp_int32 ub, kmp_int32 st,3849kmp_int32 chunk, int push_ws);3850extern void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid,3851enum sched_type schedule, kmp_uint32 lb,3852kmp_uint32 ub, kmp_int32 st,3853kmp_int32 chunk, int push_ws);3854extern void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid,3855enum sched_type schedule, kmp_int64 lb,3856kmp_int64 ub, kmp_int64 st,3857kmp_int64 chunk, int push_ws);3858extern void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid,3859enum sched_type schedule, kmp_uint64 lb,3860kmp_uint64 ub, kmp_int64 st,3861kmp_int64 chunk, int push_ws);3862extern void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid);3863extern void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid);3864extern void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid);3865extern void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid);38663867#endif /* KMP_GOMP_COMPAT */38683869extern kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker);3870extern kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker);3871extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker);3872extern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker);3873extern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker);3874extern kmp_uint32 __kmp_wait_4(kmp_uint32 volatile *spinner, kmp_uint32 checker,3875kmp_uint32 (*pred)(kmp_uint32, kmp_uint32),3876void *obj);3877extern void __kmp_wait_4_ptr(void *spinner, kmp_uint32 checker,3878kmp_uint32 (*pred)(void *, kmp_uint32), void *obj);38793880extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64<> *flag,3881int final_spin3882#if USE_ITT_BUILD3883,3884void *itt_sync_obj3885#endif3886);3887extern void __kmp_release_64(kmp_flag_64<> *flag);38883889extern void __kmp_infinite_loop(void);38903891extern void __kmp_cleanup(void);38923893#if KMP_HANDLE_SIGNALS3894extern int __kmp_handle_signals;3895extern void __kmp_install_signals(int parallel_init);3896extern void __kmp_remove_signals(void);3897#endif38983899extern void __kmp_clear_system_time(void);3900extern void __kmp_read_system_time(double *delta);39013902extern void __kmp_check_stack_overlap(kmp_info_t *thr);39033904extern void __kmp_expand_host_name(char *buffer, size_t size);3905extern void __kmp_expand_file_name(char *result, size_t rlen, char *pattern);39063907#if KMP_ARCH_X86 || KMP_ARCH_X86_64 || (KMP_OS_WINDOWS && (KMP_ARCH_AARCH64 || KMP_ARCH_ARM))3908extern void3909__kmp_initialize_system_tick(void); /* Initialize timer tick value */3910#endif39113912extern void3913__kmp_runtime_initialize(void); /* machine specific initialization */3914extern void __kmp_runtime_destroy(void);39153916#if KMP_AFFINITY_SUPPORTED3917extern char *__kmp_affinity_print_mask(char *buf, int buf_len,3918kmp_affin_mask_t *mask);3919extern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,3920kmp_affin_mask_t *mask);3921extern void __kmp_affinity_initialize(kmp_affinity_t &affinity);3922extern void __kmp_affinity_uninitialize(void);3923extern void __kmp_affinity_set_init_mask(3924int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */3925void __kmp_affinity_bind_init_mask(int gtid);3926extern void __kmp_affinity_bind_place(int gtid);3927extern void __kmp_affinity_determine_capable(const char *env_var);3928extern int __kmp_aux_set_affinity(void **mask);3929extern int __kmp_aux_get_affinity(void **mask);3930extern int __kmp_aux_get_affinity_max_proc();3931extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);3932extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);3933extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);3934extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);3935#if KMP_WEIGHTED_ITERATIONS_SUPPORTED3936extern int __kmp_get_first_osid_with_ecore(void);3937#endif3938#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY || \3939KMP_OS_AIX3940extern int kmp_set_thread_affinity_mask_initial(void);3941#endif3942static inline void __kmp_assign_root_init_mask() {3943int gtid = __kmp_entry_gtid();3944kmp_root_t *r = __kmp_threads[gtid]->th.th_root;3945if (r->r.r_uber_thread == __kmp_threads[gtid] && !r->r.r_affinity_assigned) {3946__kmp_affinity_set_init_mask(gtid, /*isa_root=*/TRUE);3947__kmp_affinity_bind_init_mask(gtid);3948r->r.r_affinity_assigned = TRUE;3949}3950}3951static inline void __kmp_reset_root_init_mask(int gtid) {3952if (!KMP_AFFINITY_CAPABLE())3953return;3954kmp_info_t *th = __kmp_threads[gtid];3955kmp_root_t *r = th->th.th_root;3956if (r->r.r_uber_thread == th && r->r.r_affinity_assigned) {3957__kmp_set_system_affinity(__kmp_affin_origMask, FALSE);3958KMP_CPU_COPY(th->th.th_affin_mask, __kmp_affin_origMask);3959r->r.r_affinity_assigned = FALSE;3960}3961}3962#else /* KMP_AFFINITY_SUPPORTED */3963#define __kmp_assign_root_init_mask() /* Nothing */3964static inline void __kmp_reset_root_init_mask(int gtid) {}3965#endif /* KMP_AFFINITY_SUPPORTED */3966// No need for KMP_AFFINITY_SUPPORTED guard as only one field in the3967// format string is for affinity, so platforms that do not support3968// affinity can still use the other fields, e.g., %n for num_threads3969extern size_t __kmp_aux_capture_affinity(int gtid, const char *format,3970kmp_str_buf_t *buffer);3971extern void __kmp_aux_display_affinity(int gtid, const char *format);39723973extern void __kmp_cleanup_hierarchy();3974extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);39753976#if KMP_USE_FUTEX39773978extern int __kmp_futex_determine_capable(void);39793980#endif // KMP_USE_FUTEX39813982extern void __kmp_gtid_set_specific(int gtid);3983extern int __kmp_gtid_get_specific(void);39843985extern double __kmp_read_cpu_time(void);39863987extern int __kmp_read_system_info(struct kmp_sys_info *info);39883989#if KMP_USE_MONITOR3990extern void __kmp_create_monitor(kmp_info_t *th);3991#endif39923993extern void *__kmp_launch_thread(kmp_info_t *thr);39943995extern void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size);39963997#if KMP_OS_WINDOWS3998extern int __kmp_still_running(kmp_info_t *th);3999extern int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val);4000extern void __kmp_free_handle(kmp_thread_t tHandle);4001#endif40024003#if KMP_USE_MONITOR4004extern void __kmp_reap_monitor(kmp_info_t *th);4005#endif4006extern void __kmp_reap_worker(kmp_info_t *th);4007extern void __kmp_terminate_thread(int gtid);40084009extern int __kmp_try_suspend_mx(kmp_info_t *th);4010extern void __kmp_lock_suspend_mx(kmp_info_t *th);4011extern void __kmp_unlock_suspend_mx(kmp_info_t *th);40124013extern void __kmp_elapsed(double *);4014extern void __kmp_elapsed_tick(double *);40154016extern void __kmp_enable(int old_state);4017extern void __kmp_disable(int *old_state);40184019extern void __kmp_thread_sleep(int millis);40204021extern void __kmp_common_initialize(void);4022extern void __kmp_common_destroy(void);4023extern void __kmp_common_destroy_gtid(int gtid);40244025#if KMP_OS_UNIX4026extern void __kmp_register_atfork(void);4027#endif4028extern void __kmp_suspend_initialize(void);4029extern void __kmp_suspend_initialize_thread(kmp_info_t *th);4030extern void __kmp_suspend_uninitialize_thread(kmp_info_t *th);40314032extern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,4033int tid);4034extern kmp_team_t *4035__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc,4036#if OMPT_SUPPORT4037ompt_data_t ompt_parallel_data,4038#endif4039kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs,4040int argc USE_NESTED_HOT_ARG(kmp_info_t *thr));4041extern void __kmp_free_thread(kmp_info_t *);4042extern void __kmp_free_team(kmp_root_t *,4043kmp_team_t *USE_NESTED_HOT_ARG(kmp_info_t *));4044extern kmp_team_t *__kmp_reap_team(kmp_team_t *);40454046/* ------------------------------------------------------------------------ */40474048extern void __kmp_initialize_bget(kmp_info_t *th);4049extern void __kmp_finalize_bget(kmp_info_t *th);40504051KMP_EXPORT void *kmpc_malloc(size_t size);4052KMP_EXPORT void *kmpc_aligned_malloc(size_t size, size_t alignment);4053KMP_EXPORT void *kmpc_calloc(size_t nelem, size_t elsize);4054KMP_EXPORT void *kmpc_realloc(void *ptr, size_t size);4055KMP_EXPORT void kmpc_free(void *ptr);40564057/* declarations for internal use */40584059extern int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,4060size_t reduce_size, void *reduce_data,4061void (*reduce)(void *, void *));4062extern void __kmp_end_split_barrier(enum barrier_type bt, int gtid);4063extern int __kmp_barrier_gomp_cancel(int gtid);40644065/*!4066* Tell the fork call which compiler generated the fork call, and therefore how4067* to deal with the call.4068*/4069enum fork_context_e {4070fork_context_gnu, /**< Called from GNU generated code, so must not invoke the4071microtask internally. */4072fork_context_intel, /**< Called from Intel generated code. */4073fork_context_last4074};4075extern int __kmp_fork_call(ident_t *loc, int gtid,4076enum fork_context_e fork_context, kmp_int32 argc,4077microtask_t microtask, launch_t invoker,4078kmp_va_list ap);40794080extern void __kmp_join_call(ident_t *loc, int gtid4081#if OMPT_SUPPORT4082,4083enum fork_context_e fork_context4084#endif4085,4086int exit_teams = 0);40874088extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid);4089extern void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team);4090extern void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team);4091extern int __kmp_invoke_task_func(int gtid);4092extern void __kmp_run_before_invoked_task(int gtid, int tid,4093kmp_info_t *this_thr,4094kmp_team_t *team);4095extern void __kmp_run_after_invoked_task(int gtid, int tid,4096kmp_info_t *this_thr,4097kmp_team_t *team);40984099// should never have been exported4100KMP_EXPORT int __kmpc_invoke_task_func(int gtid);4101extern int __kmp_invoke_teams_master(int gtid);4102extern void __kmp_teams_master(int gtid);4103extern int __kmp_aux_get_team_num();4104extern int __kmp_aux_get_num_teams();4105extern void __kmp_save_internal_controls(kmp_info_t *thread);4106extern void __kmp_user_set_library(enum library_type arg);4107extern void __kmp_aux_set_library(enum library_type arg);4108extern void __kmp_aux_set_stacksize(size_t arg);4109extern void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid);4110extern void __kmp_aux_set_defaults(char const *str, size_t len);41114112/* Functions called from __kmp_aux_env_initialize() in kmp_settings.cpp */4113void kmpc_set_blocktime(int arg);4114void ompc_set_nested(int flag);4115void ompc_set_dynamic(int flag);4116void ompc_set_num_threads(int arg);41174118extern void __kmp_push_current_task_to_thread(kmp_info_t *this_thr,4119kmp_team_t *team, int tid);4120extern void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr);4121extern kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,4122kmp_tasking_flags_t *flags,4123size_t sizeof_kmp_task_t,4124size_t sizeof_shareds,4125kmp_routine_entry_t task_entry);4126extern void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr,4127kmp_team_t *team, int tid,4128int set_curr_task);4129extern void __kmp_finish_implicit_task(kmp_info_t *this_thr);4130extern void __kmp_free_implicit_task(kmp_info_t *this_thr);41314132extern kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,4133int gtid,4134kmp_task_t *task);4135extern void __kmp_fulfill_event(kmp_event_t *event);41364137extern void __kmp_free_task_team(kmp_info_t *thread,4138kmp_task_team_t *task_team);4139extern void __kmp_reap_task_teams(void);4140extern void __kmp_push_task_team_node(kmp_info_t *thread, kmp_team_t *team);4141extern void __kmp_pop_task_team_node(kmp_info_t *thread, kmp_team_t *team);4142extern void __kmp_wait_to_unref_task_teams(void);4143extern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team);4144extern void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team);4145extern void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team4146#if USE_ITT_BUILD4147,4148void *itt_sync_obj4149#endif /* USE_ITT_BUILD */4150,4151int wait = 1);4152extern void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread,4153int gtid);4154#if KMP_DEBUG4155#define KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, thr) \4156KMP_DEBUG_ASSERT( \4157__kmp_tasking_mode != tskm_task_teams || team->t.t_nproc == 1 || \4158thr->th.th_task_team == team->t.t_task_team[thr->th.th_task_state])4159#else4160#define KMP_DEBUG_ASSERT_TASKTEAM_INVARIANT(team, thr) /* Nothing */4161#endif41624163extern int __kmp_is_address_mapped(void *addr);4164extern kmp_uint64 __kmp_hardware_timestamp(void);41654166#if KMP_OS_UNIX4167extern int __kmp_read_from_file(char const *path, char const *format, ...);4168#endif41694170/* ------------------------------------------------------------------------ */4171//4172// Assembly routines that have no compiler intrinsic replacement4173//41744175extern int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc,4176void *argv[]4177#if OMPT_SUPPORT4178,4179void **exit_frame_ptr4180#endif4181);41824183/* ------------------------------------------------------------------------ */41844185KMP_EXPORT void __kmpc_begin(ident_t *, kmp_int32 flags);4186KMP_EXPORT void __kmpc_end(ident_t *);41874188KMP_EXPORT void __kmpc_threadprivate_register_vec(ident_t *, void *data,4189kmpc_ctor_vec ctor,4190kmpc_cctor_vec cctor,4191kmpc_dtor_vec dtor,4192size_t vector_length);4193KMP_EXPORT void __kmpc_threadprivate_register(ident_t *, void *data,4194kmpc_ctor ctor, kmpc_cctor cctor,4195kmpc_dtor dtor);4196KMP_EXPORT void *__kmpc_threadprivate(ident_t *, kmp_int32 global_tid,4197void *data, size_t size);41984199KMP_EXPORT kmp_int32 __kmpc_global_thread_num(ident_t *);4200KMP_EXPORT kmp_int32 __kmpc_global_num_threads(ident_t *);4201KMP_EXPORT kmp_int32 __kmpc_bound_thread_num(ident_t *);4202KMP_EXPORT kmp_int32 __kmpc_bound_num_threads(ident_t *);42034204KMP_EXPORT kmp_int32 __kmpc_ok_to_fork(ident_t *);4205KMP_EXPORT void __kmpc_fork_call(ident_t *, kmp_int32 nargs,4206kmpc_micro microtask, ...);4207KMP_EXPORT void __kmpc_fork_call_if(ident_t *loc, kmp_int32 nargs,4208kmpc_micro microtask, kmp_int32 cond,4209void *args);42104211KMP_EXPORT void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid);4212KMP_EXPORT void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid);42134214KMP_EXPORT void __kmpc_flush(ident_t *);4215KMP_EXPORT void __kmpc_barrier(ident_t *, kmp_int32 global_tid);4216KMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid);4217KMP_EXPORT void __kmpc_end_master(ident_t *, kmp_int32 global_tid);4218KMP_EXPORT kmp_int32 __kmpc_masked(ident_t *, kmp_int32 global_tid,4219kmp_int32 filter);4220KMP_EXPORT void __kmpc_end_masked(ident_t *, kmp_int32 global_tid);4221KMP_EXPORT void __kmpc_ordered(ident_t *, kmp_int32 global_tid);4222KMP_EXPORT void __kmpc_end_ordered(ident_t *, kmp_int32 global_tid);4223KMP_EXPORT void __kmpc_critical(ident_t *, kmp_int32 global_tid,4224kmp_critical_name *);4225KMP_EXPORT void __kmpc_end_critical(ident_t *, kmp_int32 global_tid,4226kmp_critical_name *);4227KMP_EXPORT void __kmpc_critical_with_hint(ident_t *, kmp_int32 global_tid,4228kmp_critical_name *, uint32_t hint);42294230KMP_EXPORT kmp_int32 __kmpc_barrier_master(ident_t *, kmp_int32 global_tid);4231KMP_EXPORT void __kmpc_end_barrier_master(ident_t *, kmp_int32 global_tid);42324233KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait(ident_t *,4234kmp_int32 global_tid);42354236KMP_EXPORT kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid);4237KMP_EXPORT void __kmpc_end_single(ident_t *, kmp_int32 global_tid);42384239KMP_EXPORT kmp_int32 __kmpc_sections_init(ident_t *loc, kmp_int32 global_tid);4240KMP_EXPORT kmp_int32 __kmpc_next_section(ident_t *loc, kmp_int32 global_tid,4241kmp_int32 numberOfSections);4242KMP_EXPORT void __kmpc_end_sections(ident_t *loc, kmp_int32 global_tid);42434244KMP_EXPORT void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid,4245kmp_int32 schedtype, kmp_int32 *plastiter,4246kmp_int *plower, kmp_int *pupper,4247kmp_int *pstride, kmp_int incr,4248kmp_int chunk);42494250KMP_EXPORT void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid);42514252KMP_EXPORT void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid,4253size_t cpy_size, void *cpy_data,4254void (*cpy_func)(void *, void *),4255kmp_int32 didit);42564257KMP_EXPORT void *__kmpc_copyprivate_light(ident_t *loc, kmp_int32 gtid,4258void *cpy_data);42594260extern void KMPC_SET_NUM_THREADS(int arg);4261extern void KMPC_SET_DYNAMIC(int flag);4262extern void KMPC_SET_NESTED(int flag);42634264/* OMP 3.0 tasking interface routines */4265KMP_EXPORT kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,4266kmp_task_t *new_task);4267KMP_EXPORT kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,4268kmp_int32 flags,4269size_t sizeof_kmp_task_t,4270size_t sizeof_shareds,4271kmp_routine_entry_t task_entry);4272KMP_EXPORT kmp_task_t *__kmpc_omp_target_task_alloc(4273ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, size_t sizeof_kmp_task_t,4274size_t sizeof_shareds, kmp_routine_entry_t task_entry, kmp_int64 device_id);4275KMP_EXPORT void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid,4276kmp_task_t *task);4277KMP_EXPORT void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid,4278kmp_task_t *task);4279KMP_EXPORT kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,4280kmp_task_t *new_task);4281KMP_EXPORT kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid);4282KMP_EXPORT kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid,4283int end_part);42844285#if TASK_UNUSED4286void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task);4287void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid,4288kmp_task_t *task);4289#endif // TASK_UNUSED42904291/* ------------------------------------------------------------------------ */42924293KMP_EXPORT void __kmpc_taskgroup(ident_t *loc, int gtid);4294KMP_EXPORT void __kmpc_end_taskgroup(ident_t *loc, int gtid);42954296KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps(4297ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps,4298kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,4299kmp_depend_info_t *noalias_dep_list);43004301KMP_EXPORT kmp_base_depnode_t *__kmpc_task_get_depnode(kmp_task_t *task);43024303KMP_EXPORT kmp_depnode_list_t *__kmpc_task_get_successors(kmp_task_t *task);43044305KMP_EXPORT void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid,4306kmp_int32 ndeps,4307kmp_depend_info_t *dep_list,4308kmp_int32 ndeps_noalias,4309kmp_depend_info_t *noalias_dep_list);4310/* __kmpc_omp_taskwait_deps_51 : Function for OpenMP 5.1 nowait clause.4311* Placeholder for taskwait with nowait clause.*/4312KMP_EXPORT void __kmpc_omp_taskwait_deps_51(ident_t *loc_ref, kmp_int32 gtid,4313kmp_int32 ndeps,4314kmp_depend_info_t *dep_list,4315kmp_int32 ndeps_noalias,4316kmp_depend_info_t *noalias_dep_list,4317kmp_int32 has_no_wait);43184319extern kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,4320bool serialize_immediate);43214322KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid,4323kmp_int32 cncl_kind);4324KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid,4325kmp_int32 cncl_kind);4326KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t *loc_ref, kmp_int32 gtid);4327KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind);43284329KMP_EXPORT void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask);4330KMP_EXPORT void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask);4331KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task,4332kmp_int32 if_val, kmp_uint64 *lb,4333kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup,4334kmp_int32 sched, kmp_uint64 grainsize,4335void *task_dup);4336KMP_EXPORT void __kmpc_taskloop_5(ident_t *loc, kmp_int32 gtid,4337kmp_task_t *task, kmp_int32 if_val,4338kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st,4339kmp_int32 nogroup, kmp_int32 sched,4340kmp_uint64 grainsize, kmp_int32 modifier,4341void *task_dup);4342KMP_EXPORT void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);4343KMP_EXPORT void *__kmpc_taskred_init(int gtid, int num_data, void *data);4344KMP_EXPORT void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);4345KMP_EXPORT void *__kmpc_task_reduction_modifier_init(ident_t *loc, int gtid,4346int is_ws, int num,4347void *data);4348KMP_EXPORT void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int is_ws,4349int num, void *data);4350KMP_EXPORT void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,4351int is_ws);4352KMP_EXPORT kmp_int32 __kmpc_omp_reg_task_with_affinity(4353ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,4354kmp_task_affinity_info_t *affin_list);4355KMP_EXPORT void __kmp_set_num_teams(int num_teams);4356KMP_EXPORT int __kmp_get_max_teams(void);4357KMP_EXPORT void __kmp_set_teams_thread_limit(int limit);4358KMP_EXPORT int __kmp_get_teams_thread_limit(void);43594360/* Interface target task integration */4361KMP_EXPORT void **__kmpc_omp_get_target_async_handle_ptr(kmp_int32 gtid);4362KMP_EXPORT bool __kmpc_omp_has_task_team(kmp_int32 gtid);43634364/* Lock interface routines (fast versions with gtid passed in) */4365KMP_EXPORT void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid,4366void **user_lock);4367KMP_EXPORT void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid,4368void **user_lock);4369KMP_EXPORT void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid,4370void **user_lock);4371KMP_EXPORT void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid,4372void **user_lock);4373KMP_EXPORT void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);4374KMP_EXPORT void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid,4375void **user_lock);4376KMP_EXPORT void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid,4377void **user_lock);4378KMP_EXPORT void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid,4379void **user_lock);4380KMP_EXPORT int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock);4381KMP_EXPORT int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid,4382void **user_lock);43834384KMP_EXPORT void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid,4385void **user_lock, uintptr_t hint);4386KMP_EXPORT void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,4387void **user_lock,4388uintptr_t hint);43894390#if OMPX_TASKGRAPH4391// Taskgraph's Record & Replay mechanism4392// __kmp_tdg_is_recording: check whether a given TDG is recording4393// status: the tdg's current status4394static inline bool __kmp_tdg_is_recording(kmp_tdg_status_t status) {4395return status == KMP_TDG_RECORDING;4396}43974398KMP_EXPORT kmp_int32 __kmpc_start_record_task(ident_t *loc, kmp_int32 gtid,4399kmp_int32 input_flags,4400kmp_int32 tdg_id);4401KMP_EXPORT void __kmpc_end_record_task(ident_t *loc, kmp_int32 gtid,4402kmp_int32 input_flags, kmp_int32 tdg_id);4403#endif4404/* Interface to fast scalable reduce methods routines */44054406KMP_EXPORT kmp_int32 __kmpc_reduce_nowait(4407ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,4408void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),4409kmp_critical_name *lck);4410KMP_EXPORT void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,4411kmp_critical_name *lck);4412KMP_EXPORT kmp_int32 __kmpc_reduce(4413ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,4414void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),4415kmp_critical_name *lck);4416KMP_EXPORT void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,4417kmp_critical_name *lck);44184419/* Internal fast reduction routines */44204421extern PACKED_REDUCTION_METHOD_T __kmp_determine_reduction_method(4422ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,4423void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),4424kmp_critical_name *lck);44254426// this function is for testing set/get/determine reduce method4427KMP_EXPORT kmp_int32 __kmp_get_reduce_method(void);44284429KMP_EXPORT kmp_uint64 __kmpc_get_taskid();4430KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid();44314432// C++ port4433// missing 'extern "C"' declarations44344435KMP_EXPORT kmp_int32 __kmpc_in_parallel(ident_t *loc);4436KMP_EXPORT void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid);4437KMP_EXPORT void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,4438kmp_int32 num_threads);4439KMP_EXPORT void __kmpc_push_num_threads_strict(ident_t *loc,4440kmp_int32 global_tid,4441kmp_int32 num_threads,4442int severity,4443const char *message);44444445KMP_EXPORT void __kmpc_push_num_threads_list(ident_t *loc, kmp_int32 global_tid,4446kmp_uint32 list_length,4447kmp_int32 *num_threads_list);4448KMP_EXPORT void __kmpc_push_num_threads_list_strict(4449ident_t *loc, kmp_int32 global_tid, kmp_uint32 list_length,4450kmp_int32 *num_threads_list, int severity, const char *message);44514452KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,4453int proc_bind);4454KMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,4455kmp_int32 num_teams,4456kmp_int32 num_threads);4457KMP_EXPORT void __kmpc_set_thread_limit(ident_t *loc, kmp_int32 global_tid,4458kmp_int32 thread_limit);4459/* Function for OpenMP 5.1 num_teams clause */4460KMP_EXPORT void __kmpc_push_num_teams_51(ident_t *loc, kmp_int32 global_tid,4461kmp_int32 num_teams_lb,4462kmp_int32 num_teams_ub,4463kmp_int32 num_threads);4464KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,4465kmpc_micro microtask, ...);4466struct kmp_dim { // loop bounds info casted to kmp_int644467kmp_int64 lo; // lower4468kmp_int64 up; // upper4469kmp_int64 st; // stride4470};4471KMP_EXPORT void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,4472kmp_int32 num_dims,4473const struct kmp_dim *dims);4474KMP_EXPORT void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid,4475const kmp_int64 *vec);4476KMP_EXPORT void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid,4477const kmp_int64 *vec);4478KMP_EXPORT void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid);44794480KMP_EXPORT void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid,4481void *data, size_t size,4482void ***cache);44834484// The routines below are not exported.4485// Consider making them 'static' in corresponding source files.4486void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr,4487void *data_addr, size_t pc_size);4488struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr,4489void *data_addr,4490size_t pc_size);4491void __kmp_threadprivate_resize_cache(int newCapacity);4492void __kmp_cleanup_threadprivate_caches();44934494// ompc_, kmpc_ entries moved from omp.h.4495#if KMP_OS_WINDOWS4496#define KMPC_CONVENTION __cdecl4497#else4498#define KMPC_CONVENTION4499#endif45004501#ifndef __OMP_H4502typedef enum omp_sched_t {4503omp_sched_static = 1,4504omp_sched_dynamic = 2,4505omp_sched_guided = 3,4506omp_sched_auto = 44507} omp_sched_t;4508typedef void *kmp_affinity_mask_t;4509#endif45104511KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int);4512KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int);4513KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int);4514KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int);4515KMP_EXPORT int KMPC_CONVENTION4516kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *);4517KMP_EXPORT int KMPC_CONVENTION4518kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *);4519KMP_EXPORT int KMPC_CONVENTION4520kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *);45214522KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int);4523KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t);4524KMP_EXPORT void KMPC_CONVENTION kmpc_set_library(int);4525KMP_EXPORT void KMPC_CONVENTION kmpc_set_defaults(char const *);4526KMP_EXPORT void KMPC_CONVENTION kmpc_set_disp_num_buffers(int);4527void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format);4528size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size);4529void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format);4530size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,4531char const *format);45324533enum kmp_target_offload_kind {4534tgt_disabled = 0,4535tgt_default = 1,4536tgt_mandatory = 24537};4538typedef enum kmp_target_offload_kind kmp_target_offload_kind_t;4539// Set via OMP_TARGET_OFFLOAD if specified, defaults to tgt_default otherwise4540extern kmp_target_offload_kind_t __kmp_target_offload;4541extern int __kmpc_get_target_offload();45424543// Constants used in libomptarget4544#define KMP_DEVICE_DEFAULT -1 // This is libomptarget's default device.4545#define KMP_DEVICE_ALL -11 // This is libomptarget's "all devices".45464547// OMP Pause Resource45484549// The following enum is used both to set the status in __kmp_pause_status, and4550// as the internal equivalent of the externally-visible omp_pause_resource_t.4551typedef enum kmp_pause_status_t {4552kmp_not_paused = 0, // status is not paused, or, requesting resume4553kmp_soft_paused = 1, // status is soft-paused, or, requesting soft pause4554kmp_hard_paused = 2 // status is hard-paused, or, requesting hard pause4555} kmp_pause_status_t;45564557// This stores the pause state of the runtime4558extern kmp_pause_status_t __kmp_pause_status;4559extern int __kmpc_pause_resource(kmp_pause_status_t level);4560extern int __kmp_pause_resource(kmp_pause_status_t level);4561// Soft resume sets __kmp_pause_status, and wakes up all threads.4562extern void __kmp_resume_if_soft_paused();4563// Hard resume simply resets the status to not paused. Library will appear to4564// be uninitialized after hard pause. Let OMP constructs trigger required4565// initializations.4566static inline void __kmp_resume_if_hard_paused() {4567if (__kmp_pause_status == kmp_hard_paused) {4568__kmp_pause_status = kmp_not_paused;4569}4570}45714572extern void __kmp_omp_display_env(int verbose);45734574// 1: it is initializing hidden helper team4575extern volatile int __kmp_init_hidden_helper;4576// 1: the hidden helper team is done4577extern volatile int __kmp_hidden_helper_team_done;4578// 1: enable hidden helper task4579extern kmp_int32 __kmp_enable_hidden_helper;4580// Main thread of hidden helper team4581extern kmp_info_t *__kmp_hidden_helper_main_thread;4582// Descriptors for the hidden helper threads4583extern kmp_info_t **__kmp_hidden_helper_threads;4584// Number of hidden helper threads4585extern kmp_int32 __kmp_hidden_helper_threads_num;4586// Number of hidden helper tasks that have not been executed yet4587extern std::atomic<kmp_int32> __kmp_unexecuted_hidden_helper_tasks;45884589extern void __kmp_hidden_helper_initialize();4590extern void __kmp_hidden_helper_threads_initz_routine();4591extern void __kmp_do_initialize_hidden_helper_threads();4592extern void __kmp_hidden_helper_threads_initz_wait();4593extern void __kmp_hidden_helper_initz_release();4594extern void __kmp_hidden_helper_threads_deinitz_wait();4595extern void __kmp_hidden_helper_threads_deinitz_release();4596extern void __kmp_hidden_helper_main_thread_wait();4597extern void __kmp_hidden_helper_worker_thread_wait();4598extern void __kmp_hidden_helper_worker_thread_signal();4599extern void __kmp_hidden_helper_main_thread_release();46004601// Check whether a given thread is a hidden helper thread4602#define KMP_HIDDEN_HELPER_THREAD(gtid) \4603((gtid) >= 1 && (gtid) <= __kmp_hidden_helper_threads_num)46044605#define KMP_HIDDEN_HELPER_WORKER_THREAD(gtid) \4606((gtid) > 1 && (gtid) <= __kmp_hidden_helper_threads_num)46074608#define KMP_HIDDEN_HELPER_MAIN_THREAD(gtid) \4609((gtid) == 1 && (gtid) <= __kmp_hidden_helper_threads_num)46104611#define KMP_HIDDEN_HELPER_TEAM(team) \4612(team->t.t_threads[0] == __kmp_hidden_helper_main_thread)46134614// Map a gtid to a hidden helper thread. The first hidden helper thread, a.k.a4615// main thread, is skipped.4616#define KMP_GTID_TO_SHADOW_GTID(gtid) \4617((gtid) % (__kmp_hidden_helper_threads_num - 1) + 2)46184619// Return the adjusted gtid value by subtracting from gtid the number4620// of hidden helper threads. This adjusted value is the gtid the thread would4621// have received if there were no hidden helper threads.4622static inline int __kmp_adjust_gtid_for_hidden_helpers(int gtid) {4623int adjusted_gtid = gtid;4624if (__kmp_hidden_helper_threads_num > 0 && gtid > 0 &&4625gtid - __kmp_hidden_helper_threads_num >= 0) {4626adjusted_gtid -= __kmp_hidden_helper_threads_num;4627}4628return adjusted_gtid;4629}46304631// Support for error directive4632typedef enum kmp_severity_t {4633severity_warning = 1,4634severity_fatal = 24635} kmp_severity_t;4636extern void __kmpc_error(ident_t *loc, int severity, const char *message);46374638// Support for scope directive4639KMP_EXPORT void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved);4640KMP_EXPORT void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved);46414642#ifdef __cplusplus4643}4644#endif46454646template <bool C, bool S>4647extern void __kmp_suspend_32(int th_gtid, kmp_flag_32<C, S> *flag);4648template <bool C, bool S>4649extern void __kmp_suspend_64(int th_gtid, kmp_flag_64<C, S> *flag);4650template <bool C, bool S>4651extern void __kmp_atomic_suspend_64(int th_gtid,4652kmp_atomic_flag_64<C, S> *flag);4653extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag);4654#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT4655template <bool C, bool S>4656extern void __kmp_mwait_32(int th_gtid, kmp_flag_32<C, S> *flag);4657template <bool C, bool S>4658extern void __kmp_mwait_64(int th_gtid, kmp_flag_64<C, S> *flag);4659template <bool C, bool S>4660extern void __kmp_atomic_mwait_64(int th_gtid, kmp_atomic_flag_64<C, S> *flag);4661extern void __kmp_mwait_oncore(int th_gtid, kmp_flag_oncore *flag);4662#endif4663template <bool C, bool S>4664extern void __kmp_resume_32(int target_gtid, kmp_flag_32<C, S> *flag);4665template <bool C, bool S>4666extern void __kmp_resume_64(int target_gtid, kmp_flag_64<C, S> *flag);4667template <bool C, bool S>4668extern void __kmp_atomic_resume_64(int target_gtid,4669kmp_atomic_flag_64<C, S> *flag);4670extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag);46714672template <bool C, bool S>4673int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid,4674kmp_flag_32<C, S> *flag, int final_spin,4675int *thread_finished,4676#if USE_ITT_BUILD4677void *itt_sync_obj,4678#endif /* USE_ITT_BUILD */4679kmp_int32 is_constrained);4680template <bool C, bool S>4681int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,4682kmp_flag_64<C, S> *flag, int final_spin,4683int *thread_finished,4684#if USE_ITT_BUILD4685void *itt_sync_obj,4686#endif /* USE_ITT_BUILD */4687kmp_int32 is_constrained);4688template <bool C, bool S>4689int __kmp_atomic_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid,4690kmp_atomic_flag_64<C, S> *flag,4691int final_spin, int *thread_finished,4692#if USE_ITT_BUILD4693void *itt_sync_obj,4694#endif /* USE_ITT_BUILD */4695kmp_int32 is_constrained);4696int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid,4697kmp_flag_oncore *flag, int final_spin,4698int *thread_finished,4699#if USE_ITT_BUILD4700void *itt_sync_obj,4701#endif /* USE_ITT_BUILD */4702kmp_int32 is_constrained);47034704extern int __kmp_nesting_mode;4705extern int __kmp_nesting_mode_nlevels;4706extern int *__kmp_nesting_nth_level;4707extern void __kmp_init_nesting_mode();4708extern void __kmp_set_nesting_mode_threads();47094710/// This class safely opens and closes a C-style FILE* object using RAII4711/// semantics. There are also methods which allow using stdout or stderr as4712/// the underlying FILE* object. With the implicit conversion operator to4713/// FILE*, an object with this type can be used in any function which takes4714/// a FILE* object e.g., fprintf().4715/// No close method is needed at use sites.4716class kmp_safe_raii_file_t {4717FILE *f;47184719void close() {4720if (f && f != stdout && f != stderr) {4721fclose(f);4722f = nullptr;4723}4724}47254726public:4727kmp_safe_raii_file_t() : f(nullptr) {}4728kmp_safe_raii_file_t(const char *filename, const char *mode,4729const char *env_var = nullptr)4730: f(nullptr) {4731open(filename, mode, env_var);4732}4733~kmp_safe_raii_file_t() { close(); }47344735/// Open filename using mode. This is automatically closed in the destructor.4736/// The env_var parameter indicates the environment variable the filename4737/// came from if != nullptr.4738void open(const char *filename, const char *mode,4739const char *env_var = nullptr) {4740KMP_ASSERT(!f);4741f = fopen(filename, mode);4742if (!f) {4743int code = errno;4744if (env_var) {4745__kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),4746KMP_HNT(CheckEnvVar, env_var, filename), __kmp_msg_null);4747} else {4748__kmp_fatal(KMP_MSG(CantOpenFileForReading, filename), KMP_ERR(code),4749__kmp_msg_null);4750}4751}4752}4753/// Instead of erroring out, return non-zero when4754/// unsuccessful fopen() for any reason4755int try_open(const char *filename, const char *mode) {4756KMP_ASSERT(!f);4757f = fopen(filename, mode);4758if (!f)4759return errno;4760return 0;4761}4762/// Set the FILE* object to stdout and output there4763/// No open call should happen before this call.4764void set_stdout() {4765KMP_ASSERT(!f);4766f = stdout;4767}4768/// Set the FILE* object to stderr and output there4769/// No open call should happen before this call.4770void set_stderr() {4771KMP_ASSERT(!f);4772f = stderr;4773}4774operator bool() { return bool(f); }4775operator FILE *() { return f; }4776};47774778template <typename SourceType, typename TargetType,4779bool isSourceSmaller = (sizeof(SourceType) < sizeof(TargetType)),4780bool isSourceEqual = (sizeof(SourceType) == sizeof(TargetType)),4781bool isSourceSigned = std::is_signed<SourceType>::value,4782bool isTargetSigned = std::is_signed<TargetType>::value>4783struct kmp_convert {};47844785// Both types are signed; Source smaller4786template <typename SourceType, typename TargetType>4787struct kmp_convert<SourceType, TargetType, true, false, true, true> {4788static TargetType to(SourceType src) { return (TargetType)src; }4789};4790// Source equal4791template <typename SourceType, typename TargetType>4792struct kmp_convert<SourceType, TargetType, false, true, true, true> {4793static TargetType to(SourceType src) { return src; }4794};4795// Source bigger4796template <typename SourceType, typename TargetType>4797struct kmp_convert<SourceType, TargetType, false, false, true, true> {4798static TargetType to(SourceType src) {4799KMP_ASSERT(src <= static_cast<SourceType>(4800(std::numeric_limits<TargetType>::max)()));4801KMP_ASSERT(src >= static_cast<SourceType>(4802(std::numeric_limits<TargetType>::min)()));4803return (TargetType)src;4804}4805};48064807// Source signed, Target unsigned4808// Source smaller4809template <typename SourceType, typename TargetType>4810struct kmp_convert<SourceType, TargetType, true, false, true, false> {4811static TargetType to(SourceType src) {4812KMP_ASSERT(src >= 0);4813return (TargetType)src;4814}4815};4816// Source equal4817template <typename SourceType, typename TargetType>4818struct kmp_convert<SourceType, TargetType, false, true, true, false> {4819static TargetType to(SourceType src) {4820KMP_ASSERT(src >= 0);4821return (TargetType)src;4822}4823};4824// Source bigger4825template <typename SourceType, typename TargetType>4826struct kmp_convert<SourceType, TargetType, false, false, true, false> {4827static TargetType to(SourceType src) {4828KMP_ASSERT(src >= 0);4829KMP_ASSERT(src <= static_cast<SourceType>(4830(std::numeric_limits<TargetType>::max)()));4831return (TargetType)src;4832}4833};48344835// Source unsigned, Target signed4836// Source smaller4837template <typename SourceType, typename TargetType>4838struct kmp_convert<SourceType, TargetType, true, false, false, true> {4839static TargetType to(SourceType src) { return (TargetType)src; }4840};4841// Source equal4842template <typename SourceType, typename TargetType>4843struct kmp_convert<SourceType, TargetType, false, true, false, true> {4844static TargetType to(SourceType src) {4845KMP_ASSERT(src <= static_cast<SourceType>(4846(std::numeric_limits<TargetType>::max)()));4847return (TargetType)src;4848}4849};4850// Source bigger4851template <typename SourceType, typename TargetType>4852struct kmp_convert<SourceType, TargetType, false, false, false, true> {4853static TargetType to(SourceType src) {4854KMP_ASSERT(src <= static_cast<SourceType>(4855(std::numeric_limits<TargetType>::max)()));4856return (TargetType)src;4857}4858};48594860// Source unsigned, Target unsigned4861// Source smaller4862template <typename SourceType, typename TargetType>4863struct kmp_convert<SourceType, TargetType, true, false, false, false> {4864static TargetType to(SourceType src) { return (TargetType)src; }4865};4866// Source equal4867template <typename SourceType, typename TargetType>4868struct kmp_convert<SourceType, TargetType, false, true, false, false> {4869static TargetType to(SourceType src) { return src; }4870};4871// Source bigger4872template <typename SourceType, typename TargetType>4873struct kmp_convert<SourceType, TargetType, false, false, false, false> {4874static TargetType to(SourceType src) {4875KMP_ASSERT(src <= static_cast<SourceType>(4876(std::numeric_limits<TargetType>::max)()));4877return (TargetType)src;4878}4879};48804881template <typename T1, typename T2>4882static inline void __kmp_type_convert(T1 src, T2 *dest) {4883*dest = kmp_convert<T1, T2>::to(src);4884}48854886#endif /* KMP_H */488748884889