Path: blob/main/contrib/llvm-project/openmp/runtime/src/kmp_os.h
/*
 * kmp_os.h -- KPTS runtime header file.
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_OS_H
#define KMP_OS_H

#include "kmp_config.h"
#include <atomic>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>

#define KMP_FTN_PLAIN 1
#define KMP_FTN_APPEND 2
#define KMP_FTN_UPPER 3
/*
#define KMP_FTN_PREPEND 4
#define KMP_FTN_UAPPEND 5
*/

#define KMP_PTR_SKIP (sizeof(void *))

/* -------------------------- Compiler variations ------------------------ */

#define KMP_OFF 0
#define KMP_ON 1

#define KMP_MEM_CONS_VOLATILE 0
#define KMP_MEM_CONS_FENCE 1

#ifndef KMP_MEM_CONS_MODEL
#define KMP_MEM_CONS_MODEL KMP_MEM_CONS_VOLATILE
#endif

#ifndef __has_cpp_attribute
#define __has_cpp_attribute(x) 0
#endif

#ifndef __has_attribute
#define __has_attribute(x) 0
#endif

/* ------------------------- Compiler recognition ---------------------- */
#define KMP_COMPILER_ICC 0
#define KMP_COMPILER_GCC 0
#define KMP_COMPILER_CLANG 0
#define KMP_COMPILER_MSVC 0
#define KMP_COMPILER_ICX 0

#if __INTEL_CLANG_COMPILER
#undef KMP_COMPILER_ICX
#define KMP_COMPILER_ICX 1
#elif defined(__INTEL_COMPILER)
#undef KMP_COMPILER_ICC
#define KMP_COMPILER_ICC 1
#elif defined(__clang__)
#undef KMP_COMPILER_CLANG
#define KMP_COMPILER_CLANG 1
#elif defined(__GNUC__)
#undef KMP_COMPILER_GCC
#define KMP_COMPILER_GCC 1
#elif defined(_MSC_VER)
#undef KMP_COMPILER_MSVC
#define KMP_COMPILER_MSVC 1
#else
#error Unknown compiler
#endif

#if (KMP_OS_LINUX || KMP_OS_WINDOWS || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
     KMP_OS_DRAGONFLY || KMP_OS_AIX) && \
    !KMP_OS_WASI
#define KMP_AFFINITY_SUPPORTED 1
#if KMP_OS_WINDOWS && KMP_ARCH_X86_64
#define KMP_GROUP_AFFINITY 1
#else
#define KMP_GROUP_AFFINITY 0
#endif
#else
#define KMP_AFFINITY_SUPPORTED 0
#define KMP_GROUP_AFFINITY 0
#endif

#if (KMP_OS_LINUX || (KMP_OS_FREEBSD && __FreeBSD_version >= 1301000))
#define KMP_HAVE_SCHED_GETCPU 1
#else
#define KMP_HAVE_SCHED_GETCPU 0
#endif

/* Check for quad-precision extension. */
#define KMP_HAVE_QUAD 0
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
/* _Quad is already defined for icc */
#undef KMP_HAVE_QUAD
#define KMP_HAVE_QUAD 1
#elif KMP_COMPILER_CLANG
/* Clang doesn't support a software-implemented
   128-bit extended precision type yet */
typedef long double _Quad;
#elif KMP_COMPILER_GCC
/* GCC on NetBSD lacks __multc3/__divtc3 builtins needed for quad until
   NetBSD 10.0 which ships with GCC 10.5 */
#if (!KMP_OS_NETBSD || __GNUC__ >= 10)
typedef __float128 _Quad;
#undef KMP_HAVE_QUAD
#define KMP_HAVE_QUAD 1
#endif
#elif KMP_COMPILER_MSVC
typedef long double _Quad;
#endif
#else
#if __LDBL_MAX_EXP__ >= 16384 && KMP_COMPILER_GCC
typedef long double _Quad;
#undef KMP_HAVE_QUAD
#define KMP_HAVE_QUAD 1
#endif
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
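// Usage sketch (illustrative only; the function name is hypothetical): code
// that wants true quad precision should test KMP_HAVE_QUAD, because on several
// of the compiler/architecture combinations above _Quad is only a long double
// placeholder or is not usable as a 128-bit type at all.
#if 0
#if KMP_HAVE_QUAD
static inline _Quad __kmp_example_quad_sum(_Quad a, _Quad b) {
  return a + b; // genuine 128-bit arithmetic only when KMP_HAVE_QUAD is 1
}
#endif
#endif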

#define KMP_USE_X87CONTROL 0
#if KMP_OS_WINDOWS
#define KMP_END_OF_LINE "\r\n"
typedef char kmp_int8;
typedef unsigned char kmp_uint8;
typedef short kmp_int16;
typedef unsigned short kmp_uint16;
typedef int kmp_int32;
typedef unsigned int kmp_uint32;
#define KMP_INT32_SPEC "d"
#define KMP_UINT32_SPEC "u"
#ifndef KMP_STRUCT64
typedef __int64 kmp_int64;
typedef unsigned __int64 kmp_uint64;
#define KMP_INT64_SPEC "I64d"
#define KMP_UINT64_SPEC "I64u"
#else
struct kmp_struct64 {
  kmp_int32 a, b;
};
typedef struct kmp_struct64 kmp_int64;
typedef struct kmp_struct64 kmp_uint64;
/* Not sure what to use for KMP_[U]INT64_SPEC here */
#endif
#if KMP_ARCH_X86 && KMP_MSVC_COMPAT
#undef KMP_USE_X87CONTROL
#define KMP_USE_X87CONTROL 1
#endif
#if KMP_ARCH_X86_64 || KMP_ARCH_AARCH64
#define KMP_INTPTR 1
typedef __int64 kmp_intptr_t;
typedef unsigned __int64 kmp_uintptr_t;
#define KMP_INTPTR_SPEC "I64d"
#define KMP_UINTPTR_SPEC "I64u"
#endif
#endif /* KMP_OS_WINDOWS */

#if KMP_OS_UNIX
#define KMP_END_OF_LINE "\n"
typedef char kmp_int8;
typedef unsigned char kmp_uint8;
typedef short kmp_int16;
typedef unsigned short kmp_uint16;
typedef int kmp_int32;
typedef unsigned int kmp_uint32;
typedef long long kmp_int64;
typedef unsigned long long kmp_uint64;
#define KMP_INT32_SPEC "d"
#define KMP_UINT32_SPEC "u"
#define KMP_INT64_SPEC "lld"
#define KMP_UINT64_SPEC "llu"
#endif /* KMP_OS_UNIX */

#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_WASM || \
    KMP_ARCH_PPC || KMP_ARCH_AARCH64_32
#define KMP_SIZE_T_SPEC KMP_UINT32_SPEC
#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
    KMP_ARCH_VE || KMP_ARCH_S390X
#define KMP_SIZE_T_SPEC KMP_UINT64_SPEC
#else
#error "Can't determine size_t printf format specifier."
#endif

#if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_WASM || KMP_ARCH_PPC
#define KMP_SIZE_T_MAX (0xFFFFFFFF)
#else
#define KMP_SIZE_T_MAX (0xFFFFFFFFFFFFFFFF)
#endif

typedef size_t kmp_size_t;
typedef float kmp_real32;
typedef double kmp_real64;

#ifndef KMP_INTPTR
#define KMP_INTPTR 1
typedef long kmp_intptr_t;
typedef unsigned long kmp_uintptr_t;
#define KMP_INTPTR_SPEC "ld"
#define KMP_UINTPTR_SPEC "lu"
#endif

#ifdef BUILD_I8
typedef kmp_int64 kmp_int;
typedef kmp_uint64 kmp_uint;
#else
typedef kmp_int32 kmp_int;
typedef kmp_uint32 kmp_uint;
#endif /* BUILD_I8 */
#define KMP_INT_MAX ((kmp_int32)0x7FFFFFFF)
#define KMP_INT_MIN ((kmp_int32)0x80000000)

// stdarg handling
#if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64 || KMP_ARCH_WASM) && \
    (KMP_OS_FREEBSD || KMP_OS_LINUX || KMP_OS_WASI)
typedef va_list *kmp_va_list;
#define kmp_va_deref(ap) (*(ap))
#define kmp_va_addr_of(ap) (&(ap))
#else
typedef va_list kmp_va_list;
#define kmp_va_deref(ap) (ap)
#define kmp_va_addr_of(ap) (ap)
#endif
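// Usage sketch (illustrative only; the helper names are hypothetical): the
// kmp_va_list wrappers let forwarding code be written once, whether the ABI
// passes va_list by value or by address.  Callees touch the list only through
// kmp_va_deref(), and callers hand it over with kmp_va_addr_of().
#if 0
static void __kmp_example_consume_args(kmp_va_list ap) {
  int first = va_arg(kmp_va_deref(ap), int); // read from the underlying va_list
  (void)first;
}

static void __kmp_example_varargs(int count, ...) {
  va_list ap;
  va_start(ap, count);
  __kmp_example_consume_args(kmp_va_addr_of(ap)); // passes ap or &ap as needed
  va_end(ap);
}
#endif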

#ifdef __cplusplus
// macros to cast out qualifiers and to re-interpret types
#define CCAST(type, var) const_cast<type>(var)
#define RCAST(type, var) reinterpret_cast<type>(var)
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu ), and to obtain
// signed/unsigned flavors of a type
template <typename T> struct traits_t {};
// int
template <> struct traits_t<signed int> {
  typedef signed int signed_t;
  typedef unsigned int unsigned_t;
  typedef double floating_t;
  static char const *spec;
  static const signed_t max_value = 0x7fffffff;
  static const signed_t min_value = 0x80000000;
  static const int type_size = sizeof(signed_t);
};
// unsigned int
template <> struct traits_t<unsigned int> {
  typedef signed int signed_t;
  typedef unsigned int unsigned_t;
  typedef double floating_t;
  static char const *spec;
  static const unsigned_t max_value = 0xffffffff;
  static const unsigned_t min_value = 0x00000000;
  static const int type_size = sizeof(unsigned_t);
};
// long
template <> struct traits_t<signed long> {
  typedef signed long signed_t;
  typedef unsigned long unsigned_t;
  typedef long double floating_t;
  static char const *spec;
  static const int type_size = sizeof(signed_t);
};
// long long
template <> struct traits_t<signed long long> {
  typedef signed long long signed_t;
  typedef unsigned long long unsigned_t;
  typedef long double floating_t;
  static char const *spec;
  static const signed_t max_value = 0x7fffffffffffffffLL;
  static const signed_t min_value = 0x8000000000000000LL;
  static const int type_size = sizeof(signed_t);
};
// unsigned long long
template <> struct traits_t<unsigned long long> {
  typedef signed long long signed_t;
  typedef unsigned long long unsigned_t;
  typedef long double floating_t;
  static char const *spec;
  static const unsigned_t max_value = 0xffffffffffffffffLL;
  static const unsigned_t min_value = 0x0000000000000000LL;
  static const int type_size = sizeof(unsigned_t);
};
//-------------------------------------------------------------------------
#else
#define CCAST(type, var) (type)(var)
#define RCAST(type, var) (type)(var)
#endif // __cplusplus
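// Usage sketch (illustrative only; the helper is hypothetical and assumes
// <stdio.h>): traits_t<T>::spec, whose string values are defined elsewhere in
// the runtime, lets size-generic debug code build the matching printf
// conversion for a kmp_int32, kmp_uint64, etc.
#if 0
template <typename T> void __kmp_example_dump(char const *name, T value) {
  char fmt[32];
  snprintf(fmt, sizeof(fmt), "%s=%%%s\n", name, traits_t<T>::spec);
  printf(fmt, value); // e.g. "nthreads=%d\n" for kmp_int32
}
#endif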

#define KMP_EXPORT extern /* export declaration in guide libraries */

#if __GNUC__ >= 4 && !defined(__MINGW32__)
#define __forceinline __inline
#endif

/* Check if the OS/arch can support user-level mwait */
// All mwait code tests for UMWAIT first, so it should only fall back to ring3
// MWAIT for KNL.
#define KMP_HAVE_MWAIT \
  ((KMP_ARCH_X86 || KMP_ARCH_X86_64) && (KMP_OS_LINUX || KMP_OS_WINDOWS) && \
   !KMP_MIC2)
#define KMP_HAVE_UMWAIT \
  ((KMP_ARCH_X86 || KMP_ARCH_X86_64) && (KMP_OS_LINUX || KMP_OS_WINDOWS) && \
   !KMP_MIC)

#if KMP_OS_WINDOWS
// Don't include everything related to NT status code, we'll do that explicitly
#define WIN32_NO_STATUS
#include <windows.h>

static inline int KMP_GET_PAGE_SIZE(void) {
  SYSTEM_INFO si;
  GetSystemInfo(&si);
  return si.dwPageSize;
}
#else
#define KMP_GET_PAGE_SIZE() getpagesize()
#endif

#define PAGE_ALIGNED(_addr) \
  (!((size_t)_addr & (size_t)(KMP_GET_PAGE_SIZE() - 1)))
#define ALIGN_TO_PAGE(x) \
  (void *)(((size_t)(x)) & ~((size_t)(KMP_GET_PAGE_SIZE() - 1)))
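// Usage sketch (illustrative only; the helper is hypothetical): both macros
// mask with (page size - 1), which assumes the page size is a power of two.
// ALIGN_TO_PAGE rounds an address down to the start of its page.
#if 0
static void __kmp_example_page_math(void *addr) {
  void *page_start = ALIGN_TO_PAGE(addr); // e.g. 0x12345678 -> 0x12345000 for 4 KiB pages
  if (!PAGE_ALIGNED(addr)) {
    // addr is somewhere inside the page that starts at page_start
  }
  (void)page_start;
}
#endif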

/* ---------- Support for cache alignment, padding, etc. ----------------*/

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

#define INTERNODE_CACHE_LINE 4096 /* for multi-node systems */

/* Define the default size of the cache line */
#ifndef CACHE_LINE
#define CACHE_LINE 128 /* cache line size in bytes */
#else
#if (CACHE_LINE < 64) && !defined(KMP_OS_DARWIN)
// 2006-02-13: This produces too many warnings on OS X*. Disable for now
#warning CACHE_LINE is too small.
#endif
#endif /* CACHE_LINE */

#define KMP_CACHE_PREFETCH(ADDR) /* nothing */

// Define attribute that indicates that the fall through from the previous
// case label is intentional and should not be diagnosed by a compiler
// Code from libcxx/include/__config
// Use a function like macro to imply that it must be followed by a semicolon
#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
#define KMP_FALLTHROUGH() [[fallthrough]]
// icc cannot properly tell this attribute is absent so force off
#elif KMP_COMPILER_ICC
#define KMP_FALLTHROUGH() ((void)0)
#elif __has_cpp_attribute(clang::fallthrough)
#define KMP_FALLTHROUGH() [[clang::fallthrough]]
#elif __has_attribute(fallthrough) || __GNUC__ >= 7
#define KMP_FALLTHROUGH() __attribute__((__fallthrough__))
#else
#define KMP_FALLTHROUGH() ((void)0)
#endif

#if KMP_HAVE_ATTRIBUTE_WAITPKG
#define KMP_ATTRIBUTE_TARGET_WAITPKG __attribute__((target("waitpkg")))
#else
#define KMP_ATTRIBUTE_TARGET_WAITPKG /* Nothing */
#endif

#if KMP_HAVE_ATTRIBUTE_RTM
#define KMP_ATTRIBUTE_TARGET_RTM __attribute__((target("rtm")))
#else
#define KMP_ATTRIBUTE_TARGET_RTM /* Nothing */
#endif

// Define attribute that indicates a function does not return
#if __cplusplus >= 201103L
#define KMP_NORETURN [[noreturn]]
#elif KMP_OS_WINDOWS
#define KMP_NORETURN __declspec(noreturn)
#else
#define KMP_NORETURN __attribute__((noreturn))
#endif

#if KMP_OS_WINDOWS && KMP_MSVC_COMPAT
#define KMP_ALIGN(bytes) __declspec(align(bytes))
#define KMP_THREAD_LOCAL __declspec(thread)
#define KMP_ALIAS /* Nothing */
#else
#define KMP_ALIGN(bytes) __attribute__((aligned(bytes)))
#define KMP_THREAD_LOCAL __thread
#define KMP_ALIAS(alias_of) __attribute__((alias(alias_of)))
#endif

#if KMP_HAVE_WEAK_ATTRIBUTE && !KMP_DYNAMIC_LIB
#define KMP_WEAK_ATTRIBUTE_EXTERNAL __attribute__((weak))
#else
#define KMP_WEAK_ATTRIBUTE_EXTERNAL /* Nothing */
#endif

#if KMP_HAVE_WEAK_ATTRIBUTE
#define KMP_WEAK_ATTRIBUTE_INTERNAL __attribute__((weak))
#else
#define KMP_WEAK_ATTRIBUTE_INTERNAL /* Nothing */
#endif

// Define KMP_VERSION_SYMBOL and KMP_EXPAND_NAME
#ifndef KMP_STR
#define KMP_STR(x) _KMP_STR(x)
#define _KMP_STR(x) #x
#endif

#ifdef KMP_USE_VERSION_SYMBOLS
// If using versioned symbols, KMP_EXPAND_NAME prepends
// __kmp_api_ to the real API name
#define KMP_EXPAND_NAME(api_name) _KMP_EXPAND_NAME(api_name)
#define _KMP_EXPAND_NAME(api_name) __kmp_api_##api_name
#define KMP_VERSION_SYMBOL(api_name, ver_num, ver_str) \
  _KMP_VERSION_SYMBOL(api_name, ver_num, ver_str, "VERSION")
#define _KMP_VERSION_SYMBOL(api_name, ver_num, ver_str, default_ver) \
  __typeof__(__kmp_api_##api_name) __kmp_api_##api_name##_##ver_num##_alias \
      __attribute__((alias(KMP_STR(__kmp_api_##api_name)))); \
  __asm__( \
      ".symver " KMP_STR(__kmp_api_##api_name##_##ver_num##_alias) "," KMP_STR( \
          api_name) "@" ver_str "\n\t"); \
  __asm__(".symver " KMP_STR(__kmp_api_##api_name) "," KMP_STR( \
      api_name) "@@" default_ver "\n\t")

#define KMP_VERSION_OMPC_SYMBOL(apic_name, api_name, ver_num, ver_str) \
  _KMP_VERSION_OMPC_SYMBOL(apic_name, api_name, ver_num, ver_str, "VERSION")
#define _KMP_VERSION_OMPC_SYMBOL(apic_name, api_name, ver_num, ver_str, \
                                 default_ver) \
  __typeof__(__kmp_api_##apic_name) __kmp_api_##apic_name##_##ver_num##_alias \
      __attribute__((alias(KMP_STR(__kmp_api_##apic_name)))); \
  __asm__(".symver " KMP_STR(__kmp_api_##apic_name) "," KMP_STR( \
      apic_name) "@@" default_ver "\n\t"); \
  __asm__( \
      ".symver " KMP_STR(__kmp_api_##apic_name##_##ver_num##_alias) "," KMP_STR( \
          api_name) "@" ver_str "\n\t")

#else // KMP_USE_VERSION_SYMBOLS
#define KMP_EXPAND_NAME(api_name) api_name
#define KMP_VERSION_SYMBOL(api_name, ver_num, ver_str) /* Nothing */
#define KMP_VERSION_OMPC_SYMBOL(apic_name, api_name, ver_num, \
                                ver_str) /* Nothing */
#endif // KMP_USE_VERSION_SYMBOLS
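// Usage sketch (illustrative only; the API name is hypothetical): with
// versioned symbols enabled, an exported entry point is defined through
// KMP_EXPAND_NAME and then bound to an ELF symbol version, roughly:
//
//   int KMP_EXPAND_NAME(omp_example)(void) { return 0; } // __kmp_api_omp_example
//   KMP_VERSION_SYMBOL(omp_example, 10, "VERSION_1.0");
//     // emits: omp_example@VERSION_1.0 -> __kmp_api_omp_example_10_alias
//     //        omp_example@@VERSION    -> __kmp_api_omp_example (default)
//
// When KMP_USE_VERSION_SYMBOLS is not defined, KMP_EXPAND_NAME is the identity
// and KMP_VERSION_SYMBOL expands to nothing.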

/* Temporary note: if performance testing of this passes, we can remove
   all references to KMP_DO_ALIGN and replace with KMP_ALIGN. */
#define KMP_DO_ALIGN(bytes) KMP_ALIGN(bytes)
#define KMP_ALIGN_CACHE KMP_ALIGN(CACHE_LINE)
#define KMP_ALIGN_CACHE_INTERNODE KMP_ALIGN(INTERNODE_CACHE_LINE)

/* General purpose fence types for memory operations */
enum kmp_mem_fence_type {
  kmp_no_fence, /* No memory fence */
  kmp_acquire_fence, /* Acquire (read) memory fence */
  kmp_release_fence, /* Release (write) memory fence */
  kmp_full_fence /* Full (read+write) memory fence */
};

// Synchronization primitives

#if KMP_ASM_INTRINS && KMP_OS_WINDOWS && \
    !((KMP_ARCH_AARCH64 || KMP_ARCH_ARM) && (KMP_COMPILER_CLANG || KMP_COMPILER_GCC))

#if KMP_MSVC_COMPAT && !KMP_COMPILER_CLANG
#pragma intrinsic(InterlockedExchangeAdd)
#pragma intrinsic(InterlockedCompareExchange)
#pragma intrinsic(InterlockedExchange)
#if !KMP_32_BIT_ARCH
#pragma intrinsic(InterlockedExchange64)
#endif
#endif

// Using InterlockedIncrement / InterlockedDecrement causes a library loading
// ordering problem, so we use InterlockedExchangeAdd instead.
#define KMP_TEST_THEN_INC32(p) InterlockedExchangeAdd((volatile long *)(p), 1)
#define KMP_TEST_THEN_INC_ACQ32(p) \
  InterlockedExchangeAdd((volatile long *)(p), 1)
#define KMP_TEST_THEN_ADD4_32(p) InterlockedExchangeAdd((volatile long *)(p), 4)
#define KMP_TEST_THEN_ADD4_ACQ32(p) \
  InterlockedExchangeAdd((volatile long *)(p), 4)
#define KMP_TEST_THEN_DEC32(p) InterlockedExchangeAdd((volatile long *)(p), -1)
#define KMP_TEST_THEN_DEC_ACQ32(p) \
  InterlockedExchangeAdd((volatile long *)(p), -1)
#define KMP_TEST_THEN_ADD32(p, v) \
  InterlockedExchangeAdd((volatile long *)(p), (v))

#define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \
  InterlockedCompareExchange((volatile long *)(p), (long)(sv), (long)(cv))

#define KMP_XCHG_FIXED32(p, v) \
  InterlockedExchange((volatile long *)(p), (long)(v))
#define KMP_XCHG_FIXED64(p, v) \
  InterlockedExchange64((volatile kmp_int64 *)(p), (kmp_int64)(v))

inline kmp_real32 KMP_XCHG_REAL32(volatile kmp_real32 *p, kmp_real32 v) {
  kmp_int32 tmp = InterlockedExchange((volatile long *)p, *(long *)&v);
  return *(kmp_real32 *)&tmp;
}

#define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8((p), (v))
#define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8((p), (v))
#define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32((p), (v))
#define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32((p), (v))
#define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64((p), (v))
#define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64((p), (v))

extern kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 v);
extern kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 v);
extern kmp_int32 __kmp_test_then_add32(volatile kmp_int32 *p, kmp_int32 v);
extern kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 v);
extern kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 v);
extern kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 v);
extern kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 v);
extern kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 v);

#if KMP_ARCH_AARCH64 && KMP_COMPILER_MSVC && !KMP_COMPILER_CLANG
#define KMP_TEST_THEN_INC64(p) _InterlockedExchangeAdd64((p), 1LL)
#define KMP_TEST_THEN_INC_ACQ64(p) _InterlockedExchangeAdd64_acq((p), 1LL)
#define KMP_TEST_THEN_ADD4_64(p) _InterlockedExchangeAdd64((p), 4LL)
// #define KMP_TEST_THEN_ADD4_ACQ64(p) _InterlockedExchangeAdd64_acq((p), 4LL)
// #define KMP_TEST_THEN_DEC64(p) _InterlockedExchangeAdd64((p), -1LL)
// #define KMP_TEST_THEN_DEC_ACQ64(p) _InterlockedExchangeAdd64_acq((p), -1LL)
// #define KMP_TEST_THEN_ADD8(p, v) _InterlockedExchangeAdd8((p), (v))
#define KMP_TEST_THEN_ADD64(p, v) _InterlockedExchangeAdd64((p), (v))

#define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \
  __kmp_compare_and_store_acq8((p), (cv), (sv))
#define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \
  __kmp_compare_and_store_rel8((p), (cv), (sv))
#define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \
  __kmp_compare_and_store_acq16((p), (cv), (sv))
/*
#define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \
  __kmp_compare_and_store_rel16((p), (cv), (sv))
*/
#define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \
  __kmp_compare_and_store_acq32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \
                                (kmp_int32)(sv))
#define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \
  __kmp_compare_and_store_rel32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \
                                (kmp_int32)(sv))
#define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \
  __kmp_compare_and_store_acq64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
                                (kmp_int64)(sv))
#define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \
  __kmp_compare_and_store_rel64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
                                (kmp_int64)(sv))
#define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \
  __kmp_compare_and_store_ptr((void *volatile *)(p), (void *)(cv), (void *)(sv))

// KMP_COMPARE_AND_STORE expects this order: pointer, compare, exchange
// _InterlockedCompareExchange expects this order: pointer, exchange, compare
// KMP_COMPARE_AND_STORE also returns a bool indicating a successful write. A
// write is successful if the return value of _InterlockedCompareExchange is the
// same as the compare value.
inline kmp_int8 __kmp_compare_and_store_acq8(volatile kmp_int8 *p, kmp_int8 cv,
                                             kmp_int8 sv) {
  return _InterlockedCompareExchange8_acq(p, sv, cv) == cv;
}

inline kmp_int8 __kmp_compare_and_store_rel8(volatile kmp_int8 *p, kmp_int8 cv,
                                             kmp_int8 sv) {
  return _InterlockedCompareExchange8_rel(p, sv, cv) == cv;
}

inline kmp_int16 __kmp_compare_and_store_acq16(volatile kmp_int16 *p,
                                               kmp_int16 cv, kmp_int16 sv) {
  return _InterlockedCompareExchange16_acq(p, sv, cv) == cv;
}

inline kmp_int16 __kmp_compare_and_store_rel16(volatile kmp_int16 *p,
                                               kmp_int16 cv, kmp_int16 sv) {
  return _InterlockedCompareExchange16_rel(p, sv, cv) == cv;
}

inline kmp_int32 __kmp_compare_and_store_acq32(volatile kmp_int32 *p,
                                               kmp_int32 cv, kmp_int32 sv) {
  return _InterlockedCompareExchange_acq((volatile long *)p, sv, cv) == cv;
}

inline kmp_int32 __kmp_compare_and_store_rel32(volatile kmp_int32 *p,
                                               kmp_int32 cv, kmp_int32 sv) {
  return _InterlockedCompareExchange_rel((volatile long *)p, sv, cv) == cv;
}

inline kmp_int32 __kmp_compare_and_store_acq64(volatile kmp_int64 *p,
                                               kmp_int64 cv, kmp_int64 sv) {
  return _InterlockedCompareExchange64_acq(p, sv, cv) == cv;
}

inline kmp_int32 __kmp_compare_and_store_rel64(volatile kmp_int64 *p,
                                               kmp_int64 cv, kmp_int64 sv) {
  return _InterlockedCompareExchange64_rel(p, sv, cv) == cv;
}

inline kmp_int32 __kmp_compare_and_store_ptr(void *volatile *p, void *cv,
                                             void *sv) {
  return _InterlockedCompareExchangePointer(p, sv, cv) == cv;
}

// The _RET versions return the value instead of a bool

#define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \
  _InterlockedCompareExchange8((p), (sv), (cv))
#define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \
  _InterlockedCompareExchange16((p), (sv), (cv))

#define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \
  _InterlockedCompareExchange64((volatile kmp_int64 *)(p), (kmp_int64)(sv), \
                                (kmp_int64)(cv))

#define KMP_XCHG_FIXED8(p, v) \
  _InterlockedExchange8((volatile kmp_int8 *)(p), (kmp_int8)(v));
#define KMP_XCHG_FIXED16(p, v) _InterlockedExchange16((p), (v));
#define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64((p), (v));

inline kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v) {
  kmp_int64 tmp =
      _InterlockedExchange64((volatile kmp_int64 *)p, *(kmp_int64 *)&v);
  return *(kmp_real64 *)&tmp;
}

#else // !KMP_ARCH_AARCH64

// Routines that we still need to implement in assembly.
extern kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 v);

extern kmp_int8 __kmp_compare_and_store8(volatile kmp_int8 *p, kmp_int8 cv,
                                         kmp_int8 sv);
extern kmp_int16 __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv,
                                           kmp_int16 sv);
extern kmp_int32 __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv,
                                           kmp_int32 sv);
extern kmp_int32 __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv,
                                           kmp_int64 sv);
extern kmp_int8 __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv,
                                             kmp_int8 sv);
extern kmp_int16 __kmp_compare_and_store_ret16(volatile kmp_int16 *p,
                                               kmp_int16 cv, kmp_int16 sv);
extern kmp_int32 __kmp_compare_and_store_ret32(volatile kmp_int32 *p,
                                               kmp_int32 cv, kmp_int32 sv);
extern kmp_int64 __kmp_compare_and_store_ret64(volatile kmp_int64 *p,
                                               kmp_int64 cv, kmp_int64 sv);

extern kmp_int8 __kmp_xchg_fixed8(volatile kmp_int8 *p, kmp_int8 v);
extern kmp_int16 __kmp_xchg_fixed16(volatile kmp_int16 *p, kmp_int16 v);
extern kmp_int32 __kmp_xchg_fixed32(volatile kmp_int32 *p, kmp_int32 v);
extern kmp_int64 __kmp_xchg_fixed64(volatile kmp_int64 *p, kmp_int64 v);
extern kmp_real32 __kmp_xchg_real32(volatile kmp_real32 *p, kmp_real32 v);
extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v);

//#define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32((p), 1)
//#define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32((p), 1)
#define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64((p), 1LL)
#define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64((p), 1LL)
//#define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32((p), 4)
//#define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32((p), 4)
#define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64((p), 4LL)
#define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64((p), 4LL)
//#define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32((p), -1)
//#define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32((p), -1)
#define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64((p), -1LL)
#define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64((p), -1LL)
//#define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32((p), (v))
#define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8((p), (v))
#define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64((p), (v))

#define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \
  __kmp_compare_and_store8((p), (cv), (sv))
#define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \
  __kmp_compare_and_store8((p), (cv), (sv))
#define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \
  __kmp_compare_and_store16((p), (cv), (sv))
#define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \
  __kmp_compare_and_store16((p), (cv), (sv))
#define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \
  __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \
                            (kmp_int32)(sv))
#define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \
  __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \
                            (kmp_int32)(sv))
#define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \
  __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
                            (kmp_int64)(sv))
#define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \
  __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
                            (kmp_int64)(sv))

#if KMP_ARCH_X86
#define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \
  __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \
                            (kmp_int32)(sv))
#else /* 64 bit pointers */
#define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \
  __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
                            (kmp_int64)(sv))
#endif /* KMP_ARCH_X86 */

#define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \
  __kmp_compare_and_store_ret8((p), (cv), (sv))
#define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \
  __kmp_compare_and_store_ret16((p), (cv), (sv))
#define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \
  __kmp_compare_and_store_ret64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
                                (kmp_int64)(sv))

#define KMP_XCHG_FIXED8(p, v) \
  __kmp_xchg_fixed8((volatile kmp_int8 *)(p), (kmp_int8)(v));
#define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16((p), (v));
//#define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32((p), (v));
//#define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64((p), (v));
//#define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32((p), (v));
#define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64((p), (v));
#endif

#elif (KMP_ASM_INTRINS && KMP_OS_UNIX) || !(KMP_ARCH_X86 || KMP_ARCH_X86_64)

/* cast p to correct type so that proper intrinsic will be used */
#define KMP_TEST_THEN_INC32(p) \
  __sync_fetch_and_add((volatile kmp_int32 *)(p), 1)
#define KMP_TEST_THEN_INC_ACQ32(p) \
  __sync_fetch_and_add((volatile kmp_int32 *)(p), 1)
#if KMP_ARCH_MIPS
#define KMP_TEST_THEN_INC64(p) \
  __atomic_fetch_add((volatile kmp_int64 *)(p), 1LL, __ATOMIC_SEQ_CST)
#define KMP_TEST_THEN_INC_ACQ64(p) \
  __atomic_fetch_add((volatile kmp_int64 *)(p), 1LL, __ATOMIC_SEQ_CST)
#else
#define KMP_TEST_THEN_INC64(p) \
  __sync_fetch_and_add((volatile kmp_int64 *)(p), 1LL)
#define KMP_TEST_THEN_INC_ACQ64(p) \
  __sync_fetch_and_add((volatile kmp_int64 *)(p), 1LL)
#endif
#define KMP_TEST_THEN_ADD4_32(p) \
  __sync_fetch_and_add((volatile kmp_int32 *)(p), 4)
#define KMP_TEST_THEN_ADD4_ACQ32(p) \
  __sync_fetch_and_add((volatile kmp_int32 *)(p), 4)
#if KMP_ARCH_MIPS
#define KMP_TEST_THEN_ADD4_64(p) \
  __atomic_fetch_add((volatile kmp_int64 *)(p), 4LL, __ATOMIC_SEQ_CST)
#define KMP_TEST_THEN_ADD4_ACQ64(p) \
  __atomic_fetch_add((volatile kmp_int64 *)(p), 4LL, __ATOMIC_SEQ_CST)
#define KMP_TEST_THEN_DEC64(p) \
  __atomic_fetch_sub((volatile kmp_int64 *)(p), 1LL, __ATOMIC_SEQ_CST)
#define KMP_TEST_THEN_DEC_ACQ64(p) \
  __atomic_fetch_sub((volatile kmp_int64 *)(p), 1LL, __ATOMIC_SEQ_CST)
#else
#define KMP_TEST_THEN_ADD4_64(p) \
  __sync_fetch_and_add((volatile kmp_int64 *)(p), 4LL)
#define KMP_TEST_THEN_ADD4_ACQ64(p) \
  __sync_fetch_and_add((volatile kmp_int64 *)(p), 4LL)
#define KMP_TEST_THEN_DEC64(p) \
  __sync_fetch_and_sub((volatile kmp_int64 *)(p), 1LL)
#define KMP_TEST_THEN_DEC_ACQ64(p) \
  __sync_fetch_and_sub((volatile kmp_int64 *)(p), 1LL)
#endif
#define KMP_TEST_THEN_DEC32(p) \
  __sync_fetch_and_sub((volatile kmp_int32 *)(p), 1)
#define KMP_TEST_THEN_DEC_ACQ32(p) \
  __sync_fetch_and_sub((volatile kmp_int32 *)(p), 1)
#define KMP_TEST_THEN_ADD8(p, v) \
  __sync_fetch_and_add((volatile kmp_int8 *)(p), (kmp_int8)(v))
#define KMP_TEST_THEN_ADD32(p, v) \
  __sync_fetch_and_add((volatile kmp_int32 *)(p), (kmp_int32)(v))
#if KMP_ARCH_MIPS
#define KMP_TEST_THEN_ADD64(p, v) \
  __atomic_fetch_add((volatile kmp_uint64 *)(p), (kmp_uint64)(v), \
                     __ATOMIC_SEQ_CST)
#else
#define KMP_TEST_THEN_ADD64(p, v) \
  __sync_fetch_and_add((volatile kmp_int64 *)(p), (kmp_int64)(v))
#endif

#define KMP_TEST_THEN_OR8(p, v) \
  __sync_fetch_and_or((volatile kmp_int8 *)(p), (kmp_int8)(v))
#define KMP_TEST_THEN_AND8(p, v) \
  __sync_fetch_and_and((volatile kmp_int8 *)(p), (kmp_int8)(v))
#define KMP_TEST_THEN_OR32(p, v) \
  __sync_fetch_and_or((volatile kmp_uint32 *)(p), (kmp_uint32)(v))
#define KMP_TEST_THEN_AND32(p, v) \
  __sync_fetch_and_and((volatile kmp_uint32 *)(p), (kmp_uint32)(v))
#if KMP_ARCH_MIPS
#define KMP_TEST_THEN_OR64(p, v) \
  __atomic_fetch_or((volatile kmp_uint64 *)(p), (kmp_uint64)(v), \
                    __ATOMIC_SEQ_CST)
#define KMP_TEST_THEN_AND64(p, v) \
  __atomic_fetch_and((volatile kmp_uint64 *)(p), (kmp_uint64)(v), \
                     __ATOMIC_SEQ_CST)
#else
#define KMP_TEST_THEN_OR64(p, v) \
  __sync_fetch_and_or((volatile kmp_uint64 *)(p), (kmp_uint64)(v))
#define KMP_TEST_THEN_AND64(p, v) \
  __sync_fetch_and_and((volatile kmp_uint64 *)(p), (kmp_uint64)(v))
#endif

#define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \
  __sync_bool_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \
                               (kmp_uint8)(sv))
#define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \
  __sync_bool_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \
                               (kmp_uint8)(sv))
#define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \
  __sync_bool_compare_and_swap((volatile kmp_uint16 *)(p), (kmp_uint16)(cv), \
                               (kmp_uint16)(sv))
#define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \
  __sync_bool_compare_and_swap((volatile kmp_uint16 *)(p), (kmp_uint16)(cv), \
                               (kmp_uint16)(sv))
#define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \
  __sync_bool_compare_and_swap((volatile kmp_uint32 *)(p), (kmp_uint32)(cv), \
                               (kmp_uint32)(sv))
#define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \
  __sync_bool_compare_and_swap((volatile kmp_uint32 *)(p), (kmp_uint32)(cv), \
                               (kmp_uint32)(sv))
#define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \
  __sync_bool_compare_and_swap((void *volatile *)(p), (void *)(cv), \
                               (void *)(sv))

#define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \
  __sync_val_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \
                              (kmp_uint8)(sv))
#define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \
  __sync_val_compare_and_swap((volatile kmp_uint16 *)(p), (kmp_uint16)(cv), \
                              (kmp_uint16)(sv))
#define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \
  __sync_val_compare_and_swap((volatile kmp_uint32 *)(p), (kmp_uint32)(cv), \
                              (kmp_uint32)(sv))
#if KMP_ARCH_MIPS
static inline bool mips_sync_bool_compare_and_swap(volatile kmp_uint64 *p,
                                                   kmp_uint64 cv,
                                                   kmp_uint64 sv) {
  return __atomic_compare_exchange(p, &cv, &sv, false, __ATOMIC_SEQ_CST,
                                   __ATOMIC_SEQ_CST);
}
static inline bool mips_sync_val_compare_and_swap(volatile kmp_uint64 *p,
                                                  kmp_uint64 cv,
                                                  kmp_uint64 sv) {
  __atomic_compare_exchange(p, &cv, &sv, false, __ATOMIC_SEQ_CST,
                            __ATOMIC_SEQ_CST);
  return cv;
}
#define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \
  mips_sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), \
                                  (kmp_uint64)(cv), (kmp_uint64)(sv))
#define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \
  mips_sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), \
                                  (kmp_uint64)(cv), (kmp_uint64)(sv))
#define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \
  mips_sync_val_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \
                                 (kmp_uint64)(sv))
#else
#define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \
  __sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \
                               (kmp_uint64)(sv))
#define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \
  __sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \
                               (kmp_uint64)(sv))
#define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \
  __sync_val_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \
                              (kmp_uint64)(sv))
#endif

#if KMP_OS_DARWIN && defined(__INTEL_COMPILER) && __INTEL_COMPILER >= 1800
#define KMP_XCHG_FIXED8(p, v) \
  __atomic_exchange_1((volatile kmp_uint8 *)(p), (kmp_uint8)(v), \
                      __ATOMIC_SEQ_CST)
#else
#define KMP_XCHG_FIXED8(p, v) \
  __sync_lock_test_and_set((volatile kmp_uint8 *)(p), (kmp_uint8)(v))
#endif
#define KMP_XCHG_FIXED16(p, v) \
  __sync_lock_test_and_set((volatile kmp_uint16 *)(p), (kmp_uint16)(v))
#define KMP_XCHG_FIXED32(p, v) \
  __sync_lock_test_and_set((volatile kmp_uint32 *)(p), (kmp_uint32)(v))
#define KMP_XCHG_FIXED64(p, v) \
  __sync_lock_test_and_set((volatile kmp_uint64 *)(p), (kmp_uint64)(v))

inline kmp_real32 KMP_XCHG_REAL32(volatile kmp_real32 *p, kmp_real32 v) {
  volatile kmp_uint32 *up;
  kmp_uint32 uv;
  memcpy(&up, &p, sizeof(up));
  memcpy(&uv, &v, sizeof(uv));
  kmp_int32 tmp = __sync_lock_test_and_set(up, uv);
  kmp_real32 ftmp;
  memcpy(&ftmp, &tmp, sizeof(tmp));
  return ftmp;
}

inline kmp_real64 KMP_XCHG_REAL64(volatile kmp_real64 *p, kmp_real64 v) {
  volatile kmp_uint64 *up;
  kmp_uint64 uv;
  memcpy(&up, &p, sizeof(up));
  memcpy(&uv, &v, sizeof(uv));
  kmp_int64 tmp = __sync_lock_test_and_set(up, uv);
  kmp_real64 dtmp;
  memcpy(&dtmp, &tmp, sizeof(tmp));
  return dtmp;
}

#else

extern kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 v);
extern kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 v);
extern kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 v);
extern kmp_int32 __kmp_test_then_add32(volatile kmp_int32 *p, kmp_int32 v);
extern kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 v);
extern kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 v);
extern kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 v);
extern kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 v);
extern kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 v);

extern kmp_int8 __kmp_compare_and_store8(volatile kmp_int8 *p, kmp_int8 cv,
                                         kmp_int8 sv);
extern kmp_int16 __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv,
                                           kmp_int16 sv);
extern kmp_int32 __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv,
                                           kmp_int32 sv);
extern kmp_int32 __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv,
                                           kmp_int64 sv);
extern kmp_int8 __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv,
                                             kmp_int8 sv);
extern kmp_int16 __kmp_compare_and_store_ret16(volatile kmp_int16 *p,
                                               kmp_int16 cv, kmp_int16 sv);
extern kmp_int32 __kmp_compare_and_store_ret32(volatile kmp_int32 *p,
                                               kmp_int32 cv, kmp_int32 sv);
extern kmp_int64 __kmp_compare_and_store_ret64(volatile kmp_int64 *p,
                                               kmp_int64 cv, kmp_int64 sv);

extern kmp_int8 __kmp_xchg_fixed8(volatile kmp_int8 *p, kmp_int8 v);
extern kmp_int16 __kmp_xchg_fixed16(volatile kmp_int16 *p, kmp_int16 v);
extern kmp_int32 __kmp_xchg_fixed32(volatile kmp_int32 *p, kmp_int32 v);
extern kmp_int64 __kmp_xchg_fixed64(volatile kmp_int64 *p, kmp_int64 v);
extern kmp_real32 __kmp_xchg_real32(volatile kmp_real32 *p, kmp_real32 v);
extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v);

#define KMP_TEST_THEN_INC32(p) \
  __kmp_test_then_add32((volatile kmp_int32 *)(p), 1)
#define KMP_TEST_THEN_INC_ACQ32(p) \
  __kmp_test_then_add32((volatile kmp_int32 *)(p), 1)
#define KMP_TEST_THEN_INC64(p) \
  __kmp_test_then_add64((volatile kmp_int64 *)(p), 1LL)
#define KMP_TEST_THEN_INC_ACQ64(p) \
  __kmp_test_then_add64((volatile kmp_int64 *)(p), 1LL)
#define KMP_TEST_THEN_ADD4_32(p) \
  __kmp_test_then_add32((volatile kmp_int32 *)(p), 4)
#define KMP_TEST_THEN_ADD4_ACQ32(p) \
  __kmp_test_then_add32((volatile kmp_int32 *)(p), 4)
#define KMP_TEST_THEN_ADD4_64(p) \
  __kmp_test_then_add64((volatile kmp_int64 *)(p), 4LL)
#define KMP_TEST_THEN_ADD4_ACQ64(p) \
  __kmp_test_then_add64((volatile kmp_int64 *)(p), 4LL)
#define KMP_TEST_THEN_DEC32(p) \
  __kmp_test_then_add32((volatile kmp_int32 *)(p), -1)
#define KMP_TEST_THEN_DEC_ACQ32(p) \
  __kmp_test_then_add32((volatile kmp_int32 *)(p), -1)
#define KMP_TEST_THEN_DEC64(p) \
  __kmp_test_then_add64((volatile kmp_int64 *)(p), -1LL)
#define KMP_TEST_THEN_DEC_ACQ64(p) \
  __kmp_test_then_add64((volatile kmp_int64 *)(p), -1LL)
#define KMP_TEST_THEN_ADD8(p, v) \
  __kmp_test_then_add8((volatile kmp_int8 *)(p), (kmp_int8)(v))
#define KMP_TEST_THEN_ADD32(p, v) \
  __kmp_test_then_add32((volatile kmp_int32 *)(p), (kmp_int32)(v))
#define KMP_TEST_THEN_ADD64(p, v) \
  __kmp_test_then_add64((volatile kmp_int64 *)(p), (kmp_int64)(v))

#define KMP_TEST_THEN_OR8(p, v) \
  __kmp_test_then_or8((volatile kmp_int8 *)(p), (kmp_int8)(v))
#define KMP_TEST_THEN_AND8(p, v) \
  __kmp_test_then_and8((volatile kmp_int8 *)(p), (kmp_int8)(v))
#define KMP_TEST_THEN_OR32(p, v) \
  __kmp_test_then_or32((volatile kmp_uint32 *)(p), (kmp_uint32)(v))
#define KMP_TEST_THEN_AND32(p, v) \
  __kmp_test_then_and32((volatile kmp_uint32 *)(p), (kmp_uint32)(v))
#define KMP_TEST_THEN_OR64(p, v) \
  __kmp_test_then_or64((volatile kmp_uint64 *)(p), (kmp_uint64)(v))
#define KMP_TEST_THEN_AND64(p, v) \
  __kmp_test_then_and64((volatile kmp_uint64 *)(p), (kmp_uint64)(v))

#define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \
  __kmp_compare_and_store8((volatile kmp_int8 *)(p), (kmp_int8)(cv), \
                           (kmp_int8)(sv))
#define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \
  __kmp_compare_and_store8((volatile kmp_int8 *)(p), (kmp_int8)(cv), \
                           (kmp_int8)(sv))
#define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \
  __kmp_compare_and_store16((volatile kmp_int16 *)(p), (kmp_int16)(cv), \
                            (kmp_int16)(sv))
#define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \
  __kmp_compare_and_store16((volatile kmp_int16 *)(p), (kmp_int16)(cv), \
                            (kmp_int16)(sv))
#define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \
  __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \
                            (kmp_int32)(sv))
#define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \
  __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \
                            (kmp_int32)(sv))
#define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \
  __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
                            (kmp_int64)(sv))
#define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \
  __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
                            (kmp_int64)(sv))

#if KMP_ARCH_X86
#define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \
  __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \
                            (kmp_int32)(sv))
#else /* 64 bit pointers */
#define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \
  __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
                            (kmp_int64)(sv))
#endif /* KMP_ARCH_X86 */

#define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \
  __kmp_compare_and_store_ret8((p), (cv), (sv))
#define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \
  __kmp_compare_and_store_ret16((p), (cv), (sv))
#define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \
  __kmp_compare_and_store_ret32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \
                                (kmp_int32)(sv))
#define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \
  __kmp_compare_and_store_ret64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \
                                (kmp_int64)(sv))

#define KMP_XCHG_FIXED8(p, v) \
  __kmp_xchg_fixed8((volatile kmp_int8 *)(p), (kmp_int8)(v));
#define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16((p), (v));
#define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32((p), (v));
#define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64((p), (v));
#define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32((p), (v));
#define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64((p), (v));

#endif /* KMP_ASM_INTRINS */
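// Usage sketch (illustrative only; the helper and counter are hypothetical):
// whichever branch above was selected, callers see the same macro surface.
// KMP_TEST_THEN_* return the previous value, and KMP_COMPARE_AND_STORE_*
// return nonzero only when the exchange succeeded, so a typical retry loop is:
#if 0
static void __kmp_example_sync_usage(volatile kmp_int32 *counter) {
  kmp_int32 old_val = KMP_TEST_THEN_INC32(counter); // fetch-and-add of 1
  (void)old_val;
  for (;;) {
    kmp_int32 cur = *counter;
    if (KMP_COMPARE_AND_STORE_REL32(counter, cur, cur * 2))
      break; // exchange succeeded
    // otherwise another thread changed *counter first; re-read and retry
  }
}
#endif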

/* ------------- relaxed consistency memory model stuff ------------------ */

#if KMP_OS_WINDOWS
#ifdef __ABSOFT_WIN
#define KMP_MB() asm("nop")
#define KMP_IMB() asm("nop")
#else
#define KMP_MB() /* _asm{ nop } */
#define KMP_IMB() /* _asm{ nop } */
#endif
#endif /* KMP_OS_WINDOWS */

#if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \
    KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || \
    KMP_ARCH_VE || KMP_ARCH_S390X || KMP_ARCH_PPC || KMP_ARCH_AARCH64_32
#if KMP_OS_WINDOWS
#undef KMP_MB
#define KMP_MB() std::atomic_thread_fence(std::memory_order_seq_cst)
#else /* !KMP_OS_WINDOWS */
#define KMP_MB() __sync_synchronize()
#endif
#endif

#ifndef KMP_MB
#define KMP_MB() /* nothing to do */
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#if KMP_MIC
// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
// We shouldn't need it, though, since the ABI rules require that
// * If the compiler generates NGO stores it also generates the fence
// * If users hand-code NGO stores they should insert the fence
// therefore no incomplete unordered stores should be visible.
#define KMP_MFENCE() /* Nothing */
#define KMP_SFENCE() /* Nothing */
#else
#if KMP_COMPILER_ICC || KMP_COMPILER_ICX
#define KMP_MFENCE_() _mm_mfence()
#define KMP_SFENCE_() _mm_sfence()
#elif KMP_COMPILER_MSVC
#define KMP_MFENCE_() MemoryBarrier()
#define KMP_SFENCE_() MemoryBarrier()
#else
#define KMP_MFENCE_() __sync_synchronize()
#define KMP_SFENCE_() __sync_synchronize()
#endif
#define KMP_MFENCE() \
  if (UNLIKELY(!__kmp_cpuinfo.initialized)) { \
    __kmp_query_cpuid(&__kmp_cpuinfo); \
  } \
  if (__kmp_cpuinfo.flags.sse2) { \
    KMP_MFENCE_(); \
  }
#define KMP_SFENCE() KMP_SFENCE_()
#endif
#else
#define KMP_MFENCE() KMP_MB()
#define KMP_SFENCE() KMP_MB()
#endif

#ifndef KMP_IMB
#define KMP_IMB() /* nothing to do */
#endif

#ifndef KMP_ST_REL32
#define KMP_ST_REL32(A, D) (*(A) = (D))
#endif

#ifndef KMP_ST_REL64
#define KMP_ST_REL64(A, D) (*(A) = (D))
#endif

#ifndef KMP_LD_ACQ32
#define KMP_LD_ACQ32(A) (*(A))
#endif

#ifndef KMP_LD_ACQ64
#define KMP_LD_ACQ64(A) (*(A))
#endif

/* ------------------------------------------------------------------------ */
// FIXME - maybe this should be
//
// #define TCR_4(a) (*(volatile kmp_int32 *)(&a))
// #define TCW_4(a,b) (a) = (*(volatile kmp_int32 *)&(b))
//
// #define TCR_8(a) (*(volatile kmp_int64 *)(a))
// #define TCW_8(a,b) (a) = (*(volatile kmp_int64 *)(&b))
//
// I'm fairly certain this is the correct thing to do, but I'm afraid
// of performance regressions.

#define TCR_1(a) (a)
#define TCW_1(a, b) (a) = (b)
#define TCR_4(a) (a)
#define TCW_4(a, b) (a) = (b)
#define TCI_4(a) (++(a))
#define TCD_4(a) (--(a))
#define TCR_8(a) (a)
#define TCW_8(a, b) (a) = (b)
#define TCI_8(a) (++(a))
#define TCD_8(a) (--(a))
#define TCR_SYNC_4(a) (a)
#define TCW_SYNC_4(a, b) (a) = (b)
#define TCX_SYNC_4(a, b, c) \
  KMP_COMPARE_AND_STORE_REL32((volatile kmp_int32 *)(volatile void *)&(a), \
                              (kmp_int32)(b), (kmp_int32)(c))
#define TCR_SYNC_8(a) (a)
#define TCW_SYNC_8(a, b) (a) = (b)
#define TCX_SYNC_8(a, b, c) \
  KMP_COMPARE_AND_STORE_REL64((volatile kmp_int64 *)(volatile void *)&(a), \
                              (kmp_int64)(b), (kmp_int64)(c))

#if KMP_ARCH_X86 || KMP_ARCH_MIPS || KMP_ARCH_WASM || KMP_ARCH_PPC
// What about ARM?
#define TCR_PTR(a) ((void *)TCR_4(a))
#define TCW_PTR(a, b) TCW_4((a), (b))
#define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_4(a))
#define TCW_SYNC_PTR(a, b) TCW_SYNC_4((a), (b))
#define TCX_SYNC_PTR(a, b, c) ((void *)TCX_SYNC_4((a), (b), (c)))

#else /* 64 bit pointers */

#define TCR_PTR(a) ((void *)TCR_8(a))
#define TCW_PTR(a, b) TCW_8((a), (b))
#define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_8(a))
#define TCW_SYNC_PTR(a, b) TCW_SYNC_8((a), (b))
#define TCX_SYNC_PTR(a, b, c) ((void *)TCX_SYNC_8((a), (b), (c)))

#endif /* KMP_ARCH_X86 */
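// Usage sketch (illustrative only; the variables are hypothetical): the TCR_*/
// TCW_* wrappers mark reads and writes of shared locations; in this version
// they compile to plain accesses (see the FIXME above), and the _PTR variants
// simply pick the 4-byte or 8-byte flavor to match the pointer size.
#if 0
static void *__kmp_example_tc_swap(void *volatile *slot, void *next) {
  void *prev = TCR_PTR(*slot); // read the shared pointer-sized location
  TCW_PTR(*slot, next);        // publish the new value
  return prev;
}
#endif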

/* If these FTN_{TRUE,FALSE} values change, may need to change several places
   where they are used to check that language is Fortran, not C. */

#ifndef FTN_TRUE
#define FTN_TRUE TRUE
#endif

#ifndef FTN_FALSE
#define FTN_FALSE FALSE
#endif

typedef void (*microtask_t)(int *gtid, int *npr, ...);

#ifdef USE_VOLATILE_CAST
#define VOLATILE_CAST(x) (volatile x)
#else
#define VOLATILE_CAST(x) (x)
#endif

#define KMP_WAIT __kmp_wait_4
#define KMP_WAIT_PTR __kmp_wait_4_ptr
#define KMP_EQ __kmp_eq_4
#define KMP_NEQ __kmp_neq_4
#define KMP_LT __kmp_lt_4
#define KMP_GE __kmp_ge_4
#define KMP_LE __kmp_le_4

/* Workaround for Intel(R) 64 code gen bug when taking address of static array
 * (Intel(R) 64 Tracker #138) */
#if (KMP_ARCH_X86_64 || KMP_ARCH_PPC64) && KMP_OS_LINUX
#define STATIC_EFI2_WORKAROUND
#else
#define STATIC_EFI2_WORKAROUND static
#endif

// Support of BGET usage
#ifndef KMP_USE_BGET
#define KMP_USE_BGET 1
#endif

// Switches for OSS builds
#ifndef USE_CMPXCHG_FIX
#define USE_CMPXCHG_FIX 1
#endif

// Enable dynamic user lock
#define KMP_USE_DYNAMIC_LOCK 1

// Enable Intel(R) Transactional Synchronization Extensions (Intel(R) TSX) if
// dynamic user lock is turned on
#if KMP_USE_DYNAMIC_LOCK
// Visual studio can't handle the asm sections in this code
#define KMP_USE_TSX (KMP_ARCH_X86 || KMP_ARCH_X86_64) && !KMP_COMPILER_MSVC
#ifdef KMP_USE_ADAPTIVE_LOCKS
#undef KMP_USE_ADAPTIVE_LOCKS
#endif
#define KMP_USE_ADAPTIVE_LOCKS KMP_USE_TSX
#endif

// Enable tick time conversion of ticks to seconds
#if KMP_STATS_ENABLED
#define KMP_HAVE_TICK_TIME \
  (KMP_OS_LINUX && (KMP_MIC || KMP_ARCH_X86 || KMP_ARCH_X86_64))
#endif

// Warning levels
enum kmp_warnings_level {
  kmp_warnings_off = 0, /* No warnings */
  kmp_warnings_low, /* Minimal warnings (default) */
  kmp_warnings_explicit = 6, /* Explicitly set to ON - more warnings */
  kmp_warnings_verbose /* reserved */
};

#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus

// Safe C API
#include "kmp_safe_c_api.h"

// Macros for C++11 atomic functions
#define KMP_ATOMIC_LD(p, order) (p)->load(std::memory_order_##order)
#define KMP_ATOMIC_OP(op, p, v, order) (p)->op(v, std::memory_order_##order)

// For non-default load/store
#define KMP_ATOMIC_LD_ACQ(p) KMP_ATOMIC_LD(p, acquire)
#define KMP_ATOMIC_LD_RLX(p) KMP_ATOMIC_LD(p, relaxed)
#define KMP_ATOMIC_ST_REL(p, v) KMP_ATOMIC_OP(store, p, v, release)
#define KMP_ATOMIC_ST_RLX(p, v) KMP_ATOMIC_OP(store, p, v, relaxed)

// For non-default fetch_<op>
#define KMP_ATOMIC_ADD(p, v) KMP_ATOMIC_OP(fetch_add, p, v, acq_rel)
#define KMP_ATOMIC_SUB(p, v) KMP_ATOMIC_OP(fetch_sub, p, v, acq_rel)
#define KMP_ATOMIC_AND(p, v) KMP_ATOMIC_OP(fetch_and, p, v, acq_rel)
#define KMP_ATOMIC_OR(p, v) KMP_ATOMIC_OP(fetch_or, p, v, acq_rel)
#define KMP_ATOMIC_INC(p) KMP_ATOMIC_OP(fetch_add, p, 1, acq_rel)
#define KMP_ATOMIC_DEC(p) KMP_ATOMIC_OP(fetch_sub, p, 1, acq_rel)
#define KMP_ATOMIC_ADD_RLX(p, v) KMP_ATOMIC_OP(fetch_add, p, v, relaxed)
#define KMP_ATOMIC_INC_RLX(p) KMP_ATOMIC_OP(fetch_add, p, 1, relaxed)

// Callers of the following functions cannot see the side effect on "expected".
template <typename T>
bool __kmp_atomic_compare_store(std::atomic<T> *p, T expected, T desired) {
  return p->compare_exchange_strong(
      expected, desired, std::memory_order_acq_rel, std::memory_order_relaxed);
}

template <typename T>
bool __kmp_atomic_compare_store_acq(std::atomic<T> *p, T expected, T desired) {
  return p->compare_exchange_strong(
      expected, desired, std::memory_order_acquire, std::memory_order_relaxed);
}

template <typename T>
bool __kmp_atomic_compare_store_rel(std::atomic<T> *p, T expected, T desired) {
  return p->compare_exchange_strong(
      expected, desired, std::memory_order_release, std::memory_order_relaxed);
}
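// Usage sketch (illustrative only; the flag and counter are hypothetical): the
// macros name the memory order explicitly, and __kmp_atomic_compare_store
// reports whether the exchange happened without exposing the updated
// "expected" value to the caller.
#if 0
static void __kmp_example_cxx11_atomics(std::atomic<kmp_int32> *count,
                                        std::atomic<kmp_int32> *flag) {
  KMP_ATOMIC_INC(count);                    // fetch_add(1, acq_rel)
  kmp_int32 seen = KMP_ATOMIC_LD_ACQ(flag); // load(acquire)
  if (seen == 0 && __kmp_atomic_compare_store(flag, 0, 1)) {
    // exactly one thread observes the 0 -> 1 transition
  }
  KMP_ATOMIC_ST_REL(count, 0);              // store(0, release)
}
#endif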

// Symbol lookup on Linux/Windows
#if KMP_OS_WINDOWS
extern void *__kmp_lookup_symbol(const char *name, bool next = false);
#define KMP_DLSYM(name) __kmp_lookup_symbol(name)
#define KMP_DLSYM_NEXT(name) __kmp_lookup_symbol(name, true)
#elif KMP_OS_WASI
#define KMP_DLSYM(name) nullptr
#define KMP_DLSYM_NEXT(name) nullptr
#else
#define KMP_DLSYM(name) dlsym(RTLD_DEFAULT, name)
#define KMP_DLSYM_NEXT(name) dlsym(RTLD_NEXT, name)
#endif

// MSVC doesn't have this, but clang/clang-cl does.
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

// Same as LLVM_BUILTIN_UNREACHABLE. States that it is UB to reach this point.
#if __has_builtin(__builtin_unreachable) || defined(__GNUC__)
#define KMP_BUILTIN_UNREACHABLE __builtin_unreachable()
#elif defined(_MSC_VER)
#define KMP_BUILTIN_UNREACHABLE __assume(false)
#else
#define KMP_BUILTIN_UNREACHABLE
#endif

#endif /* KMP_OS_H */