Path: blob/main/contrib/llvm-project/openmp/runtime/src/kmp_lock.cpp
35258 views
/*1* kmp_lock.cpp -- lock-related functions2*/34//===----------------------------------------------------------------------===//5//6// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.7// See https://llvm.org/LICENSE.txt for license information.8// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception9//10//===----------------------------------------------------------------------===//1112#include <stddef.h>13#include <atomic>1415#include "kmp.h"16#include "kmp_i18n.h"17#include "kmp_io.h"18#include "kmp_itt.h"19#include "kmp_lock.h"20#include "kmp_wait_release.h"21#include "kmp_wrapper_getpid.h"2223#if KMP_USE_FUTEX24#include <sys/syscall.h>25#include <unistd.h>26// We should really include <futex.h>, but that causes compatibility problems on27// different Linux* OS distributions that either require that you include (or28// break when you try to include) <pci/types.h>. Since all we need is the two29// macros below (which are part of the kernel ABI, so can't change) we just30// define the constants here and don't include <futex.h>31#ifndef FUTEX_WAIT32#define FUTEX_WAIT 033#endif34#ifndef FUTEX_WAKE35#define FUTEX_WAKE 136#endif37#endif3839/* Implement spin locks for internal library use. */40/* The algorithm implemented is Lamport's bakery lock [1974]. */4142void __kmp_validate_locks(void) {43int i;44kmp_uint32 x, y;4546/* Check to make sure unsigned arithmetic does wraps properly */47x = ~((kmp_uint32)0) - 2;48y = x - 2;4950for (i = 0; i < 8; ++i, ++x, ++y) {51kmp_uint32 z = (x - y);52KMP_ASSERT(z == 2);53}5455KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);56}5758/* ------------------------------------------------------------------------ */59/* test and set locks */6061// For the non-nested locks, we can only assume that the first 4 bytes were62// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel63// compiler only allocates a 4 byte pointer on IA-32 architecture. On64// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.65//66// gcc reserves >= 8 bytes for nested locks, so we can assume that the67// entire 8 bytes were allocated for nested locks on all 64-bit platforms.6869static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {70return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;71}7273static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {74return lck->lk.depth_locked != -1;75}7677__forceinline static int78__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {79KMP_MB();8081#ifdef USE_LOCK_PROFILE82kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);83if ((curr != 0) && (curr != gtid + 1))84__kmp_printf("LOCK CONTENTION: %p\n", lck);85/* else __kmp_printf( "." 
);*/86#endif /* USE_LOCK_PROFILE */8788kmp_int32 tas_free = KMP_LOCK_FREE(tas);89kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);9091if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&92__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {93KMP_FSYNC_ACQUIRED(lck);94return KMP_LOCK_ACQUIRED_FIRST;95}9697kmp_uint32 spins;98kmp_uint64 time;99KMP_FSYNC_PREPARE(lck);100KMP_INIT_YIELD(spins);101KMP_INIT_BACKOFF(time);102kmp_backoff_t backoff = __kmp_spin_backoff_params;103do {104#if !KMP_HAVE_UMWAIT105__kmp_spin_backoff(&backoff);106#else107if (!__kmp_tpause_enabled)108__kmp_spin_backoff(&backoff);109#endif110KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);111} while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||112!__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));113KMP_FSYNC_ACQUIRED(lck);114return KMP_LOCK_ACQUIRED_FIRST;115}116117int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {118int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);119return retval;120}121122static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,123kmp_int32 gtid) {124char const *const func = "omp_set_lock";125if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&126__kmp_is_tas_lock_nestable(lck)) {127KMP_FATAL(LockNestableUsedAsSimple, func);128}129if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {130KMP_FATAL(LockIsAlreadyOwned, func);131}132return __kmp_acquire_tas_lock(lck, gtid);133}134135int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {136kmp_int32 tas_free = KMP_LOCK_FREE(tas);137kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);138if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&139__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {140KMP_FSYNC_ACQUIRED(lck);141return TRUE;142}143return FALSE;144}145146static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,147kmp_int32 gtid) {148char const *const func = "omp_test_lock";149if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&150__kmp_is_tas_lock_nestable(lck)) {151KMP_FATAL(LockNestableUsedAsSimple, func);152}153return __kmp_test_tas_lock(lck, gtid);154}155156int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {157KMP_MB(); /* Flush all pending memory write invalidates. */158159KMP_FSYNC_RELEASING(lck);160KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(tas));161KMP_MB(); /* Flush all pending memory write invalidates. 
*/162163KMP_YIELD_OVERSUB();164return KMP_LOCK_RELEASED;165}166167static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck,168kmp_int32 gtid) {169char const *const func = "omp_unset_lock";170KMP_MB(); /* in case another processor initialized lock */171if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&172__kmp_is_tas_lock_nestable(lck)) {173KMP_FATAL(LockNestableUsedAsSimple, func);174}175if (__kmp_get_tas_lock_owner(lck) == -1) {176KMP_FATAL(LockUnsettingFree, func);177}178if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) &&179(__kmp_get_tas_lock_owner(lck) != gtid)) {180KMP_FATAL(LockUnsettingSetByAnother, func);181}182return __kmp_release_tas_lock(lck, gtid);183}184185void __kmp_init_tas_lock(kmp_tas_lock_t *lck) {186lck->lk.poll = KMP_LOCK_FREE(tas);187}188189void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; }190191static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) {192char const *const func = "omp_destroy_lock";193if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&194__kmp_is_tas_lock_nestable(lck)) {195KMP_FATAL(LockNestableUsedAsSimple, func);196}197if (__kmp_get_tas_lock_owner(lck) != -1) {198KMP_FATAL(LockStillOwned, func);199}200__kmp_destroy_tas_lock(lck);201}202203// nested test and set locks204205int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {206KMP_DEBUG_ASSERT(gtid >= 0);207208if (__kmp_get_tas_lock_owner(lck) == gtid) {209lck->lk.depth_locked += 1;210return KMP_LOCK_ACQUIRED_NEXT;211} else {212__kmp_acquire_tas_lock_timed_template(lck, gtid);213lck->lk.depth_locked = 1;214return KMP_LOCK_ACQUIRED_FIRST;215}216}217218static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,219kmp_int32 gtid) {220char const *const func = "omp_set_nest_lock";221if (!__kmp_is_tas_lock_nestable(lck)) {222KMP_FATAL(LockSimpleUsedAsNestable, func);223}224return __kmp_acquire_nested_tas_lock(lck, gtid);225}226227int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {228int retval;229230KMP_DEBUG_ASSERT(gtid >= 0);231232if (__kmp_get_tas_lock_owner(lck) == gtid) {233retval = ++lck->lk.depth_locked;234} else if (!__kmp_test_tas_lock(lck, gtid)) {235retval = 0;236} else {237KMP_MB();238retval = lck->lk.depth_locked = 1;239}240return retval;241}242243static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,244kmp_int32 gtid) {245char const *const func = "omp_test_nest_lock";246if (!__kmp_is_tas_lock_nestable(lck)) {247KMP_FATAL(LockSimpleUsedAsNestable, func);248}249return __kmp_test_nested_tas_lock(lck, gtid);250}251252int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {253KMP_DEBUG_ASSERT(gtid >= 0);254255KMP_MB();256if (--(lck->lk.depth_locked) == 0) {257__kmp_release_tas_lock(lck, gtid);258return KMP_LOCK_RELEASED;259}260return KMP_LOCK_STILL_HELD;261}262263static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck,264kmp_int32 gtid) {265char const *const func = "omp_unset_nest_lock";266KMP_MB(); /* in case another processor initialized lock */267if (!__kmp_is_tas_lock_nestable(lck)) {268KMP_FATAL(LockSimpleUsedAsNestable, func);269}270if (__kmp_get_tas_lock_owner(lck) == -1) {271KMP_FATAL(LockUnsettingFree, func);272}273if (__kmp_get_tas_lock_owner(lck) != gtid) {274KMP_FATAL(LockUnsettingSetByAnother, func);275}276return __kmp_release_nested_tas_lock(lck, gtid);277}278279void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) {280__kmp_init_tas_lock(lck);281lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks282}283284void 
__kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) {285__kmp_destroy_tas_lock(lck);286lck->lk.depth_locked = 0;287}288289static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {290char const *const func = "omp_destroy_nest_lock";291if (!__kmp_is_tas_lock_nestable(lck)) {292KMP_FATAL(LockSimpleUsedAsNestable, func);293}294if (__kmp_get_tas_lock_owner(lck) != -1) {295KMP_FATAL(LockStillOwned, func);296}297__kmp_destroy_nested_tas_lock(lck);298}299300#if KMP_USE_FUTEX301302/* ------------------------------------------------------------------------ */303/* futex locks */304305// futex locks are really just test and set locks, with a different method306// of handling contention. They take the same amount of space as test and307// set locks, and are allocated the same way (i.e. use the area allocated by308// the compiler for non-nested locks / allocate nested locks on the heap).309310static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) {311return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1;312}313314static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) {315return lck->lk.depth_locked != -1;316}317318__forceinline static int319__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) {320kmp_int32 gtid_code = (gtid + 1) << 1;321322KMP_MB();323324#ifdef USE_LOCK_PROFILE325kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll));326if ((curr != 0) && (curr != gtid_code))327__kmp_printf("LOCK CONTENTION: %p\n", lck);328/* else __kmp_printf( "." );*/329#endif /* USE_LOCK_PROFILE */330331KMP_FSYNC_PREPARE(lck);332KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n",333lck, lck->lk.poll, gtid));334335kmp_int32 poll_val;336337while ((poll_val = KMP_COMPARE_AND_STORE_RET32(338&(lck->lk.poll), KMP_LOCK_FREE(futex),339KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) {340341kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1;342KA_TRACE(3431000,344("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n",345lck, gtid, poll_val, cond));346347// NOTE: if you try to use the following condition for this branch348//349// if ( poll_val & 1 == 0 )350//351// Then the 12.0 compiler has a bug where the following block will352// always be skipped, regardless of the value of the LSB of poll_val.353if (!cond) {354// Try to set the lsb in the poll to indicate to the owner355// thread that they need to wake this thread up.356if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val,357poll_val | KMP_LOCK_BUSY(1, futex))) {358KA_TRACE(3591000,360("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n",361lck, lck->lk.poll, gtid));362continue;363}364poll_val |= KMP_LOCK_BUSY(1, futex);365366KA_TRACE(1000,367("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck,368lck->lk.poll, gtid));369}370371KA_TRACE(3721000,373("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n",374lck, gtid, poll_val));375376long rc;377if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL,378NULL, 0)) != 0) {379KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) "380"failed (rc=%ld errno=%d)\n",381lck, gtid, poll_val, rc, errno));382continue;383}384385KA_TRACE(1000,386("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n",387lck, gtid, poll_val));388// This thread has now done a successful futex wait call and was entered on389// the OS futex queue. 
We must now perform a futex wake call when releasing390// the lock, as we have no idea how many other threads are in the queue.391gtid_code |= 1;392}393394KMP_FSYNC_ACQUIRED(lck);395KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,396lck->lk.poll, gtid));397return KMP_LOCK_ACQUIRED_FIRST;398}399400int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {401int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid);402return retval;403}404405static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck,406kmp_int32 gtid) {407char const *const func = "omp_set_lock";408if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&409__kmp_is_futex_lock_nestable(lck)) {410KMP_FATAL(LockNestableUsedAsSimple, func);411}412if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) {413KMP_FATAL(LockIsAlreadyOwned, func);414}415return __kmp_acquire_futex_lock(lck, gtid);416}417418int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {419if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex),420KMP_LOCK_BUSY((gtid + 1) << 1, futex))) {421KMP_FSYNC_ACQUIRED(lck);422return TRUE;423}424return FALSE;425}426427static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck,428kmp_int32 gtid) {429char const *const func = "omp_test_lock";430if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&431__kmp_is_futex_lock_nestable(lck)) {432KMP_FATAL(LockNestableUsedAsSimple, func);433}434return __kmp_test_futex_lock(lck, gtid);435}436437int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {438KMP_MB(); /* Flush all pending memory write invalidates. */439440KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n",441lck, lck->lk.poll, gtid));442443KMP_FSYNC_RELEASING(lck);444445kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex));446447KA_TRACE(1000,448("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n",449lck, gtid, poll_val));450451if (KMP_LOCK_STRIP(poll_val) & 1) {452KA_TRACE(1000,453("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n",454lck, gtid));455syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex),456NULL, NULL, 0);457}458459KMP_MB(); /* Flush all pending memory write invalidates. 
*/460461KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck,462lck->lk.poll, gtid));463464KMP_YIELD_OVERSUB();465return KMP_LOCK_RELEASED;466}467468static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck,469kmp_int32 gtid) {470char const *const func = "omp_unset_lock";471KMP_MB(); /* in case another processor initialized lock */472if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&473__kmp_is_futex_lock_nestable(lck)) {474KMP_FATAL(LockNestableUsedAsSimple, func);475}476if (__kmp_get_futex_lock_owner(lck) == -1) {477KMP_FATAL(LockUnsettingFree, func);478}479if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) &&480(__kmp_get_futex_lock_owner(lck) != gtid)) {481KMP_FATAL(LockUnsettingSetByAnother, func);482}483return __kmp_release_futex_lock(lck, gtid);484}485486void __kmp_init_futex_lock(kmp_futex_lock_t *lck) {487TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex));488}489490void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; }491492static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) {493char const *const func = "omp_destroy_lock";494if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) &&495__kmp_is_futex_lock_nestable(lck)) {496KMP_FATAL(LockNestableUsedAsSimple, func);497}498if (__kmp_get_futex_lock_owner(lck) != -1) {499KMP_FATAL(LockStillOwned, func);500}501__kmp_destroy_futex_lock(lck);502}503504// nested futex locks505506int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {507KMP_DEBUG_ASSERT(gtid >= 0);508509if (__kmp_get_futex_lock_owner(lck) == gtid) {510lck->lk.depth_locked += 1;511return KMP_LOCK_ACQUIRED_NEXT;512} else {513__kmp_acquire_futex_lock_timed_template(lck, gtid);514lck->lk.depth_locked = 1;515return KMP_LOCK_ACQUIRED_FIRST;516}517}518519static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,520kmp_int32 gtid) {521char const *const func = "omp_set_nest_lock";522if (!__kmp_is_futex_lock_nestable(lck)) {523KMP_FATAL(LockSimpleUsedAsNestable, func);524}525return __kmp_acquire_nested_futex_lock(lck, gtid);526}527528int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {529int retval;530531KMP_DEBUG_ASSERT(gtid >= 0);532533if (__kmp_get_futex_lock_owner(lck) == gtid) {534retval = ++lck->lk.depth_locked;535} else if (!__kmp_test_futex_lock(lck, gtid)) {536retval = 0;537} else {538KMP_MB();539retval = lck->lk.depth_locked = 1;540}541return retval;542}543544static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,545kmp_int32 gtid) {546char const *const func = "omp_test_nest_lock";547if (!__kmp_is_futex_lock_nestable(lck)) {548KMP_FATAL(LockSimpleUsedAsNestable, func);549}550return __kmp_test_nested_futex_lock(lck, gtid);551}552553int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) {554KMP_DEBUG_ASSERT(gtid >= 0);555556KMP_MB();557if (--(lck->lk.depth_locked) == 0) {558__kmp_release_futex_lock(lck, gtid);559return KMP_LOCK_RELEASED;560}561return KMP_LOCK_STILL_HELD;562}563564static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck,565kmp_int32 gtid) {566char const *const func = "omp_unset_nest_lock";567KMP_MB(); /* in case another processor initialized lock */568if (!__kmp_is_futex_lock_nestable(lck)) {569KMP_FATAL(LockSimpleUsedAsNestable, func);570}571if (__kmp_get_futex_lock_owner(lck) == -1) {572KMP_FATAL(LockUnsettingFree, func);573}574if (__kmp_get_futex_lock_owner(lck) != gtid) {575KMP_FATAL(LockUnsettingSetByAnother, func);576}577return __kmp_release_nested_futex_lock(lck, 
gtid);578}579580void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) {581__kmp_init_futex_lock(lck);582lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks583}584585void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) {586__kmp_destroy_futex_lock(lck);587lck->lk.depth_locked = 0;588}589590static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {591char const *const func = "omp_destroy_nest_lock";592if (!__kmp_is_futex_lock_nestable(lck)) {593KMP_FATAL(LockSimpleUsedAsNestable, func);594}595if (__kmp_get_futex_lock_owner(lck) != -1) {596KMP_FATAL(LockStillOwned, func);597}598__kmp_destroy_nested_futex_lock(lck);599}600601#endif // KMP_USE_FUTEX602603/* ------------------------------------------------------------------------ */604/* ticket (bakery) locks */605606static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) {607return std::atomic_load_explicit(&lck->lk.owner_id,608std::memory_order_relaxed) -6091;610}611612static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) {613return std::atomic_load_explicit(&lck->lk.depth_locked,614std::memory_order_relaxed) != -1;615}616617static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) {618return std::atomic_load_explicit((std::atomic<unsigned> *)now_serving,619std::memory_order_acquire) == my_ticket;620}621622__forceinline static int623__kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck,624kmp_int32 gtid) {625kmp_uint32 my_ticket = std::atomic_fetch_add_explicit(626&lck->lk.next_ticket, 1U, std::memory_order_relaxed);627628#ifdef USE_LOCK_PROFILE629if (std::atomic_load_explicit(&lck->lk.now_serving,630std::memory_order_relaxed) != my_ticket)631__kmp_printf("LOCK CONTENTION: %p\n", lck);632/* else __kmp_printf( "." 
);*/633#endif /* USE_LOCK_PROFILE */634635if (std::atomic_load_explicit(&lck->lk.now_serving,636std::memory_order_acquire) == my_ticket) {637return KMP_LOCK_ACQUIRED_FIRST;638}639KMP_WAIT_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck);640return KMP_LOCK_ACQUIRED_FIRST;641}642643int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {644int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid);645return retval;646}647648static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck,649kmp_int32 gtid) {650char const *const func = "omp_set_lock";651652if (!std::atomic_load_explicit(&lck->lk.initialized,653std::memory_order_relaxed)) {654KMP_FATAL(LockIsUninitialized, func);655}656if (lck->lk.self != lck) {657KMP_FATAL(LockIsUninitialized, func);658}659if (__kmp_is_ticket_lock_nestable(lck)) {660KMP_FATAL(LockNestableUsedAsSimple, func);661}662if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) {663KMP_FATAL(LockIsAlreadyOwned, func);664}665666__kmp_acquire_ticket_lock(lck, gtid);667668std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,669std::memory_order_relaxed);670return KMP_LOCK_ACQUIRED_FIRST;671}672673int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {674kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket,675std::memory_order_relaxed);676677if (std::atomic_load_explicit(&lck->lk.now_serving,678std::memory_order_relaxed) == my_ticket) {679kmp_uint32 next_ticket = my_ticket + 1;680if (std::atomic_compare_exchange_strong_explicit(681&lck->lk.next_ticket, &my_ticket, next_ticket,682std::memory_order_acquire, std::memory_order_acquire)) {683return TRUE;684}685}686return FALSE;687}688689static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck,690kmp_int32 gtid) {691char const *const func = "omp_test_lock";692693if (!std::atomic_load_explicit(&lck->lk.initialized,694std::memory_order_relaxed)) {695KMP_FATAL(LockIsUninitialized, func);696}697if (lck->lk.self != lck) {698KMP_FATAL(LockIsUninitialized, func);699}700if (__kmp_is_ticket_lock_nestable(lck)) {701KMP_FATAL(LockNestableUsedAsSimple, func);702}703704int retval = __kmp_test_ticket_lock(lck, gtid);705706if (retval) {707std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,708std::memory_order_relaxed);709}710return retval;711}712713int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {714kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket,715std::memory_order_relaxed) -716std::atomic_load_explicit(&lck->lk.now_serving,717std::memory_order_relaxed);718719std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U,720std::memory_order_release);721722KMP_YIELD(distance >723(kmp_uint32)(__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc));724return KMP_LOCK_RELEASED;725}726727static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck,728kmp_int32 gtid) {729char const *const func = "omp_unset_lock";730731if (!std::atomic_load_explicit(&lck->lk.initialized,732std::memory_order_relaxed)) {733KMP_FATAL(LockIsUninitialized, func);734}735if (lck->lk.self != lck) {736KMP_FATAL(LockIsUninitialized, func);737}738if (__kmp_is_ticket_lock_nestable(lck)) {739KMP_FATAL(LockNestableUsedAsSimple, func);740}741if (__kmp_get_ticket_lock_owner(lck) == -1) {742KMP_FATAL(LockUnsettingFree, func);743}744if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) &&745(__kmp_get_ticket_lock_owner(lck) != gtid)) {746KMP_FATAL(LockUnsettingSetByAnother, func);747}748std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);749return __kmp_release_ticket_lock(lck, gtid);750}751752void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) {753lck->lk.location = NULL;754lck->lk.self = lck;755std::atomic_store_explicit(&lck->lk.next_ticket, 0U,756std::memory_order_relaxed);757std::atomic_store_explicit(&lck->lk.now_serving, 0U,758std::memory_order_relaxed);759std::atomic_store_explicit(760&lck->lk.owner_id, 0,761std::memory_order_relaxed); // no thread owns the lock.762std::atomic_store_explicit(763&lck->lk.depth_locked, -1,764std::memory_order_relaxed); // -1 => not a nested lock.765std::atomic_store_explicit(&lck->lk.initialized, true,766std::memory_order_release);767}768769void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) {770std::atomic_store_explicit(&lck->lk.initialized, false,771std::memory_order_release);772lck->lk.self = NULL;773lck->lk.location = NULL;774std::atomic_store_explicit(&lck->lk.next_ticket, 0U,775std::memory_order_relaxed);776std::atomic_store_explicit(&lck->lk.now_serving, 0U,777std::memory_order_relaxed);778std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);779std::atomic_store_explicit(&lck->lk.depth_locked, -1,780std::memory_order_relaxed);781}782783static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {784char const *const func = "omp_destroy_lock";785786if (!std::atomic_load_explicit(&lck->lk.initialized,787std::memory_order_relaxed)) {788KMP_FATAL(LockIsUninitialized, func);789}790if (lck->lk.self != lck) {791KMP_FATAL(LockIsUninitialized, func);792}793if (__kmp_is_ticket_lock_nestable(lck)) {794KMP_FATAL(LockNestableUsedAsSimple, func);795}796if (__kmp_get_ticket_lock_owner(lck) != -1) {797KMP_FATAL(LockStillOwned, func);798}799__kmp_destroy_ticket_lock(lck);800}801802// nested ticket locks803804int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {805KMP_DEBUG_ASSERT(gtid >= 0);806807if (__kmp_get_ticket_lock_owner(lck) == gtid) {808std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,809std::memory_order_relaxed);810return KMP_LOCK_ACQUIRED_NEXT;811} else {812__kmp_acquire_ticket_lock_timed_template(lck, gtid);813std::atomic_store_explicit(&lck->lk.depth_locked, 1,814std::memory_order_relaxed);815std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,816std::memory_order_relaxed);817return KMP_LOCK_ACQUIRED_FIRST;818}819}820821static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,822kmp_int32 gtid) {823char const *const func = "omp_set_nest_lock";824825if (!std::atomic_load_explicit(&lck->lk.initialized,826std::memory_order_relaxed)) {827KMP_FATAL(LockIsUninitialized, func);828}829if (lck->lk.self != lck) {830KMP_FATAL(LockIsUninitialized, func);831}832if 
(!__kmp_is_ticket_lock_nestable(lck)) {833KMP_FATAL(LockSimpleUsedAsNestable, func);834}835return __kmp_acquire_nested_ticket_lock(lck, gtid);836}837838int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {839int retval;840841KMP_DEBUG_ASSERT(gtid >= 0);842843if (__kmp_get_ticket_lock_owner(lck) == gtid) {844retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1,845std::memory_order_relaxed) +8461;847} else if (!__kmp_test_ticket_lock(lck, gtid)) {848retval = 0;849} else {850std::atomic_store_explicit(&lck->lk.depth_locked, 1,851std::memory_order_relaxed);852std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1,853std::memory_order_relaxed);854retval = 1;855}856return retval;857}858859static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,860kmp_int32 gtid) {861char const *const func = "omp_test_nest_lock";862863if (!std::atomic_load_explicit(&lck->lk.initialized,864std::memory_order_relaxed)) {865KMP_FATAL(LockIsUninitialized, func);866}867if (lck->lk.self != lck) {868KMP_FATAL(LockIsUninitialized, func);869}870if (!__kmp_is_ticket_lock_nestable(lck)) {871KMP_FATAL(LockSimpleUsedAsNestable, func);872}873return __kmp_test_nested_ticket_lock(lck, gtid);874}875876int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) {877KMP_DEBUG_ASSERT(gtid >= 0);878879if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1,880std::memory_order_relaxed) -8811) == 0) {882std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed);883__kmp_release_ticket_lock(lck, gtid);884return KMP_LOCK_RELEASED;885}886return KMP_LOCK_STILL_HELD;887}888889static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck,890kmp_int32 gtid) {891char const *const func = "omp_unset_nest_lock";892893if (!std::atomic_load_explicit(&lck->lk.initialized,894std::memory_order_relaxed)) {895KMP_FATAL(LockIsUninitialized, func);896}897if (lck->lk.self != lck) {898KMP_FATAL(LockIsUninitialized, func);899}900if (!__kmp_is_ticket_lock_nestable(lck)) {901KMP_FATAL(LockSimpleUsedAsNestable, func);902}903if (__kmp_get_ticket_lock_owner(lck) == -1) {904KMP_FATAL(LockUnsettingFree, func);905}906if (__kmp_get_ticket_lock_owner(lck) != gtid) {907KMP_FATAL(LockUnsettingSetByAnother, func);908}909return __kmp_release_nested_ticket_lock(lck, gtid);910}911912void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) {913__kmp_init_ticket_lock(lck);914std::atomic_store_explicit(&lck->lk.depth_locked, 0,915std::memory_order_relaxed);916// >= 0 for nestable locks, -1 for simple locks917}918919void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) {920__kmp_destroy_ticket_lock(lck);921std::atomic_store_explicit(&lck->lk.depth_locked, 0,922std::memory_order_relaxed);923}924925static void926__kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {927char const *const func = "omp_destroy_nest_lock";928929if (!std::atomic_load_explicit(&lck->lk.initialized,930std::memory_order_relaxed)) {931KMP_FATAL(LockIsUninitialized, func);932}933if (lck->lk.self != lck) {934KMP_FATAL(LockIsUninitialized, func);935}936if (!__kmp_is_ticket_lock_nestable(lck)) {937KMP_FATAL(LockSimpleUsedAsNestable, func);938}939if (__kmp_get_ticket_lock_owner(lck) != -1) {940KMP_FATAL(LockStillOwned, func);941}942__kmp_destroy_nested_ticket_lock(lck);943}944945// access functions to fields which don't exist for all lock kinds.946947static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) {948return lck->lk.location;949}950951static void 
__kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck,952const ident_t *loc) {953lck->lk.location = loc;954}955956static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) {957return lck->lk.flags;958}959960static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck,961kmp_lock_flags_t flags) {962lck->lk.flags = flags;963}964965/* ------------------------------------------------------------------------ */966/* queuing locks */967968/* First the states969(head,tail) = 0, 0 means lock is unheld, nobody on queue970UINT_MAX or -1, 0 means lock is held, nobody on queue971h, h means lock held or about to transition,9721 element on queue973h, t h <> t, means lock is held or about to974transition, >1 elements on queue975976Now the transitions977Acquire(0,0) = -1 ,0978Release(0,0) = Error979Acquire(-1,0) = h ,h h > 0980Release(-1,0) = 0 ,0981Acquire(h,h) = h ,t h > 0, t > 0, h <> t982Release(h,h) = -1 ,0 h > 0983Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t'984Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t985986And pictorially987988+-----+989| 0, 0|------- release -------> Error990+-----+991| ^992acquire| |release993| |994| |995v |996+-----+997|-1, 0|998+-----+999| ^1000acquire| |release1001| |1002| |1003v |1004+-----+1005| h, h|1006+-----+1007| ^1008acquire| |release1009| |1010| |1011v |1012+-----+1013| h, t|----- acquire, release loopback ---+1014+-----+ |1015^ |1016| |1017+------------------------------------+1018*/10191020#ifdef DEBUG_QUEUING_LOCKS10211022/* Stuff for circular trace buffer */1023#define TRACE_BUF_ELE 10241024static char traces[TRACE_BUF_ELE][128] = {0};1025static int tc = 0;1026#define TRACE_LOCK(X, Y) \1027KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y);1028#define TRACE_LOCK_T(X, Y, Z) \1029KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z);1030#define TRACE_LOCK_HT(X, Y, Z, Q) \1031KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, \1032Z, Q);10331034static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid,1035kmp_queuing_lock_t *lck, kmp_int32 head_id,1036kmp_int32 tail_id) {1037kmp_int32 t, i;10381039__kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! 
\n");10401041i = tc % TRACE_BUF_ELE;1042__kmp_printf_no_lock("%s\n", traces[i]);1043i = (i + 1) % TRACE_BUF_ELE;1044while (i != (tc % TRACE_BUF_ELE)) {1045__kmp_printf_no_lock("%s", traces[i]);1046i = (i + 1) % TRACE_BUF_ELE;1047}1048__kmp_printf_no_lock("\n");10491050__kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, "1051"next_wait:%d, head_id:%d, tail_id:%d\n",1052gtid + 1, this_thr->th.th_spin_here,1053this_thr->th.th_next_waiting, head_id, tail_id);10541055__kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id);10561057if (lck->lk.head_id >= 1) {1058t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting;1059while (t > 0) {1060__kmp_printf_no_lock("-> %d ", t);1061t = __kmp_threads[t - 1]->th.th_next_waiting;1062}1063}1064__kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id);1065__kmp_printf_no_lock("\n\n");1066}10671068#endif /* DEBUG_QUEUING_LOCKS */10691070static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) {1071return TCR_4(lck->lk.owner_id) - 1;1072}10731074static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) {1075return lck->lk.depth_locked != -1;1076}10771078/* Acquire a lock using a the queuing lock implementation */1079template <bool takeTime>1080/* [TLW] The unused template above is left behind because of what BEB believes1081is a potential compiler problem with __forceinline. */1082__forceinline static int1083__kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,1084kmp_int32 gtid) {1085kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);1086volatile kmp_int32 *head_id_p = &lck->lk.head_id;1087volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;1088volatile kmp_uint32 *spin_here_p;10891090#if OMPT_SUPPORT1091ompt_state_t prev_state = ompt_state_undefined;1092#endif10931094KA_TRACE(1000,1095("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));10961097KMP_FSYNC_PREPARE(lck);1098KMP_DEBUG_ASSERT(this_thr != NULL);1099spin_here_p = &this_thr->th.th_spin_here;11001101#ifdef DEBUG_QUEUING_LOCKS1102TRACE_LOCK(gtid + 1, "acq ent");1103if (*spin_here_p)1104__kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);1105if (this_thr->th.th_next_waiting != 0)1106__kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);1107#endif1108KMP_DEBUG_ASSERT(!*spin_here_p);1109KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);11101111/* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to1112head_id_p that may follow, not just in execution order, but also in1113visibility order. This way, when a releasing thread observes the changes to1114the queue by this thread, it can rightly assume that spin_here_p has1115already been set to TRUE, so that when it sets spin_here_p to FALSE, it is1116not premature. If the releasing thread sets spin_here_p to FALSE before1117this thread sets it to TRUE, this thread will hang. 
*/1118*spin_here_p = TRUE; /* before enqueuing to prevent race */11191120while (1) {1121kmp_int32 enqueued;1122kmp_int32 head;1123kmp_int32 tail;11241125head = *head_id_p;11261127switch (head) {11281129case -1: {1130#ifdef DEBUG_QUEUING_LOCKS1131tail = *tail_id_p;1132TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);1133#endif1134tail = 0; /* to make sure next link asynchronously read is not set1135accidentally; this assignment prevents us from entering the1136if ( t > 0 ) condition in the enqueued case below, which is not1137necessary for this state transition */11381139/* try (-1,0)->(tid,tid) */1140enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p,1141KMP_PACK_64(-1, 0),1142KMP_PACK_64(gtid + 1, gtid + 1));1143#ifdef DEBUG_QUEUING_LOCKS1144if (enqueued)1145TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)");1146#endif1147} break;11481149default: {1150tail = *tail_id_p;1151KMP_DEBUG_ASSERT(tail != gtid + 1);11521153#ifdef DEBUG_QUEUING_LOCKS1154TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);1155#endif11561157if (tail == 0) {1158enqueued = FALSE;1159} else {1160/* try (h,t) or (h,h)->(h,tid) */1161enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1);11621163#ifdef DEBUG_QUEUING_LOCKS1164if (enqueued)1165TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)");1166#endif1167}1168} break;11691170case 0: /* empty queue */1171{1172kmp_int32 grabbed_lock;11731174#ifdef DEBUG_QUEUING_LOCKS1175tail = *tail_id_p;1176TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail);1177#endif1178/* try (0,0)->(-1,0) */11791180/* only legal transition out of head = 0 is head = -1 with no change to1181* tail */1182grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1);11831184if (grabbed_lock) {11851186*spin_here_p = FALSE;11871188KA_TRACE(11891000,1190("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n",1191lck, gtid));1192#ifdef DEBUG_QUEUING_LOCKS1193TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0);1194#endif11951196#if OMPT_SUPPORT1197if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {1198/* change the state before clearing wait_id */1199this_thr->th.ompt_thread_info.state = prev_state;1200this_thr->th.ompt_thread_info.wait_id = 0;1201}1202#endif12031204KMP_FSYNC_ACQUIRED(lck);1205return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */1206}1207enqueued = FALSE;1208} break;1209}12101211#if OMPT_SUPPORT1212if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {1213/* this thread will spin; set wait_id before entering wait state */1214prev_state = this_thr->th.ompt_thread_info.state;1215this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;1216this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;1217}1218#endif12191220if (enqueued) {1221if (tail > 0) {1222kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1);1223KMP_ASSERT(tail_thr != NULL);1224tail_thr->th.th_next_waiting = gtid + 1;1225/* corresponding wait for this write in release code */1226}1227KA_TRACE(1000,1228("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n",1229lck, gtid));12301231KMP_MB();1232// ToDo: Use __kmp_wait_sleep or similar when blocktime != inf1233KMP_WAIT(spin_here_p, FALSE, KMP_EQ, lck);1234// Synchronize writes to both runtime thread structures1235// and writes in user code.1236KMP_MB();12371238#ifdef DEBUG_QUEUING_LOCKS1239TRACE_LOCK(gtid + 1, "acq spin");12401241if (this_thr->th.th_next_waiting != 0)1242__kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);1243#endif1244KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 
0);1245KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after "1246"waiting on queue\n",1247lck, gtid));12481249#ifdef DEBUG_QUEUING_LOCKS1250TRACE_LOCK(gtid + 1, "acq exit 2");1251#endif12521253#if OMPT_SUPPORT1254/* change the state before clearing wait_id */1255this_thr->th.ompt_thread_info.state = prev_state;1256this_thr->th.ompt_thread_info.wait_id = 0;1257#endif12581259/* got lock, we were dequeued by the thread that released lock */1260return KMP_LOCK_ACQUIRED_FIRST;1261}12621263/* Yield if number of threads > number of logical processors */1264/* ToDo: Not sure why this should only be in oversubscription case,1265maybe should be traditional YIELD_INIT/YIELD_WHEN loop */1266KMP_YIELD_OVERSUB();12671268#ifdef DEBUG_QUEUING_LOCKS1269TRACE_LOCK(gtid + 1, "acq retry");1270#endif1271}1272KMP_ASSERT2(0, "should not get here");1273return KMP_LOCK_ACQUIRED_FIRST;1274}12751276int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {1277KMP_DEBUG_ASSERT(gtid >= 0);12781279int retval = __kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);1280return retval;1281}12821283static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck,1284kmp_int32 gtid) {1285char const *const func = "omp_set_lock";1286if (lck->lk.initialized != lck) {1287KMP_FATAL(LockIsUninitialized, func);1288}1289if (__kmp_is_queuing_lock_nestable(lck)) {1290KMP_FATAL(LockNestableUsedAsSimple, func);1291}1292if (__kmp_get_queuing_lock_owner(lck) == gtid) {1293KMP_FATAL(LockIsAlreadyOwned, func);1294}12951296__kmp_acquire_queuing_lock(lck, gtid);12971298lck->lk.owner_id = gtid + 1;1299return KMP_LOCK_ACQUIRED_FIRST;1300}13011302int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {1303volatile kmp_int32 *head_id_p = &lck->lk.head_id;1304kmp_int32 head;1305#ifdef KMP_DEBUG1306kmp_info_t *this_thr;1307#endif13081309KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid));1310KMP_DEBUG_ASSERT(gtid >= 0);1311#ifdef KMP_DEBUG1312this_thr = __kmp_thread_from_gtid(gtid);1313KMP_DEBUG_ASSERT(this_thr != NULL);1314KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);1315#endif13161317head = *head_id_p;13181319if (head == 0) { /* nobody on queue, nobody holding */1320/* try (0,0)->(-1,0) */1321if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) {1322KA_TRACE(1000,1323("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid));1324KMP_FSYNC_ACQUIRED(lck);1325return TRUE;1326}1327}13281329KA_TRACE(1000,1330("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid));1331return FALSE;1332}13331334static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck,1335kmp_int32 gtid) {1336char const *const func = "omp_test_lock";1337if (lck->lk.initialized != lck) {1338KMP_FATAL(LockIsUninitialized, func);1339}1340if (__kmp_is_queuing_lock_nestable(lck)) {1341KMP_FATAL(LockNestableUsedAsSimple, func);1342}13431344int retval = __kmp_test_queuing_lock(lck, gtid);13451346if (retval) {1347lck->lk.owner_id = gtid + 1;1348}1349return retval;1350}13511352int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {1353volatile kmp_int32 *head_id_p = &lck->lk.head_id;1354volatile kmp_int32 *tail_id_p = &lck->lk.tail_id;13551356KA_TRACE(1000,1357("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid));1358KMP_DEBUG_ASSERT(gtid >= 0);1359#if KMP_DEBUG || DEBUG_QUEUING_LOCKS1360kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid);1361#endif1362KMP_DEBUG_ASSERT(this_thr != NULL);1363#ifdef DEBUG_QUEUING_LOCKS1364TRACE_LOCK(gtid + 1, "rel 
ent");13651366if (this_thr->th.th_spin_here)1367__kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);1368if (this_thr->th.th_next_waiting != 0)1369__kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p);1370#endif1371KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);1372KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);13731374KMP_FSYNC_RELEASING(lck);13751376while (1) {1377kmp_int32 dequeued;1378kmp_int32 head;1379kmp_int32 tail;13801381head = *head_id_p;13821383#ifdef DEBUG_QUEUING_LOCKS1384tail = *tail_id_p;1385TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail);1386if (head == 0)1387__kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);1388#endif1389KMP_DEBUG_ASSERT(head !=13900); /* holding the lock, head must be -1 or queue head */13911392if (head == -1) { /* nobody on queue */1393/* try (-1,0)->(0,0) */1394if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) {1395KA_TRACE(13961000,1397("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n",1398lck, gtid));1399#ifdef DEBUG_QUEUING_LOCKS1400TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0);1401#endif14021403#if OMPT_SUPPORT1404/* nothing to do - no other thread is trying to shift blame */1405#endif1406return KMP_LOCK_RELEASED;1407}1408dequeued = FALSE;1409} else {1410KMP_MB();1411tail = *tail_id_p;1412if (head == tail) { /* only one thread on the queue */1413#ifdef DEBUG_QUEUING_LOCKS1414if (head <= 0)1415__kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);1416#endif1417KMP_DEBUG_ASSERT(head > 0);14181419/* try (h,h)->(-1,0) */1420dequeued = KMP_COMPARE_AND_STORE_REL64(1421RCAST(volatile kmp_int64 *, tail_id_p), KMP_PACK_64(head, head),1422KMP_PACK_64(-1, 0));1423#ifdef DEBUG_QUEUING_LOCKS1424TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)");1425#endif14261427} else {1428volatile kmp_int32 *waiting_id_p;1429kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);1430KMP_DEBUG_ASSERT(head_thr != NULL);1431waiting_id_p = &head_thr->th.th_next_waiting;14321433/* Does this require synchronous reads? */1434#ifdef DEBUG_QUEUING_LOCKS1435if (head <= 0 || tail <= 0)1436__kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);1437#endif1438KMP_DEBUG_ASSERT(head > 0 && tail > 0);14391440/* try (h,t)->(h',t) or (t,t) */1441KMP_MB();1442/* make sure enqueuing thread has time to update next waiting thread1443* field */1444*head_id_p =1445KMP_WAIT((volatile kmp_uint32 *)waiting_id_p, 0, KMP_NEQ, NULL);1446#ifdef DEBUG_QUEUING_LOCKS1447TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)");1448#endif1449dequeued = TRUE;1450}1451}14521453if (dequeued) {1454kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1);1455KMP_DEBUG_ASSERT(head_thr != NULL);14561457/* Does this require synchronous reads? */1458#ifdef DEBUG_QUEUING_LOCKS1459if (head <= 0 || tail <= 0)1460__kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail);1461#endif1462KMP_DEBUG_ASSERT(head > 0 && tail > 0);14631464/* For clean code only. Thread not released until next statement prevents1465race with acquire code. 
*/1466head_thr->th.th_next_waiting = 0;1467#ifdef DEBUG_QUEUING_LOCKS1468TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head);1469#endif14701471KMP_MB();1472/* reset spin value */1473head_thr->th.th_spin_here = FALSE;14741475KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after "1476"dequeuing\n",1477lck, gtid));1478#ifdef DEBUG_QUEUING_LOCKS1479TRACE_LOCK(gtid + 1, "rel exit 2");1480#endif1481return KMP_LOCK_RELEASED;1482}1483/* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring1484threads */14851486#ifdef DEBUG_QUEUING_LOCKS1487TRACE_LOCK(gtid + 1, "rel retry");1488#endif14891490} /* while */1491KMP_ASSERT2(0, "should not get here");1492return KMP_LOCK_RELEASED;1493}14941495static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck,1496kmp_int32 gtid) {1497char const *const func = "omp_unset_lock";1498KMP_MB(); /* in case another processor initialized lock */1499if (lck->lk.initialized != lck) {1500KMP_FATAL(LockIsUninitialized, func);1501}1502if (__kmp_is_queuing_lock_nestable(lck)) {1503KMP_FATAL(LockNestableUsedAsSimple, func);1504}1505if (__kmp_get_queuing_lock_owner(lck) == -1) {1506KMP_FATAL(LockUnsettingFree, func);1507}1508if (__kmp_get_queuing_lock_owner(lck) != gtid) {1509KMP_FATAL(LockUnsettingSetByAnother, func);1510}1511lck->lk.owner_id = 0;1512return __kmp_release_queuing_lock(lck, gtid);1513}15141515void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) {1516lck->lk.location = NULL;1517lck->lk.head_id = 0;1518lck->lk.tail_id = 0;1519lck->lk.next_ticket = 0;1520lck->lk.now_serving = 0;1521lck->lk.owner_id = 0; // no thread owns the lock.1522lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.1523lck->lk.initialized = lck;15241525KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck));1526}15271528void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) {1529lck->lk.initialized = NULL;1530lck->lk.location = NULL;1531lck->lk.head_id = 0;1532lck->lk.tail_id = 0;1533lck->lk.next_ticket = 0;1534lck->lk.now_serving = 0;1535lck->lk.owner_id = 0;1536lck->lk.depth_locked = -1;1537}15381539static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {1540char const *const func = "omp_destroy_lock";1541if (lck->lk.initialized != lck) {1542KMP_FATAL(LockIsUninitialized, func);1543}1544if (__kmp_is_queuing_lock_nestable(lck)) {1545KMP_FATAL(LockNestableUsedAsSimple, func);1546}1547if (__kmp_get_queuing_lock_owner(lck) != -1) {1548KMP_FATAL(LockStillOwned, func);1549}1550__kmp_destroy_queuing_lock(lck);1551}15521553// nested queuing locks15541555int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {1556KMP_DEBUG_ASSERT(gtid >= 0);15571558if (__kmp_get_queuing_lock_owner(lck) == gtid) {1559lck->lk.depth_locked += 1;1560return KMP_LOCK_ACQUIRED_NEXT;1561} else {1562__kmp_acquire_queuing_lock_timed_template<false>(lck, gtid);1563KMP_MB();1564lck->lk.depth_locked = 1;1565KMP_MB();1566lck->lk.owner_id = gtid + 1;1567return KMP_LOCK_ACQUIRED_FIRST;1568}1569}15701571static int1572__kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,1573kmp_int32 gtid) {1574char const *const func = "omp_set_nest_lock";1575if (lck->lk.initialized != lck) {1576KMP_FATAL(LockIsUninitialized, func);1577}1578if (!__kmp_is_queuing_lock_nestable(lck)) {1579KMP_FATAL(LockSimpleUsedAsNestable, func);1580}1581return __kmp_acquire_nested_queuing_lock(lck, gtid);1582}15831584int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {1585int 
retval;15861587KMP_DEBUG_ASSERT(gtid >= 0);15881589if (__kmp_get_queuing_lock_owner(lck) == gtid) {1590retval = ++lck->lk.depth_locked;1591} else if (!__kmp_test_queuing_lock(lck, gtid)) {1592retval = 0;1593} else {1594KMP_MB();1595retval = lck->lk.depth_locked = 1;1596KMP_MB();1597lck->lk.owner_id = gtid + 1;1598}1599return retval;1600}16011602static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,1603kmp_int32 gtid) {1604char const *const func = "omp_test_nest_lock";1605if (lck->lk.initialized != lck) {1606KMP_FATAL(LockIsUninitialized, func);1607}1608if (!__kmp_is_queuing_lock_nestable(lck)) {1609KMP_FATAL(LockSimpleUsedAsNestable, func);1610}1611return __kmp_test_nested_queuing_lock(lck, gtid);1612}16131614int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) {1615KMP_DEBUG_ASSERT(gtid >= 0);16161617KMP_MB();1618if (--(lck->lk.depth_locked) == 0) {1619KMP_MB();1620lck->lk.owner_id = 0;1621__kmp_release_queuing_lock(lck, gtid);1622return KMP_LOCK_RELEASED;1623}1624return KMP_LOCK_STILL_HELD;1625}16261627static int1628__kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck,1629kmp_int32 gtid) {1630char const *const func = "omp_unset_nest_lock";1631KMP_MB(); /* in case another processor initialized lock */1632if (lck->lk.initialized != lck) {1633KMP_FATAL(LockIsUninitialized, func);1634}1635if (!__kmp_is_queuing_lock_nestable(lck)) {1636KMP_FATAL(LockSimpleUsedAsNestable, func);1637}1638if (__kmp_get_queuing_lock_owner(lck) == -1) {1639KMP_FATAL(LockUnsettingFree, func);1640}1641if (__kmp_get_queuing_lock_owner(lck) != gtid) {1642KMP_FATAL(LockUnsettingSetByAnother, func);1643}1644return __kmp_release_nested_queuing_lock(lck, gtid);1645}16461647void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) {1648__kmp_init_queuing_lock(lck);1649lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks1650}16511652void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) {1653__kmp_destroy_queuing_lock(lck);1654lck->lk.depth_locked = 0;1655}16561657static void1658__kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {1659char const *const func = "omp_destroy_nest_lock";1660if (lck->lk.initialized != lck) {1661KMP_FATAL(LockIsUninitialized, func);1662}1663if (!__kmp_is_queuing_lock_nestable(lck)) {1664KMP_FATAL(LockSimpleUsedAsNestable, func);1665}1666if (__kmp_get_queuing_lock_owner(lck) != -1) {1667KMP_FATAL(LockStillOwned, func);1668}1669__kmp_destroy_nested_queuing_lock(lck);1670}16711672// access functions to fields which don't exist for all lock kinds.16731674static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) {1675return lck->lk.location;1676}16771678static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck,1679const ident_t *loc) {1680lck->lk.location = loc;1681}16821683static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) {1684return lck->lk.flags;1685}16861687static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,1688kmp_lock_flags_t flags) {1689lck->lk.flags = flags;1690}16911692#if KMP_USE_ADAPTIVE_LOCKS16931694/* RTM Adaptive locks */16951696#if KMP_HAVE_RTM_INTRINSICS1697#include <immintrin.h>1698#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)16991700#else17011702// Values from the status register after failed speculation.1703#define _XBEGIN_STARTED (~0u)1704#define _XABORT_EXPLICIT (1 << 0)1705#define _XABORT_RETRY (1 << 1)1706#define _XABORT_CONFLICT (1 << 2)1707#define 
_XABORT_CAPACITY (1 << 3)1708#define _XABORT_DEBUG (1 << 4)1709#define _XABORT_NESTED (1 << 5)1710#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF))17111712// Aborts for which it's worth trying again immediately1713#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)17141715#define STRINGIZE_INTERNAL(arg) #arg1716#define STRINGIZE(arg) STRINGIZE_INTERNAL(arg)17171718// Access to RTM instructions1719/*A version of XBegin which returns -1 on speculation, and the value of EAX on1720an abort. This is the same definition as the compiler intrinsic that will be1721supported at some point. */1722static __inline int _xbegin() {1723int res = -1;17241725#if KMP_OS_WINDOWS1726#if KMP_ARCH_X86_641727_asm {1728_emit 0xC71729_emit 0xF81730_emit 21731_emit 01732_emit 01733_emit 01734jmp L21735mov res, eax1736L2:1737}1738#else /* IA32 */1739_asm {1740_emit 0xC71741_emit 0xF81742_emit 21743_emit 01744_emit 01745_emit 01746jmp L21747mov res, eax1748L2:1749}1750#endif // KMP_ARCH_X86_641751#else1752/* Note that %eax must be noted as killed (clobbered), because the XSR is1753returned in %eax(%rax) on abort. Other register values are restored, so1754don't need to be killed.17551756We must also mark 'res' as an input and an output, since otherwise1757'res=-1' may be dropped as being dead, whereas we do need the assignment on1758the successful (i.e., non-abort) path. */1759__asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n"1760" .long 1f-1b-6\n"1761" jmp 2f\n"1762"1: movl %%eax,%0\n"1763"2:"1764: "+r"(res)::"memory", "%eax");1765#endif // KMP_OS_WINDOWS1766return res;1767}17681769/* Transaction end */1770static __inline void _xend() {1771#if KMP_OS_WINDOWS1772__asm {1773_emit 0x0f1774_emit 0x011775_emit 0xd51776}1777#else1778__asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory");1779#endif1780}17811782/* This is a macro, the argument must be a single byte constant which can be1783evaluated by the inline assembler, since it is emitted as a byte into the1784assembly code. */1785// clang-format off1786#if KMP_OS_WINDOWS1787#define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG1788#else1789#define _xabort(ARG) \1790__asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory");1791#endif1792// clang-format on1793#endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 130017941795// Statistics is collected for testing purpose1796#if KMP_DEBUG_ADAPTIVE_LOCKS17971798// We accumulate speculative lock statistics when the lock is destroyed. 
We1799// keep locks that haven't been destroyed in the liveLocks list so that we can1800// grab their statistics too.1801static kmp_adaptive_lock_statistics_t destroyedStats;18021803// To hold the list of live locks.1804static kmp_adaptive_lock_info_t liveLocks;18051806// A lock so we can safely update the list of locks.1807static kmp_bootstrap_lock_t chain_lock =1808KMP_BOOTSTRAP_LOCK_INITIALIZER(chain_lock);18091810// Initialize the list of stats.1811void __kmp_init_speculative_stats() {1812kmp_adaptive_lock_info_t *lck = &liveLocks;18131814memset(CCAST(kmp_adaptive_lock_statistics_t *, &(lck->stats)), 0,1815sizeof(lck->stats));1816lck->stats.next = lck;1817lck->stats.prev = lck;18181819KMP_ASSERT(lck->stats.next->stats.prev == lck);1820KMP_ASSERT(lck->stats.prev->stats.next == lck);18211822__kmp_init_bootstrap_lock(&chain_lock);1823}18241825// Insert the lock into the circular list1826static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) {1827__kmp_acquire_bootstrap_lock(&chain_lock);18281829lck->stats.next = liveLocks.stats.next;1830lck->stats.prev = &liveLocks;18311832liveLocks.stats.next = lck;1833lck->stats.next->stats.prev = lck;18341835KMP_ASSERT(lck->stats.next->stats.prev == lck);1836KMP_ASSERT(lck->stats.prev->stats.next == lck);18371838__kmp_release_bootstrap_lock(&chain_lock);1839}18401841static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) {1842KMP_ASSERT(lck->stats.next->stats.prev == lck);1843KMP_ASSERT(lck->stats.prev->stats.next == lck);18441845kmp_adaptive_lock_info_t *n = lck->stats.next;1846kmp_adaptive_lock_info_t *p = lck->stats.prev;18471848n->stats.prev = p;1849p->stats.next = n;1850}18511852static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) {1853memset(CCAST(kmp_adaptive_lock_statistics_t *, &lck->stats), 0,1854sizeof(lck->stats));1855__kmp_remember_lock(lck);1856}18571858static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t,1859kmp_adaptive_lock_info_t *lck) {1860kmp_adaptive_lock_statistics_t volatile *s = &lck->stats;18611862t->nonSpeculativeAcquireAttempts += lck->acquire_attempts;1863t->successfulSpeculations += s->successfulSpeculations;1864t->hardFailedSpeculations += s->hardFailedSpeculations;1865t->softFailedSpeculations += s->softFailedSpeculations;1866t->nonSpeculativeAcquires += s->nonSpeculativeAcquires;1867t->lemmingYields += s->lemmingYields;1868}18691870static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) {1871__kmp_acquire_bootstrap_lock(&chain_lock);18721873__kmp_add_stats(&destroyedStats, lck);1874__kmp_forget_lock(lck);18751876__kmp_release_bootstrap_lock(&chain_lock);1877}18781879static float percent(kmp_uint32 count, kmp_uint32 total) {1880return (total == 0) ? 
0.0 : (100.0 * count) / total;1881}18821883void __kmp_print_speculative_stats() {1884kmp_adaptive_lock_statistics_t total = destroyedStats;1885kmp_adaptive_lock_info_t *lck;18861887for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) {1888__kmp_add_stats(&total, lck);1889}1890kmp_adaptive_lock_statistics_t *t = &total;1891kmp_uint32 totalSections =1892t->nonSpeculativeAcquires + t->successfulSpeculations;1893kmp_uint32 totalSpeculations = t->successfulSpeculations +1894t->hardFailedSpeculations +1895t->softFailedSpeculations;1896if (totalSections <= 0)1897return;18981899kmp_safe_raii_file_t statsFile;1900if (strcmp(__kmp_speculative_statsfile, "-") == 0) {1901statsFile.set_stdout();1902} else {1903size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20;1904char buffer[buffLen];1905KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile,1906(kmp_int32)getpid());1907statsFile.open(buffer, "w");1908}19091910fprintf(statsFile, "Speculative lock statistics (all approximate!)\n");1911fprintf(statsFile,1912" Lock parameters: \n"1913" max_soft_retries : %10d\n"1914" max_badness : %10d\n",1915__kmp_adaptive_backoff_params.max_soft_retries,1916__kmp_adaptive_backoff_params.max_badness);1917fprintf(statsFile, " Non-speculative acquire attempts : %10d\n",1918t->nonSpeculativeAcquireAttempts);1919fprintf(statsFile, " Total critical sections : %10d\n",1920totalSections);1921fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n",1922t->successfulSpeculations,1923percent(t->successfulSpeculations, totalSections));1924fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n",1925t->nonSpeculativeAcquires,1926percent(t->nonSpeculativeAcquires, totalSections));1927fprintf(statsFile, " Lemming yields : %10d\n\n",1928t->lemmingYields);19291930fprintf(statsFile, " Speculative acquire attempts : %10d\n",1931totalSpeculations);1932fprintf(statsFile, " Successes : %10d (%5.1f%%)\n",1933t->successfulSpeculations,1934percent(t->successfulSpeculations, totalSpeculations));1935fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n",1936t->softFailedSpeculations,1937percent(t->softFailedSpeculations, totalSpeculations));1938fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n",1939t->hardFailedSpeculations,1940percent(t->hardFailedSpeculations, totalSpeculations));1941}19421943#define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++)1944#else1945#define KMP_INC_STAT(lck, stat)19461947#endif // KMP_DEBUG_ADAPTIVE_LOCKS19481949static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) {1950// It is enough to check that the head_id is zero.1951// We don't also need to check the tail.1952bool res = lck->lk.head_id == 0;19531954// We need a fence here, since we must ensure that no memory operations1955// from later in this thread float above that read.1956#if KMP_COMPILER_ICC || KMP_COMPILER_ICX1957_mm_mfence();1958#else1959__sync_synchronize();1960#endif19611962return res;1963}19641965// Functions for manipulating the badness1966static __inline void1967__kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) {1968// Reset the badness to zero so we eagerly try to speculate again1969lck->lk.adaptive.badness = 0;1970KMP_INC_STAT(lck, successfulSpeculations);1971}19721973// Create a bit mask with one more set bit.1974static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) {1975kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1;1976if (newBadness > lck->lk.adaptive.max_badness) {1977return;1978} else {1979lck->lk.adaptive.badness = 
newBadness;1980}1981}19821983// Check whether speculation should be attempted.1984KMP_ATTRIBUTE_TARGET_RTM1985static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck,1986kmp_int32 gtid) {1987kmp_uint32 badness = lck->lk.adaptive.badness;1988kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts;1989int res = (attempts & badness) == 0;1990return res;1991}19921993// Attempt to acquire only the speculative lock.1994// Does not back off to the non-speculative lock.1995KMP_ATTRIBUTE_TARGET_RTM1996static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck,1997kmp_int32 gtid) {1998int retries = lck->lk.adaptive.max_soft_retries;19992000// We don't explicitly count the start of speculation, rather we record the2001// results (success, hard fail, soft fail). The sum of all of those is the2002// total number of times we started speculation since all speculations must2003// end one of those ways.2004do {2005kmp_uint32 status = _xbegin();2006// Switch this in to disable actual speculation but exercise at least some2007// of the rest of the code. Useful for debugging...2008// kmp_uint32 status = _XABORT_NESTED;20092010if (status == _XBEGIN_STARTED) {2011/* We have successfully started speculation. Check that no-one acquired2012the lock for real between when we last looked and now. This also gets2013the lock cache line into our read-set, which we need so that we'll2014abort if anyone later claims it for real. */2015if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {2016// Lock is now visibly acquired, so someone beat us to it. Abort the2017// transaction so we'll restart from _xbegin with the failure status.2018_xabort(0x01);2019KMP_ASSERT2(0, "should not get here");2020}2021return 1; // Lock has been acquired (speculatively)2022} else {2023// We have aborted, update the statistics2024if (status & SOFT_ABORT_MASK) {2025KMP_INC_STAT(lck, softFailedSpeculations);2026// and loop round to retry.2027} else {2028KMP_INC_STAT(lck, hardFailedSpeculations);2029// Give up if we had a hard failure.2030break;2031}2032}2033} while (retries--); // Loop while we have retries, and didn't fail hard.20342035// Either we had a hard failure or we didn't succeed softly after2036// the full set of attempts, so back off the badness.2037__kmp_step_badness(lck);2038return 0;2039}20402041// Attempt to acquire the speculative lock, or back off to the non-speculative2042// one if the speculative lock cannot be acquired.2043// We can succeed speculatively, non-speculatively, or fail.2044static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) {2045// First try to acquire the lock speculatively2046if (__kmp_should_speculate(lck, gtid) &&2047__kmp_test_adaptive_lock_only(lck, gtid))2048return 1;20492050// Speculative acquisition failed, so try to acquire it non-speculatively.2051// Count the non-speculative acquire attempt2052lck->lk.adaptive.acquire_attempts++;20532054// Use base, non-speculative lock.2055if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) {2056KMP_INC_STAT(lck, nonSpeculativeAcquires);2057return 1; // Lock is acquired (non-speculatively)2058} else {2059return 0; // Failed to acquire the lock, it's already visibly locked.2060}2061}20622063static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,2064kmp_int32 gtid) {2065char const *const func = "omp_test_lock";2066if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {2067KMP_FATAL(LockIsUninitialized, func);2068}20692070int retval = __kmp_test_adaptive_lock(lck, gtid);20712072if (retval) 
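// The adaptive policy above gates speculation on
// (acquire_attempts & badness) == 0: badness is a low-order bit mask that
// gains one bit each time a speculative acquire gives up (__kmp_step_badness)
// and is cleared on success, so failures make speculation progressively rarer
// without ever disabling it. A minimal self-contained model of that policy
// follows; it is illustrative only, and speculation_policy is an invented name.
#if 0 // illustrative sketch, not compiled
#include <cstdint>

struct speculation_policy {
  uint32_t badness = 0;        // low-bit mask: 0, 1, 3, 7, ...
  uint32_t max_badness = 0xFF; // cap, analogous to max_badness above
  uint32_t attempts = 0;       // counts lock acquire attempts

  // Speculate only when the low bits selected by badness are all zero,
  // i.e. roughly every (badness + 1)-th attempt once failures accumulate.
  bool should_speculate() const { return (attempts & badness) == 0; }

  void on_success() { badness = 0; } // eagerly speculate again
  void on_failure() {                // shift one more set bit into the mask
    uint32_t next = (badness << 1) | 1;
    if (next <= max_badness)
      badness = next;
  }
};
#endif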
{
    lck->lk.qlk.owner_id = gtid + 1;
  }
  return retval;
}

// Block until we can acquire a speculative, adaptive lock. We check whether we
// should be trying to speculate. If we should be, we check the real lock to see
// if it is free, and, if not, pause without attempting to acquire it until it
// is. Then we try the speculative acquire. This means that although we suffer
// from lemmings a little (because we all can't acquire the lock speculatively
// until the queue of threads waiting has cleared), we don't get into a state
// where we can never acquire the lock speculatively (because we force the queue
// to clear by preventing new arrivals from entering the queue). This does mean
// that when we're trying to break lemmings, the lock is no longer fair.
// However, OpenMP makes no guarantee that its locks are fair, so this isn't a
// real problem.
static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck,
                                        kmp_int32 gtid) {
  if (__kmp_should_speculate(lck, gtid)) {
    if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
      if (__kmp_test_adaptive_lock_only(lck, gtid))
        return;
      // We tried speculation and failed, so give up.
    } else {
      // We can't try speculation until the lock is free, so we pause here
      // (without suspending on the queuing lock) to allow it to drain, then
      // try again. All other threads will also see the same result for
      // shouldSpeculate, so will be doing the same if they try to claim the
      // lock from now on.
      while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) {
        KMP_INC_STAT(lck, lemmingYields);
        KMP_YIELD(TRUE);
      }

      if (__kmp_test_adaptive_lock_only(lck, gtid))
        return;
    }
  }

  // Speculative acquisition failed, so acquire it non-speculatively.
  // Count the non-speculative acquire attempt
  lck->lk.adaptive.acquire_attempts++;

  __kmp_acquire_queuing_lock_timed_template<FALSE>(GET_QLK_PTR(lck), gtid);
  // We have acquired the base lock, so count that.
  KMP_INC_STAT(lck, nonSpeculativeAcquires);
}

static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
                                                    kmp_int32 gtid) {
  char const *const func = "omp_set_lock";
  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) {
    KMP_FATAL(LockIsAlreadyOwned, func);
  }

  __kmp_acquire_adaptive_lock(lck, gtid);

  lck->lk.qlk.owner_id = gtid + 1;
}

KMP_ATTRIBUTE_TARGET_RTM
static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck,
                                       kmp_int32 gtid) {
  if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(
          lck))) { // If the lock doesn't look claimed we must be speculating.
    // (Or the user's code is buggy and they're releasing without locking;
    // if we had XTEST we'd be able to check that case...)
    _xend(); // Exit speculation
    __kmp_update_badness_after_success(lck);
  } else { // Since the lock *is* visibly locked we're not speculating,
    // so should use the underlying lock's release scheme.
    __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid);
  }
  return KMP_LOCK_RELEASED;
}

static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck,
                                                   kmp_int32 gtid) {
  char const *const func = "omp_unset_lock";
  KMP_MB(); /* in case another processor initialized lock */
  if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {
    KMP_FATAL(LockIsUninitialized, func);
  }
  if
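// The release path above infers "we are speculating" from the lock word
// looking free; the comment notes that XTEST would make the check explicit.
// Where the intrinsic is available (<immintrin.h>, -mrtm/-mhle on GCC/Clang),
// _xtest() returns nonzero while the processor is executing transactionally.
// The sketch below is illustrative only and is not how this file does it;
// release_speculative_or_real is an invented name.
#if 0 // illustrative sketch, not compiled
#include <immintrin.h>

static void release_speculative_or_real(void (*real_release)(void)) {
  if (_xtest()) {   // currently inside an RTM/HLE transaction
    _xend();        // commit the speculative critical section
  } else {
    real_release(); // genuinely held: release the underlying lock
  }
}
#endif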
(__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) {2160KMP_FATAL(LockUnsettingFree, func);2161}2162if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) {2163KMP_FATAL(LockUnsettingSetByAnother, func);2164}2165lck->lk.qlk.owner_id = 0;2166__kmp_release_adaptive_lock(lck, gtid);2167return KMP_LOCK_RELEASED;2168}21692170static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) {2171__kmp_init_queuing_lock(GET_QLK_PTR(lck));2172lck->lk.adaptive.badness = 0;2173lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0;2174lck->lk.adaptive.max_soft_retries =2175__kmp_adaptive_backoff_params.max_soft_retries;2176lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness;2177#if KMP_DEBUG_ADAPTIVE_LOCKS2178__kmp_zero_speculative_stats(&lck->lk.adaptive);2179#endif2180KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck));2181}21822183static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) {2184#if KMP_DEBUG_ADAPTIVE_LOCKS2185__kmp_accumulate_speculative_stats(&lck->lk.adaptive);2186#endif2187__kmp_destroy_queuing_lock(GET_QLK_PTR(lck));2188// Nothing needed for the speculative part.2189}21902191static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {2192char const *const func = "omp_destroy_lock";2193if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) {2194KMP_FATAL(LockIsUninitialized, func);2195}2196if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) {2197KMP_FATAL(LockStillOwned, func);2198}2199__kmp_destroy_adaptive_lock(lck);2200}22012202#endif // KMP_USE_ADAPTIVE_LOCKS22032204/* ------------------------------------------------------------------------ */2205/* DRDPA ticket locks */2206/* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */22072208static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) {2209return lck->lk.owner_id - 1;2210}22112212static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) {2213return lck->lk.depth_locked != -1;2214}22152216__forceinline static int2217__kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {2218kmp_uint64 ticket = KMP_ATOMIC_INC(&lck->lk.next_ticket);2219kmp_uint64 mask = lck->lk.mask; // atomic load2220std::atomic<kmp_uint64> *polls = lck->lk.polls;22212222#ifdef USE_LOCK_PROFILE2223if (polls[ticket & mask] != ticket)2224__kmp_printf("LOCK CONTENTION: %p\n", lck);2225/* else __kmp_printf( "." );*/2226#endif /* USE_LOCK_PROFILE */22272228// Now spin-wait, but reload the polls pointer and mask, in case the2229// polling area has been reconfigured. 
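// DRDPA is a member of the ticket-lock family: acquirers take a ticket with an
// atomic increment and wait until that ticket is served. The "distributed
// polling area" spreads the waiting over polls[ticket & mask] so each waiter
// spins on its own slot instead of a single shared counter. For comparison, a
// minimal single-counter ticket lock in portable C++ follows; it is
// illustrative only (ticket_lock is an invented type, not the runtime's).
#if 0 // illustrative sketch, not compiled
#include <atomic>
#include <cstdint>

struct ticket_lock {
  std::atomic<uint64_t> next_ticket{0};
  std::atomic<uint64_t> now_serving{0};

  void lock() {
    uint64_t my_ticket = next_ticket.fetch_add(1, std::memory_order_relaxed);
    // Every waiter spins on the same counter -- exactly the cache-line
    // contention the DRDPA polling array is designed to spread out.
    while (now_serving.load(std::memory_order_acquire) != my_ticket)
      ;
  }

  void unlock() {
    now_serving.store(now_serving.load(std::memory_order_relaxed) + 1,
                      std::memory_order_release);
  }
};
#endif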
Unless it is reconfigured, the2230// reloads stay in L1 cache and are cheap.2231//2232// Keep this code in sync with KMP_WAIT, in kmp_dispatch.cpp !!!2233// The current implementation of KMP_WAIT doesn't allow for mask2234// and poll to be re-read every spin iteration.2235kmp_uint32 spins;2236kmp_uint64 time;2237KMP_FSYNC_PREPARE(lck);2238KMP_INIT_YIELD(spins);2239KMP_INIT_BACKOFF(time);2240while (polls[ticket & mask] < ticket) { // atomic load2241KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);2242// Re-read the mask and the poll pointer from the lock structure.2243//2244// Make certain that "mask" is read before "polls" !!!2245//2246// If another thread picks reconfigures the polling area and updates their2247// values, and we get the new value of mask and the old polls pointer, we2248// could access memory beyond the end of the old polling area.2249mask = lck->lk.mask; // atomic load2250polls = lck->lk.polls; // atomic load2251}22522253// Critical section starts here2254KMP_FSYNC_ACQUIRED(lck);2255KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n",2256ticket, lck));2257lck->lk.now_serving = ticket; // non-volatile store22582259// Deallocate a garbage polling area if we know that we are the last2260// thread that could possibly access it.2261//2262// The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup2263// ticket.2264if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) {2265__kmp_free(lck->lk.old_polls);2266lck->lk.old_polls = NULL;2267lck->lk.cleanup_ticket = 0;2268}22692270// Check to see if we should reconfigure the polling area.2271// If there is still a garbage polling area to be deallocated from a2272// previous reconfiguration, let a later thread reconfigure it.2273if (lck->lk.old_polls == NULL) {2274bool reconfigure = false;2275std::atomic<kmp_uint64> *old_polls = polls;2276kmp_uint32 num_polls = TCR_4(lck->lk.num_polls);22772278if (TCR_4(__kmp_nth) >2279(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) {2280// We are in oversubscription mode. Contract the polling area2281// down to a single location, if that hasn't been done already.2282if (num_polls > 1) {2283reconfigure = true;2284num_polls = TCR_4(lck->lk.num_polls);2285mask = 0;2286num_polls = 1;2287polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *2288sizeof(*polls));2289polls[0] = ticket;2290}2291} else {2292// We are in under/fully subscribed mode. Check the number of2293// threads waiting on the lock. The size of the polling area2294// should be at least the number of threads waiting.2295kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1;2296if (num_waiting > num_polls) {2297kmp_uint32 old_num_polls = num_polls;2298reconfigure = true;2299do {2300mask = (mask << 1) | 1;2301num_polls *= 2;2302} while (num_polls <= num_waiting);23032304// Allocate the new polling area, and copy the relevant portion2305// of the old polling area to the new area. 
__kmp_allocate()2306// zeroes the memory it allocates, and most of the old area is2307// just zero padding, so we only copy the release counters.2308polls = (std::atomic<kmp_uint64> *)__kmp_allocate(num_polls *2309sizeof(*polls));2310kmp_uint32 i;2311for (i = 0; i < old_num_polls; i++) {2312polls[i].store(old_polls[i]);2313}2314}2315}23162317if (reconfigure) {2318// Now write the updated fields back to the lock structure.2319//2320// Make certain that "polls" is written before "mask" !!!2321//2322// If another thread picks up the new value of mask and the old polls2323// pointer , it could access memory beyond the end of the old polling2324// area.2325//2326// On x86, we need memory fences.2327KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring "2328"lock %p to %d polls\n",2329ticket, lck, num_polls));23302331lck->lk.old_polls = old_polls;2332lck->lk.polls = polls; // atomic store23332334KMP_MB();23352336lck->lk.num_polls = num_polls;2337lck->lk.mask = mask; // atomic store23382339KMP_MB();23402341// Only after the new polling area and mask have been flushed2342// to main memory can we update the cleanup ticket field.2343//2344// volatile load / non-volatile store2345lck->lk.cleanup_ticket = lck->lk.next_ticket;2346}2347}2348return KMP_LOCK_ACQUIRED_FIRST;2349}23502351int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {2352int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid);2353return retval;2354}23552356static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,2357kmp_int32 gtid) {2358char const *const func = "omp_set_lock";2359if (lck->lk.initialized != lck) {2360KMP_FATAL(LockIsUninitialized, func);2361}2362if (__kmp_is_drdpa_lock_nestable(lck)) {2363KMP_FATAL(LockNestableUsedAsSimple, func);2364}2365if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) {2366KMP_FATAL(LockIsAlreadyOwned, func);2367}23682369__kmp_acquire_drdpa_lock(lck, gtid);23702371lck->lk.owner_id = gtid + 1;2372return KMP_LOCK_ACQUIRED_FIRST;2373}23742375int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {2376// First get a ticket, then read the polls pointer and the mask.2377// The polls pointer must be read before the mask!!! (See above)2378kmp_uint64 ticket = lck->lk.next_ticket; // atomic load2379std::atomic<kmp_uint64> *polls = lck->lk.polls;2380kmp_uint64 mask = lck->lk.mask; // atomic load2381if (polls[ticket & mask] == ticket) {2382kmp_uint64 next_ticket = ticket + 1;2383if (__kmp_atomic_compare_store_acq(&lck->lk.next_ticket, ticket,2384next_ticket)) {2385KMP_FSYNC_ACQUIRED(lck);2386KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n",2387ticket, lck));2388lck->lk.now_serving = ticket; // non-volatile store23892390// Since no threads are waiting, there is no possibility that we would2391// want to reconfigure the polling area. 
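// The ordering comments above are the heart of the reconfiguration protocol:
// the writer must publish the new polls pointer before the new mask, and
// readers must load the mask before the polls pointer, or a reader could pair
// a new (larger) mask with the old (smaller) array and index past its end.
// In portable C++ the same discipline can be written with release stores and
// acquire loads, as in the simplified single-writer sketch below (illustrative
// only; the runtime uses its own atomics and KMP_MB fences).
#if 0 // illustrative sketch, not compiled
#include <atomic>
#include <cstdint>

static std::atomic<std::atomic<uint64_t> *> g_polls; // polling area
static std::atomic<uint64_t> g_mask;                 // index mask

// Writer: install a larger area. Pointer first, mask second.
static void publish(std::atomic<uint64_t> *new_polls, uint64_t new_mask) {
  g_polls.store(new_polls, std::memory_order_release);
  g_mask.store(new_mask, std::memory_order_release);
}

// Reader: mask first, pointer second. If the new mask is observed, the
// release/acquire pairing guarantees an at-least-as-new polls pointer is
// observed too, so the mask never indexes past the array it was paired with.
static uint64_t read_slot(uint64_t ticket) {
  uint64_t mask = g_mask.load(std::memory_order_acquire);
  std::atomic<uint64_t> *polls = g_polls.load(std::memory_order_acquire);
  return polls[ticket & mask].load(std::memory_order_relaxed);
}
#endif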
We might have the cleanup ticket2392// value (which says that it is now safe to deallocate old_polls), but2393// we'll let a later thread which calls __kmp_acquire_lock do that - this2394// routine isn't supposed to block, and we would risk blocks if we called2395// __kmp_free() to do the deallocation.2396return TRUE;2397}2398}2399return FALSE;2400}24012402static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,2403kmp_int32 gtid) {2404char const *const func = "omp_test_lock";2405if (lck->lk.initialized != lck) {2406KMP_FATAL(LockIsUninitialized, func);2407}2408if (__kmp_is_drdpa_lock_nestable(lck)) {2409KMP_FATAL(LockNestableUsedAsSimple, func);2410}24112412int retval = __kmp_test_drdpa_lock(lck, gtid);24132414if (retval) {2415lck->lk.owner_id = gtid + 1;2416}2417return retval;2418}24192420int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {2421// Read the ticket value from the lock data struct, then the polls pointer and2422// the mask. The polls pointer must be read before the mask!!! (See above)2423kmp_uint64 ticket = lck->lk.now_serving + 1; // non-atomic load2424std::atomic<kmp_uint64> *polls = lck->lk.polls; // atomic load2425kmp_uint64 mask = lck->lk.mask; // atomic load2426KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n",2427ticket - 1, lck));2428KMP_FSYNC_RELEASING(lck);2429polls[ticket & mask] = ticket; // atomic store2430return KMP_LOCK_RELEASED;2431}24322433static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,2434kmp_int32 gtid) {2435char const *const func = "omp_unset_lock";2436KMP_MB(); /* in case another processor initialized lock */2437if (lck->lk.initialized != lck) {2438KMP_FATAL(LockIsUninitialized, func);2439}2440if (__kmp_is_drdpa_lock_nestable(lck)) {2441KMP_FATAL(LockNestableUsedAsSimple, func);2442}2443if (__kmp_get_drdpa_lock_owner(lck) == -1) {2444KMP_FATAL(LockUnsettingFree, func);2445}2446if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) &&2447(__kmp_get_drdpa_lock_owner(lck) != gtid)) {2448KMP_FATAL(LockUnsettingSetByAnother, func);2449}2450lck->lk.owner_id = 0;2451return __kmp_release_drdpa_lock(lck, gtid);2452}24532454void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) {2455lck->lk.location = NULL;2456lck->lk.mask = 0;2457lck->lk.num_polls = 1;2458lck->lk.polls = (std::atomic<kmp_uint64> *)__kmp_allocate(2459lck->lk.num_polls * sizeof(*(lck->lk.polls)));2460lck->lk.cleanup_ticket = 0;2461lck->lk.old_polls = NULL;2462lck->lk.next_ticket = 0;2463lck->lk.now_serving = 0;2464lck->lk.owner_id = 0; // no thread owns the lock.2465lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks.2466lck->lk.initialized = lck;24672468KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck));2469}24702471void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) {2472lck->lk.initialized = NULL;2473lck->lk.location = NULL;2474if (lck->lk.polls.load() != NULL) {2475__kmp_free(lck->lk.polls.load());2476lck->lk.polls = NULL;2477}2478if (lck->lk.old_polls != NULL) {2479__kmp_free(lck->lk.old_polls);2480lck->lk.old_polls = NULL;2481}2482lck->lk.mask = 0;2483lck->lk.num_polls = 0;2484lck->lk.cleanup_ticket = 0;2485lck->lk.next_ticket = 0;2486lck->lk.now_serving = 0;2487lck->lk.owner_id = 0;2488lck->lk.depth_locked = -1;2489}24902491static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {2492char const *const func = "omp_destroy_lock";2493if (lck->lk.initialized != lck) {2494KMP_FATAL(LockIsUninitialized, func);2495}2496if (__kmp_is_drdpa_lock_nestable(lck)) 
{2497KMP_FATAL(LockNestableUsedAsSimple, func);2498}2499if (__kmp_get_drdpa_lock_owner(lck) != -1) {2500KMP_FATAL(LockStillOwned, func);2501}2502__kmp_destroy_drdpa_lock(lck);2503}25042505// nested drdpa ticket locks25062507int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {2508KMP_DEBUG_ASSERT(gtid >= 0);25092510if (__kmp_get_drdpa_lock_owner(lck) == gtid) {2511lck->lk.depth_locked += 1;2512return KMP_LOCK_ACQUIRED_NEXT;2513} else {2514__kmp_acquire_drdpa_lock_timed_template(lck, gtid);2515KMP_MB();2516lck->lk.depth_locked = 1;2517KMP_MB();2518lck->lk.owner_id = gtid + 1;2519return KMP_LOCK_ACQUIRED_FIRST;2520}2521}25222523static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,2524kmp_int32 gtid) {2525char const *const func = "omp_set_nest_lock";2526if (lck->lk.initialized != lck) {2527KMP_FATAL(LockIsUninitialized, func);2528}2529if (!__kmp_is_drdpa_lock_nestable(lck)) {2530KMP_FATAL(LockSimpleUsedAsNestable, func);2531}2532__kmp_acquire_nested_drdpa_lock(lck, gtid);2533}25342535int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {2536int retval;25372538KMP_DEBUG_ASSERT(gtid >= 0);25392540if (__kmp_get_drdpa_lock_owner(lck) == gtid) {2541retval = ++lck->lk.depth_locked;2542} else if (!__kmp_test_drdpa_lock(lck, gtid)) {2543retval = 0;2544} else {2545KMP_MB();2546retval = lck->lk.depth_locked = 1;2547KMP_MB();2548lck->lk.owner_id = gtid + 1;2549}2550return retval;2551}25522553static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,2554kmp_int32 gtid) {2555char const *const func = "omp_test_nest_lock";2556if (lck->lk.initialized != lck) {2557KMP_FATAL(LockIsUninitialized, func);2558}2559if (!__kmp_is_drdpa_lock_nestable(lck)) {2560KMP_FATAL(LockSimpleUsedAsNestable, func);2561}2562return __kmp_test_nested_drdpa_lock(lck, gtid);2563}25642565int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) {2566KMP_DEBUG_ASSERT(gtid >= 0);25672568KMP_MB();2569if (--(lck->lk.depth_locked) == 0) {2570KMP_MB();2571lck->lk.owner_id = 0;2572__kmp_release_drdpa_lock(lck, gtid);2573return KMP_LOCK_RELEASED;2574}2575return KMP_LOCK_STILL_HELD;2576}25772578static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck,2579kmp_int32 gtid) {2580char const *const func = "omp_unset_nest_lock";2581KMP_MB(); /* in case another processor initialized lock */2582if (lck->lk.initialized != lck) {2583KMP_FATAL(LockIsUninitialized, func);2584}2585if (!__kmp_is_drdpa_lock_nestable(lck)) {2586KMP_FATAL(LockSimpleUsedAsNestable, func);2587}2588if (__kmp_get_drdpa_lock_owner(lck) == -1) {2589KMP_FATAL(LockUnsettingFree, func);2590}2591if (__kmp_get_drdpa_lock_owner(lck) != gtid) {2592KMP_FATAL(LockUnsettingSetByAnother, func);2593}2594return __kmp_release_nested_drdpa_lock(lck, gtid);2595}25962597void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {2598__kmp_init_drdpa_lock(lck);2599lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks2600}26012602void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) {2603__kmp_destroy_drdpa_lock(lck);2604lck->lk.depth_locked = 0;2605}26062607static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {2608char const *const func = "omp_destroy_nest_lock";2609if (lck->lk.initialized != lck) {2610KMP_FATAL(LockIsUninitialized, func);2611}2612if (!__kmp_is_drdpa_lock_nestable(lck)) {2613KMP_FATAL(LockSimpleUsedAsNestable, func);2614}2615if (__kmp_get_drdpa_lock_owner(lck) != -1) {2616KMP_FATAL(LockStillOwned, 
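// Every nested_* routine above follows the same shape: if the calling thread
// already owns the lock, just bump depth_locked; otherwise take the underlying
// simple lock, set the depth to 1 and record the owner, and only release the
// underlying lock when the depth returns to zero. The same shape over a plain
// std::mutex, for illustration only (nestable_lock is an invented class, not
// the runtime's implementation):
#if 0 // illustrative sketch, not compiled
#include <atomic>
#include <mutex>
#include <thread>

class nestable_lock {
  std::mutex base_;                      // the underlying "simple" lock
  std::atomic<std::thread::id> owner_{}; // current owner, if any
  int depth_ = 0;                        // only touched by the owner

public:
  void lock() {
    std::thread::id me = std::this_thread::get_id();
    if (owner_.load(std::memory_order_relaxed) == me) {
      ++depth_; // re-entry: just count it
      return;
    }
    base_.lock();
    owner_.store(me, std::memory_order_relaxed);
    depth_ = 1;
  }

  void unlock() {
    if (--depth_ == 0) {
      owner_.store(std::thread::id(), std::memory_order_relaxed);
      base_.unlock();
    }
  }
};
#endif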
func);2617}2618__kmp_destroy_nested_drdpa_lock(lck);2619}26202621// access functions to fields which don't exist for all lock kinds.26222623static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) {2624return lck->lk.location;2625}26262627static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck,2628const ident_t *loc) {2629lck->lk.location = loc;2630}26312632static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) {2633return lck->lk.flags;2634}26352636static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck,2637kmp_lock_flags_t flags) {2638lck->lk.flags = flags;2639}26402641// Time stamp counter2642#if KMP_ARCH_X86 || KMP_ARCH_X86_642643#define __kmp_tsc() __kmp_hardware_timestamp()2644// Runtime's default backoff parameters2645kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100};2646#else2647// Use nanoseconds for other platforms2648extern kmp_uint64 __kmp_now_nsec();2649kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100};2650#define __kmp_tsc() __kmp_now_nsec()2651#endif26522653// A useful predicate for dealing with timestamps that may wrap.2654// Is a before b? Since the timestamps may wrap, this is asking whether it's2655// shorter to go clockwise from a to b around the clock-face, or anti-clockwise.2656// Times where going clockwise is less distance than going anti-clockwise2657// are in the future, others are in the past. e.g. a = MAX-1, b = MAX+1 (=0),2658// then a > b (true) does not mean a reached b; whereas signed(a) = -2,2659// signed(b) = 0 captures the actual difference2660static inline bool before(kmp_uint64 a, kmp_uint64 b) {2661return ((kmp_int64)b - (kmp_int64)a) > 0;2662}26632664// Truncated binary exponential backoff function2665void __kmp_spin_backoff(kmp_backoff_t *boff) {2666// We could flatten this loop, but making it a nested loop gives better result2667kmp_uint32 i;2668for (i = boff->step; i > 0; i--) {2669kmp_uint64 goal = __kmp_tsc() + boff->min_tick;2670#if KMP_HAVE_UMWAIT2671if (__kmp_umwait_enabled) {2672__kmp_tpause(0, boff->min_tick);2673} else {2674#endif2675do {2676KMP_CPU_PAUSE();2677} while (before(__kmp_tsc(), goal));2678#if KMP_HAVE_UMWAIT2679}2680#endif2681}2682boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1);2683}26842685#if KMP_USE_DYNAMIC_LOCK26862687// Direct lock initializers. 
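// __kmp_spin_backoff implements truncated binary exponential backoff: pause in
// bursts bounded by a timestamp goal, then grow the step as (step << 1 | 1)
// masked with max_backoff - 1 (the default caps above, 4096 and 256, are
// powers of two, so the mask is a clean truncation). before() compares
// possibly-wrapping timestamps through a signed difference. A standalone
// sketch of both ideas, using std::chrono in place of the hardware timestamp,
// follows; it is illustrative only and the names are invented.
#if 0 // illustrative sketch, not compiled
#include <chrono>
#include <cstdint>

// Wrap-safe "a is before b": look at the sign of the distance, as before().
static inline bool ts_before(uint64_t a, uint64_t b) {
  return (int64_t)(b - a) > 0;
}

struct spin_backoff {
  uint32_t step = 1;
  uint32_t max_backoff = 4096; // power of two, so (max_backoff - 1) is a mask
  std::chrono::nanoseconds min_tick{100};

  void backoff_once() {
    for (uint32_t i = step; i > 0; --i) {
      auto goal = std::chrono::steady_clock::now() + min_tick;
      while (std::chrono::steady_clock::now() < goal) {
        // busy-wait for one tick (the runtime issues KMP_CPU_PAUSE here)
      }
    }
    // Truncated exponential growth: 1, 3, 7, ... capped by the mask.
    step = ((step << 1) | 1) & (max_backoff - 1);
  }
};
#endif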
It simply writes a tag to the low 8 bits of the2688// lock word.2689static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck,2690kmp_dyna_lockseq_t seq) {2691TCW_4(((kmp_base_tas_lock_t *)lck)->poll, KMP_GET_D_TAG(seq));2692KA_TRACE(269320,2694("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq));2695}26962697#if KMP_USE_TSX26982699// HLE lock functions - imported from the testbed runtime.2700#define HLE_ACQUIRE ".byte 0xf2;"2701#define HLE_RELEASE ".byte 0xf3;"27022703static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) {2704__asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory");2705return v;2706}27072708static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); }27092710static void __kmp_destroy_hle_lock_with_checks(kmp_dyna_lock_t *lck) {2711TCW_4(*lck, 0);2712}27132714static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {2715// Use gtid for KMP_LOCK_BUSY if necessary2716if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) {2717int delay = 1;2718do {2719while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) {2720for (int i = delay; i != 0; --i)2721KMP_CPU_PAUSE();2722delay = ((delay << 1) | 1) & 7;2723}2724} while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle));2725}2726}27272728static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck,2729kmp_int32 gtid) {2730__kmp_acquire_hle_lock(lck, gtid); // TODO: add checks2731}27322733static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {2734__asm__ volatile(HLE_RELEASE "movl %1,%0"2735: "=m"(*lck)2736: "r"(KMP_LOCK_FREE(hle))2737: "memory");2738return KMP_LOCK_RELEASED;2739}27402741static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck,2742kmp_int32 gtid) {2743return __kmp_release_hle_lock(lck, gtid); // TODO: add checks2744}27452746static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) {2747return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle);2748}27492750static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck,2751kmp_int32 gtid) {2752return __kmp_test_hle_lock(lck, gtid); // TODO: add checks2753}27542755static void __kmp_init_rtm_queuing_lock(kmp_queuing_lock_t *lck) {2756__kmp_init_queuing_lock(lck);2757}27582759static void __kmp_destroy_rtm_queuing_lock(kmp_queuing_lock_t *lck) {2760__kmp_destroy_queuing_lock(lck);2761}27622763static void2764__kmp_destroy_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {2765__kmp_destroy_queuing_lock_with_checks(lck);2766}27672768KMP_ATTRIBUTE_TARGET_RTM2769static void __kmp_acquire_rtm_queuing_lock(kmp_queuing_lock_t *lck,2770kmp_int32 gtid) {2771unsigned retries = 3, status;2772do {2773status = _xbegin();2774if (status == _XBEGIN_STARTED) {2775if (__kmp_is_unlocked_queuing_lock(lck))2776return;2777_xabort(0xff);2778}2779if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {2780// Wait until lock becomes free2781while (!__kmp_is_unlocked_queuing_lock(lck)) {2782KMP_YIELD(TRUE);2783}2784} else if (!(status & _XABORT_RETRY))2785break;2786} while (retries--);27872788// Fall-back non-speculative lock (xchg)2789__kmp_acquire_queuing_lock(lck, gtid);2790}27912792static void __kmp_acquire_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,2793kmp_int32 gtid) {2794__kmp_acquire_rtm_queuing_lock(lck, gtid);2795}27962797KMP_ATTRIBUTE_TARGET_RTM2798static int __kmp_release_rtm_queuing_lock(kmp_queuing_lock_t *lck,2799kmp_int32 gtid) {2800if (__kmp_is_unlocked_queuing_lock(lck)) {2801// Releasing from 
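// The HLE path above hand-encodes the XACQUIRE/XRELEASE prefixes (0xF2/0xF3)
// around xchg and mov. GCC and Clang also expose the same hints through the
// __atomic builtins with the __ATOMIC_HLE_ACQUIRE / __ATOMIC_HLE_RELEASE
// flags, an x86-specific compiler extension that typically requires -mhle;
// treat the sketch below as an approximation of that extension, illustrative
// only and not what this file does.
#if 0 // illustrative sketch, not compiled
static unsigned int hle_lock_word = 0; // invented lock word for the example

static void hle_lock() {
  // XACQUIRE-prefixed exchange; spin non-transactionally while it stays busy.
  while (__atomic_exchange_n(&hle_lock_word, 1u,
                             __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE) != 0u) {
    while (__atomic_load_n(&hle_lock_word, __ATOMIC_RELAXED) != 0u)
      __builtin_ia32_pause(); // same role as KMP_CPU_PAUSE above
  }
}

static void hle_unlock() {
  // XRELEASE-prefixed store ends the elided region.
  __atomic_store_n(&hle_lock_word, 0u,
                   __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
}
#endif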
speculation2802_xend();2803} else {2804// Releasing from a real lock2805__kmp_release_queuing_lock(lck, gtid);2806}2807return KMP_LOCK_RELEASED;2808}28092810static int __kmp_release_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,2811kmp_int32 gtid) {2812return __kmp_release_rtm_queuing_lock(lck, gtid);2813}28142815KMP_ATTRIBUTE_TARGET_RTM2816static int __kmp_test_rtm_queuing_lock(kmp_queuing_lock_t *lck,2817kmp_int32 gtid) {2818unsigned retries = 3, status;2819do {2820status = _xbegin();2821if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) {2822return 1;2823}2824if (!(status & _XABORT_RETRY))2825break;2826} while (retries--);28272828return __kmp_test_queuing_lock(lck, gtid);2829}28302831static int __kmp_test_rtm_queuing_lock_with_checks(kmp_queuing_lock_t *lck,2832kmp_int32 gtid) {2833return __kmp_test_rtm_queuing_lock(lck, gtid);2834}28352836// Reuse kmp_tas_lock_t for TSX lock which use RTM with fall-back spin lock.2837typedef kmp_tas_lock_t kmp_rtm_spin_lock_t;28382839static void __kmp_destroy_rtm_spin_lock(kmp_rtm_spin_lock_t *lck) {2840KMP_ATOMIC_ST_REL(&lck->lk.poll, 0);2841}28422843static void __kmp_destroy_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck) {2844__kmp_destroy_rtm_spin_lock(lck);2845}28462847KMP_ATTRIBUTE_TARGET_RTM2848static int __kmp_acquire_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,2849kmp_int32 gtid) {2850unsigned retries = 3, status;2851kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);2852kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);2853do {2854status = _xbegin();2855if (status == _XBEGIN_STARTED) {2856if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free)2857return KMP_LOCK_ACQUIRED_FIRST;2858_xabort(0xff);2859}2860if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) {2861// Wait until lock becomes free2862while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free) {2863KMP_YIELD(TRUE);2864}2865} else if (!(status & _XABORT_RETRY))2866break;2867} while (retries--);28682869// Fall-back spin lock2870KMP_FSYNC_PREPARE(lck);2871kmp_backoff_t backoff = __kmp_spin_backoff_params;2872while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != lock_free ||2873!__kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {2874__kmp_spin_backoff(&backoff);2875}2876KMP_FSYNC_ACQUIRED(lck);2877return KMP_LOCK_ACQUIRED_FIRST;2878}28792880static int __kmp_acquire_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,2881kmp_int32 gtid) {2882return __kmp_acquire_rtm_spin_lock(lck, gtid);2883}28842885KMP_ATTRIBUTE_TARGET_RTM2886static int __kmp_release_rtm_spin_lock(kmp_rtm_spin_lock_t *lck,2887kmp_int32 gtid) {2888if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == KMP_LOCK_FREE(rtm_spin)) {2889// Releasing from speculation2890_xend();2891} else {2892// Releasing from a real lock2893KMP_FSYNC_RELEASING(lck);2894KMP_ATOMIC_ST_REL(&lck->lk.poll, KMP_LOCK_FREE(rtm_spin));2895}2896return KMP_LOCK_RELEASED;2897}28982899static int __kmp_release_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,2900kmp_int32 gtid) {2901return __kmp_release_rtm_spin_lock(lck, gtid);2902}29032904KMP_ATTRIBUTE_TARGET_RTM2905static int __kmp_test_rtm_spin_lock(kmp_rtm_spin_lock_t *lck, kmp_int32 gtid) {2906unsigned retries = 3, status;2907kmp_int32 lock_free = KMP_LOCK_FREE(rtm_spin);2908kmp_int32 lock_busy = KMP_LOCK_BUSY(1, rtm_spin);2909do {2910status = _xbegin();2911if (status == _XBEGIN_STARTED &&2912KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free) {2913return TRUE;2914}2915if (!(status & _XABORT_RETRY))2916break;2917} while (retries--);29182919if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == lock_free 
&&2920__kmp_atomic_compare_store_acq(&lck->lk.poll, lock_free, lock_busy)) {2921KMP_FSYNC_ACQUIRED(lck);2922return TRUE;2923}2924return FALSE;2925}29262927static int __kmp_test_rtm_spin_lock_with_checks(kmp_rtm_spin_lock_t *lck,2928kmp_int32 gtid) {2929return __kmp_test_rtm_spin_lock(lck, gtid);2930}29312932#endif // KMP_USE_TSX29332934// Entry functions for indirect locks (first element of direct lock jump tables)2935static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l,2936kmp_dyna_lockseq_t tag);2937static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock);2938static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);2939static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);2940static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32);2941static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,2942kmp_int32);2943static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,2944kmp_int32);2945static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,2946kmp_int32);29472948// Lock function definitions for the union parameter type2949#define KMP_FOREACH_LOCK_KIND(m, a) m(ticket, a) m(queuing, a) m(drdpa, a)29502951#define expand1(lk, op) \2952static void __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock) { \2953__kmp_##op##_##lk##_##lock(&lock->lk); \2954}2955#define expand2(lk, op) \2956static int __kmp_##op##_##lk##_##lock(kmp_user_lock_p lock, \2957kmp_int32 gtid) { \2958return __kmp_##op##_##lk##_##lock(&lock->lk, gtid); \2959}2960#define expand3(lk, op) \2961static void __kmp_set_##lk##_##lock_flags(kmp_user_lock_p lock, \2962kmp_lock_flags_t flags) { \2963__kmp_set_##lk##_lock_flags(&lock->lk, flags); \2964}2965#define expand4(lk, op) \2966static void __kmp_set_##lk##_##lock_location(kmp_user_lock_p lock, \2967const ident_t *loc) { \2968__kmp_set_##lk##_lock_location(&lock->lk, loc); \2969}29702971KMP_FOREACH_LOCK_KIND(expand1, init)2972KMP_FOREACH_LOCK_KIND(expand1, init_nested)2973KMP_FOREACH_LOCK_KIND(expand1, destroy)2974KMP_FOREACH_LOCK_KIND(expand1, destroy_nested)2975KMP_FOREACH_LOCK_KIND(expand2, acquire)2976KMP_FOREACH_LOCK_KIND(expand2, acquire_nested)2977KMP_FOREACH_LOCK_KIND(expand2, release)2978KMP_FOREACH_LOCK_KIND(expand2, release_nested)2979KMP_FOREACH_LOCK_KIND(expand2, test)2980KMP_FOREACH_LOCK_KIND(expand2, test_nested)2981KMP_FOREACH_LOCK_KIND(expand3, )2982KMP_FOREACH_LOCK_KIND(expand4, )29832984#undef expand12985#undef expand22986#undef expand32987#undef expand429882989// Jump tables for the indirect lock functions2990// Only fill in the odd entries, that avoids the need to shift out the low bit29912992// init functions2993#define expand(l, op) 0, __kmp_init_direct_lock,2994void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = {2995__kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)};2996#undef expand29972998// destroy functions2999#define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock,3000static void (*direct_destroy[])(kmp_dyna_lock_t *) = {3001__kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};3002#undef expand3003#define expand(l, op) \30040, (void (*)(kmp_dyna_lock_t *))__kmp_destroy_##l##_lock_with_checks,3005static void (*direct_destroy_check[])(kmp_dyna_lock_t *) = {3006__kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)};3007#undef expand30083009// set/acquire functions3010#define expand(l, op) \30110, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,3012static int 
(*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = {3013__kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)};3014#undef expand3015#define expand(l, op) \30160, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,3017static int (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = {3018__kmp_set_indirect_lock_with_checks, 0,3019KMP_FOREACH_D_LOCK(expand, acquire)};3020#undef expand30213022// unset/release and test functions3023#define expand(l, op) \30240, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock,3025static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = {3026__kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)};3027static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = {3028__kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)};3029#undef expand3030#define expand(l, op) \30310, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks,3032static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = {3033__kmp_unset_indirect_lock_with_checks, 0,3034KMP_FOREACH_D_LOCK(expand, release)};3035static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = {3036__kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)};3037#undef expand30383039// Exposes only one set of jump tables (*lock or *lock_with_checks).3040void (**__kmp_direct_destroy)(kmp_dyna_lock_t *) = 0;3041int (**__kmp_direct_set)(kmp_dyna_lock_t *, kmp_int32) = 0;3042int (**__kmp_direct_unset)(kmp_dyna_lock_t *, kmp_int32) = 0;3043int (**__kmp_direct_test)(kmp_dyna_lock_t *, kmp_int32) = 0;30443045// Jump tables for the indirect lock functions3046#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,3047void (*__kmp_indirect_init[])(kmp_user_lock_p) = {3048KMP_FOREACH_I_LOCK(expand, init)};3049#undef expand30503051#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock,3052static void (*indirect_destroy[])(kmp_user_lock_p) = {3053KMP_FOREACH_I_LOCK(expand, destroy)};3054#undef expand3055#define expand(l, op) \3056(void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock_with_checks,3057static void (*indirect_destroy_check[])(kmp_user_lock_p) = {3058KMP_FOREACH_I_LOCK(expand, destroy)};3059#undef expand30603061// set/acquire functions3062#define expand(l, op) \3063(int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,3064static int (*indirect_set[])(kmp_user_lock_p,3065kmp_int32) = {KMP_FOREACH_I_LOCK(expand, acquire)};3066#undef expand3067#define expand(l, op) \3068(int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,3069static int (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = {3070KMP_FOREACH_I_LOCK(expand, acquire)};3071#undef expand30723073// unset/release and test functions3074#define expand(l, op) \3075(int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock,3076static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = {3077KMP_FOREACH_I_LOCK(expand, release)};3078static int (*indirect_test[])(kmp_user_lock_p,3079kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)};3080#undef expand3081#define expand(l, op) \3082(int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks,3083static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = {3084KMP_FOREACH_I_LOCK(expand, release)};3085static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = {3086KMP_FOREACH_I_LOCK(expand, test)};3087#undef expand30883089// Exposes only one jump tables (*lock or *lock_with_checks).3090void 
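// The direct-lock tables above are indexed straight by the tag kept in the low
// bits of the lock word: direct tags have the low bit set (indirect lock words
// are kept even, as noted where the index is stored), so only the odd slots
// hold real handlers and no shift is needed before indexing. A toy model of
// that dispatch scheme follows; it is illustrative only, and the names and tag
// values are invented rather than the runtime's exact layout.
#if 0 // illustrative sketch, not compiled
#include <cstdint>
#include <cstdio>

typedef void (*op_fn)(uint32_t *lock_word);

static void op_even_word(uint32_t *) { std::puts("indirect path"); }
static void op_kind_a(uint32_t *) { std::puts("direct kind A (tag 1)"); }
static void op_kind_b(uint32_t *) { std::puts("direct kind B (tag 3)"); }

// Odd slots hold handlers; even slots are placeholders that a direct tag can
// never select, so the tag indexes the table without shifting its low bit out.
static op_fn dispatch[] = {op_even_word, op_kind_a, nullptr, op_kind_b};

static void run_op(uint32_t *lock_word) {
  uint32_t tag = *lock_word & 0xFF; // tag lives in the low byte
  dispatch[(tag & 1) ? tag : 0](lock_word);
}
#endif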
(**__kmp_indirect_destroy)(kmp_user_lock_p) = 0;3091int (**__kmp_indirect_set)(kmp_user_lock_p, kmp_int32) = 0;3092int (**__kmp_indirect_unset)(kmp_user_lock_p, kmp_int32) = 0;3093int (**__kmp_indirect_test)(kmp_user_lock_p, kmp_int32) = 0;30943095// Lock index table.3096kmp_indirect_lock_table_t __kmp_i_lock_table;30973098// Size of indirect locks.3099static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0};31003101// Jump tables for lock accessor/modifier.3102void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p,3103const ident_t *) = {0};3104void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p,3105kmp_lock_flags_t) = {0};3106const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(3107kmp_user_lock_p) = {0};3108kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(3109kmp_user_lock_p) = {0};31103111// Use different lock pools for different lock types.3112static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0};31133114// User lock allocator for dynamically dispatched indirect locks. Every entry of3115// the indirect lock table holds the address and type of the allocated indirect3116// lock (kmp_indirect_lock_t), and the size of the table doubles when it is3117// full. A destroyed indirect lock object is returned to the reusable pool of3118// locks, unique to each lock type.3119kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock,3120kmp_int32 gtid,3121kmp_indirect_locktag_t tag) {3122kmp_indirect_lock_t *lck;3123kmp_lock_index_t idx, table_idx;31243125__kmp_acquire_lock(&__kmp_global_lock, gtid);31263127if (__kmp_indirect_lock_pool[tag] != NULL) {3128// Reuse the allocated and destroyed lock object3129lck = __kmp_indirect_lock_pool[tag];3130if (OMP_LOCK_T_SIZE < sizeof(void *))3131idx = lck->lock->pool.index;3132__kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next;3133KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n",3134lck));3135} else {3136kmp_uint32 row, col;3137kmp_indirect_lock_table_t *lock_table = &__kmp_i_lock_table;3138idx = 0;3139// Find location in list of lock tables to put new lock3140while (1) {3141table_idx = lock_table->next; // index within this table3142idx += lock_table->next; // global index within list of tables3143if (table_idx < lock_table->nrow_ptrs * KMP_I_LOCK_CHUNK) {3144row = table_idx / KMP_I_LOCK_CHUNK;3145col = table_idx % KMP_I_LOCK_CHUNK;3146// Allocate a new row of locks if necessary3147if (!lock_table->table[row]) {3148lock_table->table[row] = (kmp_indirect_lock_t *)__kmp_allocate(3149sizeof(kmp_indirect_lock_t) * KMP_I_LOCK_CHUNK);3150}3151break;3152}3153// Allocate a new lock table if necessary with double the capacity3154if (!lock_table->next_table) {3155kmp_indirect_lock_table_t *next_table =3156(kmp_indirect_lock_table_t *)__kmp_allocate(3157sizeof(kmp_indirect_lock_table_t));3158next_table->table = (kmp_indirect_lock_t **)__kmp_allocate(3159sizeof(kmp_indirect_lock_t *) * 2 * lock_table->nrow_ptrs);3160next_table->nrow_ptrs = 2 * lock_table->nrow_ptrs;3161next_table->next = 0;3162next_table->next_table = nullptr;3163lock_table->next_table = next_table;3164}3165lock_table = lock_table->next_table;3166KMP_ASSERT(lock_table);3167}3168lock_table->next++;31693170lck = &lock_table->table[row][col];3171// Allocate a new base lock object3172lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]);3173KA_TRACE(20,3174("__kmp_allocate_indirect_lock: allocated a new lock %p\n", 
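// The indirect-lock table is a two-level structure: an array of row pointers,
// each row holding KMP_I_LOCK_CHUNK lock slots, with further tables chained
// (doubling nrow_ptrs) once a table fills up; a global index splits into
// row = idx / chunk and col = idx % chunk. The sketch below models just the
// indexing and lazy row allocation; it is illustrative only, it grows the row
// pointer array in place instead of chaining tables, and the names and chunk
// size are invented.
#if 0 // illustrative sketch, not compiled
#include <cstdint>
#include <cstdlib>
#include <cstring>

struct lock_slot { void *payload; };

struct chunked_table {
  static const uint32_t kChunk = 1024; // stand-in for KMP_I_LOCK_CHUNK
  lock_slot **rows = nullptr;          // array of row pointers
  uint32_t nrow_ptrs = 0;              // capacity of `rows`
  uint32_t next = 0;                   // next free global index

  lock_slot *allocate() {
    uint32_t idx = next++;
    uint32_t row = idx / kChunk;
    uint32_t col = idx % kChunk;
    if (row >= nrow_ptrs) { // grow the row-pointer array (double it)
      uint32_t new_n = nrow_ptrs ? 2 * nrow_ptrs : 1;
      lock_slot **nr = (lock_slot **)std::calloc(new_n, sizeof(lock_slot *));
      if (rows) {
        std::memcpy(nr, rows, nrow_ptrs * sizeof(lock_slot *));
        std::free(rows);
      }
      rows = nr;
      nrow_ptrs = new_n;
    }
    if (!rows[row]) // allocate a whole row of slots lazily
      rows[row] = (lock_slot *)std::calloc(kChunk, sizeof(lock_slot));
    return &rows[row][col];
  }
};
#endif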
lck));3175}31763177__kmp_release_lock(&__kmp_global_lock, gtid);31783179lck->type = tag;31803181if (OMP_LOCK_T_SIZE < sizeof(void *)) {3182*(kmp_lock_index_t *)&(((kmp_base_tas_lock_t *)user_lock)->poll) =3183idx << 1; // indirect lock word must be even3184} else {3185*((kmp_indirect_lock_t **)user_lock) = lck;3186}31873188return lck;3189}31903191// User lock lookup for dynamically dispatched locks.3192static __forceinline kmp_indirect_lock_t *3193__kmp_lookup_indirect_lock(void **user_lock, const char *func) {3194if (__kmp_env_consistency_check) {3195kmp_indirect_lock_t *lck = NULL;3196if (user_lock == NULL) {3197KMP_FATAL(LockIsUninitialized, func);3198}3199if (OMP_LOCK_T_SIZE < sizeof(void *)) {3200kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock);3201lck = __kmp_get_i_lock(idx);3202} else {3203lck = *((kmp_indirect_lock_t **)user_lock);3204}3205if (lck == NULL) {3206KMP_FATAL(LockIsUninitialized, func);3207}3208return lck;3209} else {3210if (OMP_LOCK_T_SIZE < sizeof(void *)) {3211return __kmp_get_i_lock(KMP_EXTRACT_I_INDEX(user_lock));3212} else {3213return *((kmp_indirect_lock_t **)user_lock);3214}3215}3216}32173218static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock,3219kmp_dyna_lockseq_t seq) {3220#if KMP_USE_ADAPTIVE_LOCKS3221if (seq == lockseq_adaptive && !__kmp_cpuinfo.flags.rtm) {3222KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive");3223seq = lockseq_queuing;3224}3225#endif3226#if KMP_USE_TSX3227if (seq == lockseq_rtm_queuing && !__kmp_cpuinfo.flags.rtm) {3228seq = lockseq_queuing;3229}3230#endif3231kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq);3232kmp_indirect_lock_t *l =3233__kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag);3234KMP_I_LOCK_FUNC(l, init)(l->lock);3235KA_TRACE(323620, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n",3237seq));3238}32393240static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) {3241kmp_uint32 gtid = __kmp_entry_gtid();3242kmp_indirect_lock_t *l =3243__kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock");3244KMP_I_LOCK_FUNC(l, destroy)(l->lock);3245kmp_indirect_locktag_t tag = l->type;32463247__kmp_acquire_lock(&__kmp_global_lock, gtid);32483249// Use the base lock's space to keep the pool chain.3250l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag];3251if (OMP_LOCK_T_SIZE < sizeof(void *)) {3252l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock);3253}3254__kmp_indirect_lock_pool[tag] = l;32553256__kmp_release_lock(&__kmp_global_lock, gtid);3257}32583259static int __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {3260kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);3261return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);3262}32633264static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {3265kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);3266return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);3267}32683269static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) {3270kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock);3271return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);3272}32733274static int __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock,3275kmp_int32 gtid) {3276kmp_indirect_lock_t *l =3277__kmp_lookup_indirect_lock((void **)lock, "omp_set_lock");3278return KMP_I_LOCK_FUNC(l, set)(l->lock, gtid);3279}32803281static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock,3282kmp_int32 gtid) {3283kmp_indirect_lock_t *l =3284__kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock");3285return 
KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid);3286}32873288static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock,3289kmp_int32 gtid) {3290kmp_indirect_lock_t *l =3291__kmp_lookup_indirect_lock((void **)lock, "omp_test_lock");3292return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid);3293}32943295kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing;32963297// This is used only in kmp_error.cpp when consistency checking is on.3298kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) {3299switch (seq) {3300case lockseq_tas:3301case lockseq_nested_tas:3302return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck);3303#if KMP_USE_FUTEX3304case lockseq_futex:3305case lockseq_nested_futex:3306return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck);3307#endif3308case lockseq_ticket:3309case lockseq_nested_ticket:3310return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck);3311case lockseq_queuing:3312case lockseq_nested_queuing:3313#if KMP_USE_ADAPTIVE_LOCKS3314case lockseq_adaptive:3315#endif3316return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck);3317case lockseq_drdpa:3318case lockseq_nested_drdpa:3319return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck);3320default:3321return 0;3322}3323}33243325// Initializes data for dynamic user locks.3326void __kmp_init_dynamic_user_locks() {3327// Initialize jump table for the lock functions3328if (__kmp_env_consistency_check) {3329__kmp_direct_set = direct_set_check;3330__kmp_direct_unset = direct_unset_check;3331__kmp_direct_test = direct_test_check;3332__kmp_direct_destroy = direct_destroy_check;3333__kmp_indirect_set = indirect_set_check;3334__kmp_indirect_unset = indirect_unset_check;3335__kmp_indirect_test = indirect_test_check;3336__kmp_indirect_destroy = indirect_destroy_check;3337} else {3338__kmp_direct_set = direct_set;3339__kmp_direct_unset = direct_unset;3340__kmp_direct_test = direct_test;3341__kmp_direct_destroy = direct_destroy;3342__kmp_indirect_set = indirect_set;3343__kmp_indirect_unset = indirect_unset;3344__kmp_indirect_test = indirect_test;3345__kmp_indirect_destroy = indirect_destroy;3346}3347// If the user locks have already been initialized, then return. 
Allow the3348// switch between different KMP_CONSISTENCY_CHECK values, but do not allocate3349// new lock tables if they have already been allocated.3350if (__kmp_init_user_locks)3351return;33523353// Initialize lock index table3354__kmp_i_lock_table.nrow_ptrs = KMP_I_LOCK_TABLE_INIT_NROW_PTRS;3355__kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(3356sizeof(kmp_indirect_lock_t *) * KMP_I_LOCK_TABLE_INIT_NROW_PTRS);3357*(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate(3358KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t));3359__kmp_i_lock_table.next = 0;3360__kmp_i_lock_table.next_table = nullptr;33613362// Indirect lock size3363__kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t);3364__kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t);3365#if KMP_USE_ADAPTIVE_LOCKS3366__kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t);3367#endif3368__kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t);3369#if KMP_USE_TSX3370__kmp_indirect_lock_size[locktag_rtm_queuing] = sizeof(kmp_queuing_lock_t);3371#endif3372__kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t);3373#if KMP_USE_FUTEX3374__kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t);3375#endif3376__kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t);3377__kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t);3378__kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t);33793380// Initialize lock accessor/modifier3381#define fill_jumps(table, expand, sep) \3382{ \3383table[locktag##sep##ticket] = expand(ticket); \3384table[locktag##sep##queuing] = expand(queuing); \3385table[locktag##sep##drdpa] = expand(drdpa); \3386}33873388#if KMP_USE_ADAPTIVE_LOCKS3389#define fill_table(table, expand) \3390{ \3391fill_jumps(table, expand, _); \3392table[locktag_adaptive] = expand(queuing); \3393fill_jumps(table, expand, _nested_); \3394}3395#else3396#define fill_table(table, expand) \3397{ \3398fill_jumps(table, expand, _); \3399fill_jumps(table, expand, _nested_); \3400}3401#endif // KMP_USE_ADAPTIVE_LOCKS34023403#define expand(l) \3404(void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location3405fill_table(__kmp_indirect_set_location, expand);3406#undef expand3407#define expand(l) \3408(void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags3409fill_table(__kmp_indirect_set_flags, expand);3410#undef expand3411#define expand(l) \3412(const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location3413fill_table(__kmp_indirect_get_location, expand);3414#undef expand3415#define expand(l) \3416(kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags3417fill_table(__kmp_indirect_get_flags, expand);3418#undef expand34193420__kmp_init_user_locks = TRUE;3421}34223423// Clean up the lock table.3424void __kmp_cleanup_indirect_user_locks() {3425int k;34263427// Clean up locks in the pools first (they were already destroyed before going3428// into the pools).3429for (k = 0; k < KMP_NUM_I_LOCKS; ++k) {3430kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k];3431while (l != NULL) {3432kmp_indirect_lock_t *ll = l;3433l = (kmp_indirect_lock_t *)l->lock->pool.next;3434KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n",3435ll));3436__kmp_free(ll->lock);3437ll->lock = NULL;3438}3439__kmp_indirect_lock_pool[k] = NULL;3440}3441// Clean up the remaining undestroyed locks.3442kmp_indirect_lock_table_t *ptr = 
&__kmp_i_lock_table;3443while (ptr) {3444for (kmp_uint32 row = 0; row < ptr->nrow_ptrs; ++row) {3445if (!ptr->table[row])3446continue;3447for (kmp_uint32 col = 0; col < KMP_I_LOCK_CHUNK; ++col) {3448kmp_indirect_lock_t *l = &ptr->table[row][col];3449if (l->lock) {3450// Locks not destroyed explicitly need to be destroyed here.3451KMP_I_LOCK_FUNC(l, destroy)(l->lock);3452KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p "3453"from table\n",3454l));3455__kmp_free(l->lock);3456}3457}3458__kmp_free(ptr->table[row]);3459}3460kmp_indirect_lock_table_t *next_table = ptr->next_table;3461if (ptr != &__kmp_i_lock_table)3462__kmp_free(ptr);3463ptr = next_table;3464}34653466__kmp_init_user_locks = FALSE;3467}34683469enum kmp_lock_kind __kmp_user_lock_kind = lk_default;3470int __kmp_num_locks_in_block = 1; // FIXME - tune this value34713472#else // KMP_USE_DYNAMIC_LOCK34733474static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) {3475__kmp_init_tas_lock(lck);3476}34773478static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) {3479__kmp_init_nested_tas_lock(lck);3480}34813482#if KMP_USE_FUTEX3483static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) {3484__kmp_init_futex_lock(lck);3485}34863487static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {3488__kmp_init_nested_futex_lock(lck);3489}3490#endif34913492static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {3493return lck == lck->lk.self;3494}34953496static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {3497__kmp_init_ticket_lock(lck);3498}34993500static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {3501__kmp_init_nested_ticket_lock(lck);3502}35033504static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) {3505return lck == lck->lk.initialized;3506}35073508static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {3509__kmp_init_queuing_lock(lck);3510}35113512static void3513__kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) {3514__kmp_init_nested_queuing_lock(lck);3515}35163517#if KMP_USE_ADAPTIVE_LOCKS3518static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) {3519__kmp_init_adaptive_lock(lck);3520}3521#endif35223523static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) {3524return lck == lck->lk.initialized;3525}35263527static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {3528__kmp_init_drdpa_lock(lck);3529}35303531static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) {3532__kmp_init_nested_drdpa_lock(lck);3533}35343535/* user locks3536* They are implemented as a table of function pointers which are set to the3537* lock functions of the appropriate kind, once that has been determined. 

/* user locks
 * They are implemented as a table of function pointers which are set to the
 * lock functions of the appropriate kind, once that has been determined. */

enum kmp_lock_kind __kmp_user_lock_kind = lk_default;

size_t __kmp_base_user_lock_size = 0;
size_t __kmp_user_lock_size = 0;

kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;

int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck,
                                         kmp_int32 gtid) = NULL;
int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck,
                                            kmp_int32 gtid) = NULL;
void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;

int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                kmp_int32 gtid) = NULL;
int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
                                                   kmp_int32 gtid) = NULL;
void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL;

int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL;
const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck,
                                      const ident_t *loc) = NULL;
kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL;
void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck,
                                   kmp_lock_flags_t flags) = NULL;

void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) {
  switch (user_lock_kind) {
  case lk_default:
  default:
    KMP_ASSERT(0);

  case lk_tas: {
    __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t);
    __kmp_user_lock_size = sizeof(kmp_tas_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(tas);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas);
    } else {
      KMP_BIND_USER_LOCK(tas);
      KMP_BIND_NESTED_USER_LOCK(tas);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;
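
  // The KMP_BIND_USER_LOCK*() macros used in each case of this switch
  // (defined in kmp_lock.h) bind the remaining __kmp_*_user_lock_with_checks_
  // entry points (and, via the NESTED variants, the nested ones) to the
  // routines of the chosen lock kind; the *_WITH_CHECKS forms select the
  // consistency-checking variants, the plain forms the unchecked ones.
  // Roughly, as an illustration rather than the exact macro body:
  //
  //   __kmp_acquire_user_lock_with_checks_ =
  //       (int (*)(kmp_user_lock_p, kmp_int32))__kmp_acquire_tas_lock;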

#if KMP_USE_FUTEX

  case lk_futex: {
    __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t);
    __kmp_user_lock_size = sizeof(kmp_futex_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(futex);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex);
    } else {
      KMP_BIND_USER_LOCK(futex);
      KMP_BIND_NESTED_USER_LOCK(futex);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock);

    __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL;

    __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_location_ =
        (void (*)(kmp_user_lock_p, const ident_t *))NULL;

    __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL;

    __kmp_set_user_lock_flags_ =
        (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL;
  } break;

#endif // KMP_USE_FUTEX

  case lk_ticket: {
    __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t);
    __kmp_user_lock_size = sizeof(kmp_ticket_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(ticket);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket);
    } else {
      KMP_BIND_USER_LOCK(ticket);
      KMP_BIND_NESTED_USER_LOCK(ticket);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_ticket_lock_flags);
  } break;

  case lk_queuing: {
    __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t);
    __kmp_user_lock_size = sizeof(kmp_queuing_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(queuing);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing);
    } else {
      KMP_BIND_USER_LOCK(queuing);
      KMP_BIND_NESTED_USER_LOCK(queuing);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);
  } break;
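
  // The adaptive case below reuses the queuing-lock helpers for the owner,
  // "initialized", location, and flags queries: an adaptive lock falls back
  // to an ordinary queuing lock when speculation is not profitable, so those
  // fields live in the embedded queuing lock. Note also that only the
  // non-nested bind is performed for adaptive locks.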

#if KMP_USE_ADAPTIVE_LOCKS
  case lk_adaptive: {
    __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t);
    __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive);
    } else {
      KMP_BIND_USER_LOCK(adaptive);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_queuing_lock_flags);

  } break;
#endif // KMP_USE_ADAPTIVE_LOCKS

  case lk_drdpa: {
    __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t);
    __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t);

    __kmp_get_user_lock_owner_ =
        (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner);

    if (__kmp_env_consistency_check) {
      KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa);
      KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa);
    } else {
      KMP_BIND_USER_LOCK(drdpa);
      KMP_BIND_NESTED_USER_LOCK(drdpa);
    }

    __kmp_destroy_user_lock_ =
        (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock);

    __kmp_is_user_lock_initialized_ =
        (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized);

    __kmp_get_user_lock_location_ =
        (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location);

    __kmp_set_user_lock_location_ = (void (*)(
        kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location);

    __kmp_get_user_lock_flags_ =
        (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags);

    __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))(
        &__kmp_set_drdpa_lock_flags);
  } break;
  }
}

// ----------------------------------------------------------------------------
// User lock table & lock allocation

kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL};
kmp_user_lock_p __kmp_lock_pool = NULL;

// Lock block-allocation support.
kmp_block_of_locks *__kmp_lock_blocks = NULL;
int __kmp_num_locks_in_block = 1; // FIXME - tune this value

static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) {
  // Assume that kmp_global_lock is held upon entry/exit.
  kmp_lock_index_t index;
  if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) {
    kmp_lock_index_t size;
    kmp_user_lock_p *table;
    // Reallocate lock table.
    if (__kmp_user_lock_table.allocated == 0) {
      size = 1024;
    } else {
      size = __kmp_user_lock_table.allocated * 2;
    }
    table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size);
    KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1,
               sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1));
    table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table;
    // We cannot free the previous table now, since it may be in use by other
    // threads. So save the pointer to the previous table in the first
    // element of the new table. All the tables are organized into a list and
    // can be freed when the library shuts down.
    __kmp_user_lock_table.table = table;
    __kmp_user_lock_table.allocated = size;
  }
  KMP_DEBUG_ASSERT(__kmp_user_lock_table.used <
                   __kmp_user_lock_table.allocated);
  index = __kmp_user_lock_table.used;
  __kmp_user_lock_table.table[index] = lck;
  ++__kmp_user_lock_table.used;
  return index;
}
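
// The growth bookkeeping above, in numbers (illustrative): the table starts
// out with used == 1 and nothing allocated, so slot 0 is never handed out and
// the first lock gets index 1. The first insertion allocates 1024 slots; each
// later overflow allocates a table of twice the size, copies entries
// 1..used-1, and stores the address of the old table in table[0] of the new
// one. __kmp_cleanup_user_locks() later frees the whole chain by following
// exactly that link, essentially:
//
//   kmp_user_lock_p *t = __kmp_user_lock_table.table;
//   while (t != NULL) {
//     kmp_user_lock_p *prev = (kmp_user_lock_p *)t[0];
//     __kmp_free(t);
//     t = prev;
//   }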

static kmp_user_lock_p __kmp_lock_block_allocate() {
  // Assume that kmp_global_lock is held upon entry/exit.
  static int last_index = 0;
  if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) {
    // Restart the index.
    last_index = 0;
    // Need to allocate a new block.
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block;
    char *buffer =
        (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks));
    // Set up the new block.
    kmp_block_of_locks *new_block =
        (kmp_block_of_locks *)(&buffer[space_for_locks]);
    new_block->next_block = __kmp_lock_blocks;
    new_block->locks = (void *)buffer;
    // Publish the new block.
    KMP_MB();
    __kmp_lock_blocks = new_block;
  }
  kmp_user_lock_p ret = (kmp_user_lock_p)(&(
      ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size]));
  last_index++;
  return ret;
}
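
// Memory layout of one block created above (illustrative, with N ==
// __kmp_num_locks_in_block):
//
//   buffer: [ lock 0 | lock 1 | ... | lock N-1 | kmp_block_of_locks header ]
//            ^ new_block->locks                  ^ new_block
//
// The header lives at the end of the same allocation, so freeing
// block_ptr->locks in __kmp_cleanup_user_locks() releases the header as well;
// that is why the cleanup loop never frees *block_ptr separately.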

// Get memory for a lock. It may be freshly allocated memory or reused memory
// from the lock pool.
kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid,
                                         kmp_lock_flags_t flags) {
  kmp_user_lock_p lck;
  kmp_lock_index_t index;
  KMP_DEBUG_ASSERT(user_lock);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  if (__kmp_lock_pool == NULL) {
    // Lock pool is empty. Allocate new memory.

    if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point.
      lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size);
    } else {
      lck = __kmp_lock_block_allocate();
    }

    // Insert lock in the table so that it can be freed in __kmp_cleanup,
    // and the debugger has info on all allocated locks.
    index = __kmp_lock_table_insert(lck);
  } else {
    // Pick up lock from pool.
    lck = __kmp_lock_pool;
    index = __kmp_lock_pool->pool.index;
    __kmp_lock_pool = __kmp_lock_pool->pool.next;
  }

  // We could potentially differentiate between nested and regular locks
  // here, and do the lock table lookup for regular locks only.
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    *((kmp_lock_index_t *)user_lock) = index;
  } else {
    *((kmp_user_lock_p *)user_lock) = lck;
  }

  // Mark the lock if it is a critical section lock.
  __kmp_set_user_lock_flags(lck, flags);

  __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line up

  return lck;
}

// Put the lock's memory back in the pool for reuse.
void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid,
                          kmp_user_lock_p lck) {
  KMP_DEBUG_ASSERT(user_lock != NULL);
  KMP_DEBUG_ASSERT(lck != NULL);

  __kmp_acquire_lock(&__kmp_global_lock, gtid);

  lck->pool.next = __kmp_lock_pool;
  __kmp_lock_pool = lck;
  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used);
    lck->pool.index = index;
  }

  __kmp_release_lock(&__kmp_global_lock, gtid);
}

kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) {
  kmp_user_lock_p lck = NULL;

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  if (OMP_LOCK_T_SIZE < sizeof(void *)) {
    kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock);
    if (__kmp_env_consistency_check) {
      if (!(0 < index && index < __kmp_user_lock_table.used)) {
        KMP_FATAL(LockIsUninitialized, func);
      }
    }
    KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used);
    KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0);
    lck = __kmp_user_lock_table.table[index];
  } else {
    lck = *((kmp_user_lock_p *)user_lock);
  }

  if (__kmp_env_consistency_check) {
    if (lck == NULL) {
      KMP_FATAL(LockIsUninitialized, func);
    }
  }

  return lck;
}
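
// How the user-visible handle is interpreted (sketch of the convention used
// by __kmp_user_lock_allocate() and __kmp_lookup_user_lock() above): when the
// OpenMP lock object is too small to hold a pointer
// (OMP_LOCK_T_SIZE < sizeof(void *)), the handle stores an index into
// __kmp_user_lock_table; otherwise it stores the kmp_user_lock_p itself:
//
//   if (OMP_LOCK_T_SIZE < sizeof(void *))
//     lck = __kmp_user_lock_table.table[*(kmp_lock_index_t *)user_lock];
//   else
//     lck = *(kmp_user_lock_p *)user_lock;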

void __kmp_cleanup_user_locks(void) {
  // Reset lock pool. Don't worry about locks in the pool--we will free them
  // when iterating through the lock table (it includes all the locks, dead or
  // alive).
  __kmp_lock_pool = NULL;

#define IS_CRITICAL(lck)                                                       \
  ((__kmp_get_user_lock_flags_ != NULL) &&                                     \
   ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section))

  // Loop through lock table, free all locks.
  // Do not free item [0]; it is reserved for the lock tables list.
  //
  // FIXME - we are iterating through a list of (pointers to) objects of type
  // union kmp_user_lock, but we have no way of knowing whether the base type
  // is currently "pool" or whatever the global user lock type is.
  //
  // We are relying on the fact that for all of the user lock types
  // (except "tas"), the first field in the lock struct is the "initialized"
  // field, which is set to the address of the lock object itself when
  // the lock is initialized. When the union is of type "pool", the
  // first field is a pointer to the next object in the free list, which
  // will not be the same address as the object itself.
  //
  // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will
  // fail for "pool" objects on the free list. This must happen as the
  // "location" field of real user locks overlaps the "index" field of
  // "pool" objects.
  //
  // It would be better to run through the free list and remove all "pool"
  // objects from the lock table before executing this loop. However,
  // "pool" objects do not always have their index field set (only on
  // lin_32e), and I don't want to search the lock table for the address
  // of every "pool" object on the free list.
  while (__kmp_user_lock_table.used > 1) {
    const ident *loc;

    // Reduce __kmp_user_lock_table.used before freeing the lock
    // so that the state of locks stays consistent.
    kmp_user_lock_p lck =
        __kmp_user_lock_table.table[--__kmp_user_lock_table.used];

    if ((__kmp_is_user_lock_initialized_ != NULL) &&
        (*__kmp_is_user_lock_initialized_)(lck)) {
      // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND
      // it is NOT a critical section (user is not responsible for destroying
      // criticals) AND we know the source location to report.
      if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) &&
          ((loc = __kmp_get_user_lock_location(lck)) != NULL) &&
          (loc->psource != NULL)) {
        kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
        KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line);
        __kmp_str_loc_free(&str_loc);
      }

#ifdef KMP_DEBUG
      if (IS_CRITICAL(lck)) {
        KA_TRACE(
            20,
            ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n",
             lck, *(void **)lck));
      } else {
        KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck,
                      *(void **)lck));
      }
#endif // KMP_DEBUG

      // Cleanup internal lock dynamic resources (for drdpa locks particularly).
      __kmp_destroy_user_lock(lck);
    }

    // Free the lock if block allocation of locks is not used.
    if (__kmp_lock_blocks == NULL) {
      __kmp_free(lck);
    }
  }

#undef IS_CRITICAL

  // Delete lock table(s).
  kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table;
  __kmp_user_lock_table.table = NULL;
  __kmp_user_lock_table.allocated = 0;

  while (table_ptr != NULL) {
    // In the first element we saved the pointer to the previous
    // (smaller) lock table.
    kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]);
    __kmp_free(table_ptr);
    table_ptr = next;
  }

  // Free buffers allocated for blocks of locks.
  kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks;
  __kmp_lock_blocks = NULL;

  while (block_ptr != NULL) {
    kmp_block_of_locks_t *next = block_ptr->next_block;
    __kmp_free(block_ptr->locks);
    // *block_ptr itself was allocated at the end of the locks vector.
    block_ptr = next;
  }

  TCW_4(__kmp_init_user_locks, FALSE);
}

#endif // KMP_USE_DYNAMIC_LOCK