/*1* Copyright 2010-2012 PathScale, Inc. All rights reserved.2* Copyright 2021 David Chisnall. All rights reserved.3*4* Redistribution and use in source and binary forms, with or without5* modification, are permitted provided that the following conditions are met:6*7* 1. Redistributions of source code must retain the above copyright notice,8* this list of conditions and the following disclaimer.9*10* 2. Redistributions in binary form must reproduce the above copyright notice,11* this list of conditions and the following disclaimer in the documentation12* and/or other materials provided with the distribution.13*14* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS15* IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,16* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR17* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR18* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,19* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,20* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;21* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,22* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR23* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF24* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.25*/2627/**28* guard.cc: Functions for thread-safe static initialisation.29*30* Static values in C++ can be initialised lazily their first use. This file31* contains functions that are used to ensure that two threads attempting to32* initialize the same static do not call the constructor twice. This is33* important because constructors can have side effects, so calling the34* constructor twice may be very bad.35*36* Statics that require initialisation are protected by a 64-bit value. Any37* platform that can do 32-bit atomic test and set operations can use this38* value as a low-overhead lock. Because statics (in most sane code) are39* accessed far more times than they are initialised, this lock implementation40* is heavily optimised towards the case where the static has already been41* initialised.42*/43#include "atomic.h"44#include <assert.h>45#include <pthread.h>46#include <stdint.h>47#include <stdlib.h>4849// Older GCC doesn't define __LITTLE_ENDIAN__50#ifndef __LITTLE_ENDIAN__51// If __BYTE_ORDER__ is defined, use that instead52# ifdef __BYTE_ORDER__53# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__54# define __LITTLE_ENDIAN__55# endif56// x86 and ARM are the most common little-endian CPUs, so let's have a57// special case for them (ARM is already special cased). Assume everything58// else is big endian.59# elif defined(__x86_64) || defined(__i386)60# define __LITTLE_ENDIAN__61# endif62#endif6364/*65* The Itanium C++ ABI defines guard words that are 64-bit (32-bit on AArch32)66* values with one bit defined to indicate that the guarded variable is and67* another bit to indicate that it's currently locked (initialisation in68* progress). The bit to use depends on the byte order of the target.69*70* On many 32-bit platforms, 64-bit atomics are unavailable (or slow) and so we71* treat the two halves of the 64-bit word as independent values and establish72* an ordering on them such that the guard word is never modified unless the73* lock word is in the locked state. This means that we can do double-checked74* locking by loading the guard word and, if it is not initialised, trying to75* transition the lock word from the unlocked to locked state, and then76* manipulate the guard word.77*/78namespace79{80/**81* The state of the guard variable when an attempt is made to lock it.82*/83enum class GuardState84{85/**86* The lock is not held but is not needed because initialisation is87* one.88*/89InitDone,9091/**92* Initialisation is not done but the lock is held by the caller.93*/94InitLockSucceeded,9596/**97* Attempting to acquire the lock failed.98*/99InitLockFailed100};101102/**103* Class encapsulating a single atomic word being used to represent the104* guard. The word size is defined by the type of `GuardWord`. The bit105* used to indicate the locked state is `1<<LockedBit`, the bit used to106* indicate the initialised state is `1<<InitBit`.107*/108template<typename GuardWord, int LockedBit, int InitBit>109struct SingleWordGuard110{111/**112* The value indicating that the lock bit is set (and no other bits).113*/114static constexpr GuardWord locked = static_cast<GuardWord>(1)115<< LockedBit;116117/**118* The value indicating that the initialised bit is set (and all other119* bits are zero).120*/121static constexpr GuardWord initialised = static_cast<GuardWord>(1)122<< InitBit;123124/**125* The guard variable.126*/127atomic<GuardWord> val;128129public:130/**131* Release the lock and set the initialised state. In the single-word132* implementation here, these are both done by a single store.133*/134void unlock(bool isInitialised)135{136val.store(isInitialised ? initialised : 0, memory_order::release);137#ifndef NDEBUG138GuardWord init_state = initialised;139assert(*reinterpret_cast<uint8_t*>(&init_state) != 0);140#endif141}142143/**144* Try to acquire the lock. This has a tri-state return, indicating145* either that the lock was acquired, it wasn't acquired because it was146* contended, or it wasn't acquired because the guarded variable is147* already initialised.148*/149GuardState try_lock()150{151GuardWord old = 0;152// Try to acquire the lock, assuming that we are in the state where153// the lock is not held and the variable is not initialised (so the154// expected value is 0).155if (val.compare_exchange(old, locked))156{157return GuardState::InitLockSucceeded;158}159// If the CAS failed and the old value indicates that this is160// initialised, return that initialisation is done and skip further161// retries.162if (old == initialised)163{164return GuardState::InitDone;165}166// Otherwise, report failure.167return GuardState::InitLockFailed;168}169170/**171* Check whether the guard indicates that the variable is initialised.172*/173bool is_initialised()174{175return (val.load(memory_order::acquire) & initialised) ==176initialised;177}178};179180/**181* Class encapsulating using two 32-bit atomic values to represent a 64-bit182* guard variable.183*/184template<int LockedBit, int InitBit>185class DoubleWordGuard186{187/**188* The value of `lock_word` when the lock is held.189*/190static constexpr uint32_t locked = static_cast<uint32_t>(1)191<< LockedBit;192193/**194* The value of `init_word` when the guarded variable is initialised.195*/196static constexpr uint32_t initialised = static_cast<uint32_t>(1)197<< InitBit;198199/**200* The word used for the initialised flag. This is always the first201* word irrespective of endian because the generated code compares the202* first byte in memory against 0.203*/204atomic<uint32_t> init_word;205206/**207* The word used for the lock.208*/209atomic<uint32_t> lock_word;210211public:212/**213* Try to acquire the lock. This has a tri-state return, indicating214* either that the lock was acquired, it wasn't acquired because it was215* contended, or it wasn't acquired because the guarded variable is216* already initialised.217*/218GuardState try_lock()219{220uint32_t old = 0;221// Try to acquire the lock222if (lock_word.compare_exchange(old, locked))223{224// If we succeeded, check if initialisation has happened. In225// this version, we don't have atomic manipulation of both the226// lock and initialised bits together. Instead, we have an227// ordering rule that the initialised bit is only ever updated228// with the lock held.229if (is_initialised())230{231// If another thread did manage to initialise this, release232// the lock and notify the caller that initialisation is233// done.234lock_word.store(0, memory_order::release);235return GuardState::InitDone;236}237return GuardState::InitLockSucceeded;238}239return GuardState::InitLockFailed;240}241242/**243* Set the initialised state and release the lock. In this244* implementation, this is ordered, not atomic: the initialise bit is245* set while the lock is held.246*/247void unlock(bool isInitialised)248{249init_word.store(isInitialised ? initialised : 0,250memory_order::release);251lock_word.store(0, memory_order::release);252assert((*reinterpret_cast<uint8_t*>(this) != 0) == isInitialised);253}254255/**256* Return whether the guarded variable is initialised.257*/258bool is_initialised()259{260return (init_word.load(memory_order::acquire) & initialised) ==261initialised;262}263};264265// Check that the two implementations are the correct size.266static_assert(sizeof(SingleWordGuard<uint32_t, 31, 0>) == sizeof(uint32_t),267"Single-word 32-bit guard must be 32 bits");268static_assert(sizeof(SingleWordGuard<uint64_t, 63, 0>) == sizeof(uint64_t),269"Single-word 64-bit guard must be 64 bits");270static_assert(sizeof(DoubleWordGuard<31, 0>) == sizeof(uint64_t),271"Double-word guard must be 64 bits");272273#ifdef __arm__274/**275* The Arm PCS defines a variant of the Itanium ABI with 32-bit lock words.276*/277using Guard = SingleWordGuard<uint32_t, 31, 0>;278#elif defined(_LP64)279# if defined(__LITTLE_ENDIAN__)280/**281* On little-endian 64-bit platforms the guard word is a single 64-bit282* atomic with the lock in the high bit and the initialised flag in the low283* bit.284*/285using Guard = SingleWordGuard<uint64_t, 63, 0>;286# else287/**288* On bit-endian 64-bit platforms, the guard word is a single 64-bit atomic289* with the lock in the low bit and the initialised bit in the highest290* byte.291*/292using Guard = SingleWordGuard<uint64_t, 0, 56>;293# endif294#else295# if defined(__LITTLE_ENDIAN__)296/**297* 32-bit platforms use the same layout as 64-bit.298*/299using Guard = DoubleWordGuard<31, 0>;300# else301/**302* 32-bit platforms use the same layout as 64-bit.303*/304using Guard = DoubleWordGuard<0, 24>;305# endif306#endif307308} // namespace309310/**311* Acquires a lock on a guard, returning 0 if the object has already been312* initialised, and 1 if it has not. If the object is already constructed then313* this function just needs to read a byte from memory and return.314*/315extern "C" int __cxa_guard_acquire(Guard *guard_object)316{317// Check if this is already initialised. If so, we don't have to do318// anything.319if (guard_object->is_initialised())320{321return 0;322}323// Spin trying to acquire the lock. If we fail to acquire the lock the324// first time then another thread will *probably* initialise it, but if the325// constructor throws an exception then we may have to try again in this326// thread.327for (;;)328{329// Try to acquire the lock.330switch (guard_object->try_lock())331{332// If we failed to acquire the lock but another thread has333// initialised the lock while we were waiting, return immediately334// indicating that initialisation is not required.335case GuardState::InitDone:336return 0;337// If we acquired the lock, return immediately to start338// initialisation.339case GuardState::InitLockSucceeded:340return 1;341// If we didn't acquire the lock, pause and retry.342case GuardState::InitLockFailed:343break;344}345sched_yield();346}347}348349/**350* Releases the lock without marking the object as initialised. This function351* is called if initialising a static causes an exception to be thrown.352*/353extern "C" void __cxa_guard_abort(Guard *guard_object)354{355guard_object->unlock(false);356}357358/**359* Releases the guard and marks the object as initialised. This function is360* called after successful initialisation of a static.361*/362extern "C" void __cxa_guard_release(Guard *guard_object)363{364guard_object->unlock(true);365}366367368