// Path: blob/main/system/lib/llvm-libc/shared/rpc_util.h
// 6170 views
//===-- Shared memory RPC client / server utilities -------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SHARED_RPC_UTIL_H
#define LLVM_LIBC_SHARED_RPC_UTIL_H

#include <stddef.h>
#include <stdint.h>

// Only treat this translation unit as GPU code when compiling for an NVPTX or
// AMDGPU target and we are not in the host pass of a CUDA / HIP compilation.
#if (defined(__NVPTX__) || defined(__AMDGPU__)) &&                             \
    !((defined(__CUDA__) && !defined(__CUDA_ARCH__)) ||                        \
      (defined(__HIP__) && !defined(__HIP_DEVICE_COMPILE__)))
#include <gpuintrin.h>
#define RPC_TARGET_IS_GPU
#endif

// Workaround for missing __has_builtin in < GCC 10.
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif

// Attributes applied to every function in this header. Users may predefine
// RPC_ATTRS to override the default.
#ifndef RPC_ATTRS
#if defined(__CUDA__) || defined(__HIP__)
#define RPC_ATTRS __attribute__((host, device)) inline
#else
#define RPC_ATTRS inline
#endif
#endif

namespace rpc {

/// Tag type selecting the header-private placement form of operator new that
/// is declared directly after this namespace block.
struct placement_t {};

} // namespace rpc

/// Placement allocation function private to this header. It lets
/// rpc::optional construct a value inside its storage without depending on
/// <new>, which keeps the header usable with only <stddef.h> and <stdint.h>.
RPC_ATTRS void *operator new(size_t, void *ptr, rpc::placement_t) noexcept {
  return ptr;
}

/// Matching placement deallocation function. Nothing was allocated, so there
/// is nothing to release if a constructor throws.
RPC_ATTRS void operator delete(void *, void *, rpc::placement_t) noexcept {}

namespace rpc {

/// Freestanding implementation of std::type_identity.
template <typename T> struct type_identity {
  using type = T;
};

/// Freestanding counterpart of std::integral_constant.
template <class T, T v> struct type_constant {
  static inline constexpr T value = v;
};

/// Freestanding implementation of std::remove_reference.
template <class T> struct remove_reference : type_identity<T> {};
template <class T> struct remove_reference<T &> : type_identity<T> {};
template <class T> struct remove_reference<T &&> : type_identity<T> {};

/// Freestanding implementation of std::is_const.
template <class T> struct is_const : type_constant<bool, false> {};
template <class T> struct is_const<const T> : type_constant<bool, true> {};

/// Freestanding implementation of std::move.
template <class T>
RPC_ATTRS constexpr typename remove_reference<T>::type &&move(T &&t) {
  return static_cast<typename remove_reference<T>::type &&>(t);
}

/// Freestanding implementation of std::forward.
template <typename T>
RPC_ATTRS constexpr T &&forward(typename remove_reference<T>::type &value) {
  return static_cast<T &&>(value);
}
template <typename T>
RPC_ATTRS constexpr T &&forward(typename remove_reference<T>::type &&value) {
  return static_cast<T &&>(value);
}

/// Tag requesting in-place construction of the contained value.
struct in_place_t {
  RPC_ATTRS explicit in_place_t() = default;
};

/// Tag denoting an optional that holds no value.
struct nullopt_t {
  RPC_ATTRS constexpr explicit nullopt_t() = default;
};

constexpr inline in_place_t in_place{};
constexpr inline nullopt_t nullopt{};

/// Freestanding and minimal implementation of std::optional.
template <typename T> class optional {
  /// Raw storage for the value plus the flag recording whether it is alive.
  template <typename U> struct OptionalStorage {
    union {
      char empty;
      U stored_value;
    };

    bool in_use = false;

    RPC_ATTRS ~OptionalStorage() { reset(); }

    RPC_ATTRS constexpr OptionalStorage() : empty() {}

    /// Constructs the value in place. The caller is responsible for setting
    /// \c in_use afterwards.
    template <typename... Args>
    RPC_ATTRS constexpr explicit OptionalStorage(in_place_t, Args &&...args)
        : stored_value(rpc::forward<Args>(args)...) {}

    /// Destroys the value if one is alive and marks the storage as empty.
    RPC_ATTRS constexpr void reset() {
      if (in_use)
        stored_value.~U();
      in_use = false;
    }

    /// Stores \p arg: assigns to the live value if there is one, otherwise
    /// constructs a new value from it and marks the storage as in use.
    template <typename Arg> RPC_ATTRS void assign(Arg &&arg) {
      if (in_use) {
        // Assigning in place keeps `opt = *opt` well defined, which a
        // destroy-then-construct sequence would not.
        stored_value = rpc::forward<Arg>(arg);
        return;
      }
      ::new (static_cast<void *>(&stored_value), placement_t{})
          U(rpc::forward<Arg>(arg));
      in_use = true;
    }
  };

  OptionalStorage<T> storage;

public:
  RPC_ATTRS constexpr optional() = default;
  RPC_ATTRS constexpr optional(nullopt_t) {}

  RPC_ATTRS constexpr optional(const T &t) : storage(in_place, t) {
    storage.in_use = true;
  }
  RPC_ATTRS constexpr optional(const optional &) = default;

  // rpc::move is qualified so that ADL cannot also find std::move when T
  // lives in namespace std.
  RPC_ATTRS constexpr optional(T &&t) : storage(in_place, rpc::move(t)) {
    storage.in_use = true;
  }
  RPC_ATTRS constexpr optional(optional &&O) = default;

  /// Stores \p t by move; the optional holds a value afterwards.
  RPC_ATTRS constexpr optional &operator=(T &&t) {
    storage.assign(rpc::move(t));
    return *this;
  }
  RPC_ATTRS constexpr optional &operator=(optional &&) = default;

  /// Stores a copy of \p t; the optional holds a value afterwards.
  RPC_ATTRS constexpr optional &operator=(const T &t) {
    storage.assign(t);
    return *this;
  }
  RPC_ATTRS constexpr optional &operator=(const optional &) = default;

  /// Destroys the contained value, if any.
  RPC_ATTRS constexpr void reset() { storage.reset(); }

  // None of the accessors below check whether a value is present.
  RPC_ATTRS constexpr const T &value() const & { return storage.stored_value; }

  RPC_ATTRS constexpr T &value() & { return storage.stored_value; }

  RPC_ATTRS constexpr explicit operator bool() const { return storage.in_use; }
  RPC_ATTRS constexpr bool has_value() const { return storage.in_use; }
  RPC_ATTRS constexpr const T *operator->() const {
    return &storage.stored_value;
  }
  RPC_ATTRS constexpr T *operator->() { return &storage.stored_value; }
  RPC_ATTRS constexpr const T &operator*() const & {
    return storage.stored_value;
  }
  RPC_ATTRS constexpr T &operator*() & { return storage.stored_value; }

  RPC_ATTRS constexpr T &&value() && { return rpc::move(storage.stored_value); }
  RPC_ATTRS constexpr T &&operator*() && {
    return rpc::move(storage.stored_value);
  }
};

/// Suspend the thread briefly to assist the thread scheduler during busy loops.
RPC_ATTRS void sleep_briefly() {
#if __has_builtin(__nvvm_reflect)
  if (__nvvm_reflect("__CUDA_ARCH") >= 700)
    asm("nanosleep.u32 64;" ::: "memory");
#elif __has_builtin(__builtin_amdgcn_s_sleep)
  __builtin_amdgcn_s_sleep(2);
#elif __has_builtin(__builtin_ia32_pause)
  __builtin_ia32_pause();
#elif __has_builtin(__builtin_arm_isb)
  __builtin_arm_isb(0xf);
#else
  // Simply do nothing if sleeping isn't supported on this platform.
#endif
}

/// Conditional to indicate if this process is running on the GPU.
RPC_ATTRS constexpr bool is_process_gpu() {
#ifdef RPC_TARGET_IS_GPU
  return true;
#else
  return false;
#endif
}

/// Wait for all lanes in the group to complete. This is a no-op on the CPU.
RPC_ATTRS void sync_lane([[maybe_unused]] uint64_t lane_mask) {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_sync_lane(lane_mask);
#endif
}

/// Copies the value from the first active thread to the rest. On the CPU \p x
/// is returned unchanged.
RPC_ATTRS uint32_t broadcast_value([[maybe_unused]] uint64_t lane_mask,
                                   uint32_t x) {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_read_first_lane_u32(lane_mask, x);
#else
  return x;
#endif
}

/// Returns the number of lanes that participate in the RPC interface. This is
/// always 1 on the CPU.
RPC_ATTRS uint32_t get_num_lanes() {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_num_lanes();
#else
  return 1;
#endif
}

/// Returns the lane mask reported by the GPU intrinsics for the group that is
/// executing together, or 1 (a single lane) on the CPU.
RPC_ATTRS uint64_t get_lane_mask() {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_lane_mask();
#else
  return 1;
#endif
}

/// Returns the id of the calling lane inside of the group executing together.
/// This is always 0 on the CPU.
RPC_ATTRS uint32_t get_lane_id() {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_lane_id();
#else
  return 0;
#endif
}

/// Conditional that is only true for a single thread in a lane. It is always
/// true on the CPU.
RPC_ATTRS bool is_first_lane([[maybe_unused]] uint64_t lane_mask) {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_is_first_in_lane(lane_mask);
#else
  return true;
#endif
}

/// Returns a bitmask of threads in the current lane for which \p x is true. On
/// the CPU this is simply \p x converted to an integer.
RPC_ATTRS uint64_t ballot([[maybe_unused]] uint64_t lane_mask, bool x) {
#ifdef RPC_TARGET_IS_GPU
  return __gpu_ballot(lane_mask, x);
#else
  return x;
#endif
}

/// Return \p val aligned "upwards" according to \p align.
template <typename V, typename A>
RPC_ATTRS constexpr V align_up(V val, A align) {
  return ((val + V(align) - 1) / V(align)) * V(align);
}

/// Utility to provide a unified interface between the CPU and GPU's memory
/// model. On the GPU stack variables are always private to a lane so we can
/// simply use the variable passed in. On the CPU we need to allocate enough
/// space for the whole lane and index into it.
template <typename V> RPC_ATTRS V &lane_value(V *val, uint32_t id) {
  if constexpr (is_process_gpu())
    return *val;
  return val[id];
}

/// Advance \p ptr by \p bytes, preserving the constness of the pointee.
template <typename T, typename U> RPC_ATTRS T *advance(T *ptr, U bytes) {
  if constexpr (is_const<T>::value)
    return reinterpret_cast<T *>(reinterpret_cast<const uint8_t *>(ptr) +
                                 bytes);
  else
    return reinterpret_cast<T *>(reinterpret_cast<uint8_t *>(ptr) + bytes);
}

/// Wrapper around the optimal memory copy implementation for the target.
RPC_ATTRS void rpc_memcpy(void *dst, const void *src, size_t count) {
  __builtin_memcpy(dst, src, count);
}

/// Freestanding implementation of std::max. Returns \p a when the two
/// arguments compare equivalent.
template <class T> RPC_ATTRS constexpr const T &max(const T &a, const T &b) {
  return (a < b) ? b : a;
}

} // namespace rpc

#endif // LLVM_LIBC_SHARED_RPC_UTIL_H