Path: blob/main/contrib/llvm-project/libc/src/__support/GPU/utils.h
213799 views
//===---------------- Implementation of GPU utils ---------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#ifndef LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H9#define LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H1011#include "src/__support/macros/attributes.h"12#include "src/__support/macros/config.h"13#include "src/__support/macros/properties/architectures.h"1415#if !__has_include(<gpuintrin.h>)16#error "Unsupported compiler"17#endif1819#include <gpuintrin.h>2021namespace LIBC_NAMESPACE_DECL {22namespace gpu {2324template <typename T> using Private = __gpu_private T;25template <typename T> using Constant = __gpu_constant T;26template <typename T> using Local = __gpu_local T;27template <typename T> using Global = __gpu_local T;2829LIBC_INLINE uint32_t get_num_blocks_x() { return __gpu_num_blocks(0); }3031LIBC_INLINE uint32_t get_num_blocks_y() { return __gpu_num_blocks(1); }3233LIBC_INLINE uint32_t get_num_blocks_z() { return __gpu_num_blocks(2); }3435LIBC_INLINE uint64_t get_num_blocks() {36return get_num_blocks_x() * get_num_blocks_y() * get_num_blocks_z();37}3839LIBC_INLINE uint32_t get_block_id_x() { return __gpu_block_id(0); }4041LIBC_INLINE uint32_t get_block_id_y() { return __gpu_block_id(1); }4243LIBC_INLINE uint32_t get_block_id_z() { return __gpu_block_id(2); }4445LIBC_INLINE uint64_t get_block_id() {46return get_block_id_x() + get_num_blocks_x() * get_block_id_y() +47get_num_blocks_x() * get_num_blocks_y() * get_block_id_z();48}4950LIBC_INLINE uint32_t get_num_threads_x() { return __gpu_num_threads(0); }5152LIBC_INLINE uint32_t get_num_threads_y() { return __gpu_num_threads(1); }5354LIBC_INLINE uint32_t get_num_threads_z() { return __gpu_num_threads(2); }5556LIBC_INLINE uint64_t get_num_threads() {57return get_num_threads_x() * get_num_threads_y() * get_num_threads_z();58}5960LIBC_INLINE uint32_t get_thread_id_x() { return __gpu_thread_id(0); }6162LIBC_INLINE uint32_t get_thread_id_y() { return __gpu_thread_id(1); }6364LIBC_INLINE uint32_t get_thread_id_z() { return __gpu_thread_id(2); }6566LIBC_INLINE uint64_t get_thread_id() {67return get_thread_id_x() + get_num_threads_x() * get_thread_id_y() +68get_num_threads_x() * get_num_threads_y() * get_thread_id_z();69}7071LIBC_INLINE uint32_t get_lane_size() { return __gpu_num_lanes(); }7273LIBC_INLINE uint32_t get_lane_id() { return __gpu_lane_id(); }7475LIBC_INLINE uint64_t get_lane_mask() { return __gpu_lane_mask(); }7677LIBC_INLINE uint32_t broadcast_value(uint64_t lane_mask, uint32_t x) {78return __gpu_read_first_lane_u32(lane_mask, x);79}8081LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) {82return __gpu_ballot(lane_mask, x);83}8485LIBC_INLINE void sync_threads() { __gpu_sync_threads(); }8687LIBC_INLINE void sync_lane(uint64_t lane_mask) { __gpu_sync_lane(lane_mask); }8889LIBC_INLINE uint32_t shuffle(uint64_t lane_mask, uint32_t idx, uint32_t x,90uint32_t width = __gpu_num_lanes()) {91return __gpu_shuffle_idx_u32(lane_mask, idx, x, width);92}9394LIBC_INLINE uint64_t shuffle(uint64_t lane_mask, uint32_t idx, uint64_t x,95uint32_t width = __gpu_num_lanes()) {96return __gpu_shuffle_idx_u64(lane_mask, idx, x, width);97}9899template <typename T>100LIBC_INLINE T *shuffle(uint64_t lane_mask, uint32_t idx, T *x,101uint32_t width = __gpu_num_lanes()) {102return reinterpret_cast<T *>(__gpu_shuffle_idx_u64(103lane_mask, idx, reinterpret_cast<uintptr_t>(x), width));104}105106LIBC_INLINE uint64_t match_any(uint64_t lane_mask, uint32_t x) {107return __gpu_match_any_u32(lane_mask, x);108}109110LIBC_INLINE uint64_t match_all(uint64_t lane_mask, uint32_t x) {111return __gpu_match_all_u32(lane_mask, x);112}113114[[noreturn]] LIBC_INLINE void end_program() { __gpu_exit(); }115116LIBC_INLINE bool is_first_lane(uint64_t lane_mask) {117return __gpu_is_first_in_lane(lane_mask);118}119120LIBC_INLINE uint32_t reduce(uint64_t lane_mask, uint32_t x) {121return __gpu_lane_sum_u32(lane_mask, x);122}123124LIBC_INLINE uint32_t scan(uint64_t lane_mask, uint32_t x) {125return __gpu_lane_scan_u32(lane_mask, x);126}127128LIBC_INLINE uint64_t fixed_frequency_clock() {129return __builtin_readsteadycounter();130}131132LIBC_INLINE uint64_t processor_clock() { return __builtin_readcyclecounter(); }133134} // namespace gpu135} // namespace LIBC_NAMESPACE_DECL136137#endif // LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H138139140