Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/libc/src/__support/GPU/utils.h
213799 views
1
//===---------------- Implementation of GPU utils ---------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#ifndef LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H
10
#define LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H
11
12
#include "src/__support/macros/attributes.h"
13
#include "src/__support/macros/config.h"
14
#include "src/__support/macros/properties/architectures.h"
15
16
#if !__has_include(<gpuintrin.h>)
17
#error "Unsupported compiler"
18
#endif
19
20
#include <gpuintrin.h>
21
22
namespace LIBC_NAMESPACE_DECL {
23
namespace gpu {
24
25
/// Alias for `T` qualified with the `__gpu_private` qualifier provided by
/// <gpuintrin.h>.
template <class T>
using Private = __gpu_private T;
26
/// Alias for `T` qualified with the `__gpu_constant` qualifier provided by
/// <gpuintrin.h>.
template <class T>
using Constant = __gpu_constant T;
27
/// Alias for `T` qualified with the `__gpu_local` qualifier provided by
/// <gpuintrin.h>.
template <class T>
using Local = __gpu_local T;
28
template <typename T> using Global = __gpu_local T;
29
30
/// Returns the block count reported by `__gpu_num_blocks` for dimension 0 (x).
LIBC_INLINE uint32_t get_num_blocks_x() {
  const uint32_t count = __gpu_num_blocks(0);
  return count;
}
31
32
/// Returns the block count reported by `__gpu_num_blocks` for dimension 1 (y).
LIBC_INLINE uint32_t get_num_blocks_y() {
  const uint32_t count = __gpu_num_blocks(1);
  return count;
}
33
34
/// Returns the block count reported by `__gpu_num_blocks` for dimension 2 (z).
LIBC_INLINE uint32_t get_num_blocks_z() {
  const uint32_t count = __gpu_num_blocks(2);
  return count;
}
35
36
/// Returns the total number of blocks, i.e. the product of the x, y and z
/// block counts.
///
/// The per-dimension counts are `uint32_t`, so the first operand is widened to
/// `uint64_t` to compute the whole product in 64 bits. Without the widening the
/// multiplication would wrap modulo 2^32 before being converted to the return
/// type.
LIBC_INLINE uint64_t get_num_blocks() {
  return static_cast<uint64_t>(get_num_blocks_x()) * get_num_blocks_y() *
         get_num_blocks_z();
}
39
40
/// Returns the block index reported by `__gpu_block_id` for dimension 0 (x).
LIBC_INLINE uint32_t get_block_id_x() {
  const uint32_t id = __gpu_block_id(0);
  return id;
}
41
42
/// Returns the block index reported by `__gpu_block_id` for dimension 1 (y).
LIBC_INLINE uint32_t get_block_id_y() {
  const uint32_t id = __gpu_block_id(1);
  return id;
}
43
44
/// Returns the block index reported by `__gpu_block_id` for dimension 2 (z).
LIBC_INLINE uint32_t get_block_id_z() {
  const uint32_t id = __gpu_block_id(2);
  return id;
}
45
46
/// Returns the linearized block index, computed as
///   id_x + num_x * id_y + num_x * num_y * id_z.
///
/// The block counts are held in `uint64_t` locals so every multiplication and
/// addition is carried out in 64 bits. With plain `uint32_t` operands the
/// expression would wrap modulo 2^32 before being widened to the return type.
LIBC_INLINE uint64_t get_block_id() {
  const uint64_t num_x = get_num_blocks_x();
  const uint64_t num_y = get_num_blocks_y();
  return get_block_id_x() + num_x * get_block_id_y() +
         num_x * num_y * get_block_id_z();
}
50
51
/// Returns the thread count reported by `__gpu_num_threads` for dimension 0
/// (x).
LIBC_INLINE uint32_t get_num_threads_x() {
  const uint32_t count = __gpu_num_threads(0);
  return count;
}
52
53
/// Returns the thread count reported by `__gpu_num_threads` for dimension 1
/// (y).
LIBC_INLINE uint32_t get_num_threads_y() {
  const uint32_t count = __gpu_num_threads(1);
  return count;
}
54
55
/// Returns the thread count reported by `__gpu_num_threads` for dimension 2
/// (z).
LIBC_INLINE uint32_t get_num_threads_z() {
  const uint32_t count = __gpu_num_threads(2);
  return count;
}
56
57
/// Returns the total number of threads, i.e. the product of the x, y and z
/// thread counts.
///
/// The first operand is widened to `uint64_t` so the product is evaluated in
/// 64 bits, matching the return type, rather than in 32 bits where it could
/// wrap before conversion.
LIBC_INLINE uint64_t get_num_threads() {
  return static_cast<uint64_t>(get_num_threads_x()) * get_num_threads_y() *
         get_num_threads_z();
}
60
61
/// Returns the thread index reported by `__gpu_thread_id` for dimension 0 (x).
LIBC_INLINE uint32_t get_thread_id_x() {
  const uint32_t id = __gpu_thread_id(0);
  return id;
}
62
63
/// Returns the thread index reported by `__gpu_thread_id` for dimension 1 (y).
LIBC_INLINE uint32_t get_thread_id_y() {
  const uint32_t id = __gpu_thread_id(1);
  return id;
}
64
65
/// Returns the thread index reported by `__gpu_thread_id` for dimension 2 (z).
LIBC_INLINE uint32_t get_thread_id_z() {
  const uint32_t id = __gpu_thread_id(2);
  return id;
}
66
67
/// Returns the linearized thread index, computed as
///   id_x + num_x * id_y + num_x * num_y * id_z.
///
/// The thread counts are held in `uint64_t` locals so the arithmetic is done
/// in 64 bits, matching the return type, instead of in 32 bits where it could
/// wrap before conversion.
LIBC_INLINE uint64_t get_thread_id() {
  const uint64_t num_x = get_num_threads_x();
  const uint64_t num_y = get_num_threads_y();
  return get_thread_id_x() + num_x * get_thread_id_y() +
         num_x * num_y * get_thread_id_z();
}
71
72
/// Returns the value of `__gpu_num_lanes()`.
LIBC_INLINE uint32_t get_lane_size() {
  const uint32_t lanes = __gpu_num_lanes();
  return lanes;
}
73
74
/// Returns the value of `__gpu_lane_id()`.
LIBC_INLINE uint32_t get_lane_id() {
  const uint32_t lane = __gpu_lane_id();
  return lane;
}
75
76
/// Returns the value of `__gpu_lane_mask()` as a 64-bit mask.
LIBC_INLINE uint64_t get_lane_mask() {
  const uint64_t mask = __gpu_lane_mask();
  return mask;
}
77
78
/// Forwards `lane_mask` and `x` to `__gpu_read_first_lane_u32` and returns its
/// result.
LIBC_INLINE uint32_t broadcast_value(uint64_t lane_mask, uint32_t x) {
  const uint32_t first = __gpu_read_first_lane_u32(lane_mask, x);
  return first;
}
81
82
/// Forwards `lane_mask` and the predicate `x` to `__gpu_ballot` and returns the
/// resulting 64-bit mask.
LIBC_INLINE uint64_t ballot(uint64_t lane_mask, bool x) {
  const uint64_t votes = __gpu_ballot(lane_mask, x);
  return votes;
}
85
86
/// Invokes `__gpu_sync_threads()`.
LIBC_INLINE void sync_threads() {
  __gpu_sync_threads();
}
87
88
/// Invokes `__gpu_sync_lane` with the given `lane_mask`.
LIBC_INLINE void sync_lane(uint64_t lane_mask) {
  __gpu_sync_lane(lane_mask);
}
89
90
/// 32-bit overload: forwards all arguments to `__gpu_shuffle_idx_u32` and
/// returns its result. `width` defaults to `__gpu_num_lanes()`.
LIBC_INLINE uint32_t shuffle(uint64_t lane_mask, uint32_t idx, uint32_t x,
                             uint32_t width = __gpu_num_lanes()) {
  const uint32_t shuffled = __gpu_shuffle_idx_u32(lane_mask, idx, x, width);
  return shuffled;
}
94
95
/// 64-bit overload: forwards all arguments to `__gpu_shuffle_idx_u64` and
/// returns its result. `width` defaults to `__gpu_num_lanes()`.
LIBC_INLINE uint64_t shuffle(uint64_t lane_mask, uint32_t idx, uint64_t x,
                             uint32_t width = __gpu_num_lanes()) {
  const uint64_t shuffled = __gpu_shuffle_idx_u64(lane_mask, idx, x, width);
  return shuffled;
}
99
100
template <typename T>
101
LIBC_INLINE T *shuffle(uint64_t lane_mask, uint32_t idx, T *x,
102
uint32_t width = __gpu_num_lanes()) {
103
return reinterpret_cast<T *>(__gpu_shuffle_idx_u64(
104
lane_mask, idx, reinterpret_cast<uintptr_t>(x), width));
105
}
106
107
/// Forwards `lane_mask` and `x` to `__gpu_match_any_u32` and returns the
/// resulting 64-bit mask.
LIBC_INLINE uint64_t match_any(uint64_t lane_mask, uint32_t x) {
  const uint64_t matches = __gpu_match_any_u32(lane_mask, x);
  return matches;
}
110
111
/// Forwards `lane_mask` and `x` to `__gpu_match_all_u32` and returns the
/// resulting 64-bit mask.
LIBC_INLINE uint64_t match_all(uint64_t lane_mask, uint32_t x) {
  const uint64_t matches = __gpu_match_all_u32(lane_mask, x);
  return matches;
}
114
115
/// Calls `__gpu_exit()`; declared `[[noreturn]]`, so control never comes back
/// to the caller.
[[noreturn]] LIBC_INLINE void end_program() {
  __gpu_exit();
}
116
117
/// Returns the result of `__gpu_is_first_in_lane(lane_mask)`.
LIBC_INLINE bool is_first_lane(uint64_t lane_mask) {
  const bool first = __gpu_is_first_in_lane(lane_mask);
  return first;
}
120
121
/// Forwards `lane_mask` and `x` to `__gpu_lane_sum_u32` and returns its result.
LIBC_INLINE uint32_t reduce(uint64_t lane_mask, uint32_t x) {
  const uint32_t sum = __gpu_lane_sum_u32(lane_mask, x);
  return sum;
}
124
125
/// Forwards `lane_mask` and `x` to `__gpu_lane_scan_u32` and returns its
/// result.
LIBC_INLINE uint32_t scan(uint64_t lane_mask, uint32_t x) {
  const uint32_t partial = __gpu_lane_scan_u32(lane_mask, x);
  return partial;
}
128
129
/// Returns the current value of `__builtin_readsteadycounter()`.
LIBC_INLINE uint64_t fixed_frequency_clock() {
  const uint64_t ticks = __builtin_readsteadycounter();
  return ticks;
}
132
133
/// Returns the current value of `__builtin_readcyclecounter()`.
LIBC_INLINE uint64_t processor_clock() {
  const uint64_t cycles = __builtin_readcyclecounter();
  return cycles;
}
134
135
} // namespace gpu
136
} // namespace LIBC_NAMESPACE_DECL
137
138
#endif // LLVM_LIBC_SRC___SUPPORT_GPU_UTILS_H
139
140