Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/radeonsi/si_shader_llvm_resources.c
4570 views
1
/*
2
* Copyright 2020 Advanced Micro Devices, Inc.
3
* All Rights Reserved.
4
*
5
* Permission is hereby granted, free of charge, to any person obtaining a
6
* copy of this software and associated documentation files (the "Software"),
7
* to deal in the Software without restriction, including without limitation
8
* on the rights to use, copy, modify, merge, publish, distribute, sub
9
* license, and/or sell copies of the Software, and to permit persons to whom
10
* the Software is furnished to do so, subject to the following conditions:
11
*
12
* The above copyright notice and this permission notice (including the next
13
* paragraph) shall be included in all copies or substantial portions of the
14
* Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22
* USE OR OTHER DEALINGS IN THE SOFTWARE.
23
*/
24
25
#include "si_pipe.h"
26
#include "si_shader_internal.h"
27
#include "sid.h"
28
29
/**
30
* Return a value that is equal to the given i32 \p index if it lies in [0,num)
31
* or an undefined value in the same interval otherwise.
32
*/
33
static LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx, LLVMValueRef index,
34
unsigned num)
35
{
36
LLVMBuilderRef builder = ctx->ac.builder;
37
LLVMValueRef c_max = LLVMConstInt(ctx->ac.i32, num - 1, 0);
38
LLVMValueRef cc;
39
40
if (util_is_power_of_two_or_zero(num)) {
41
index = LLVMBuildAnd(builder, index, c_max, "");
42
} else {
43
/* In theory, this MAX pattern should result in code that is
44
* as good as the bit-wise AND above.
45
*
46
* In practice, LLVM generates worse code (at the time of
47
* writing), because its value tracking is not strong enough.
48
*/
49
cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
50
index = LLVMBuildSelect(builder, cc, index, c_max, "");
51
}
52
53
return index;
54
}
55
56
static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx)
57
{
58
LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers);
59
struct si_shader_selector *sel = ctx->shader->selector;
60
61
/* Do the bounds checking with a descriptor, because
62
* doing computation and manual bounds checking of 64-bit
63
* addresses generates horrible VALU code with very high
64
* VGPR usage and very low SIMD occupancy.
65
*/
66
ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");
67
68
LLVMValueRef desc0, desc1;
69
desc0 = ptr;
70
desc1 = LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
71
72
uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
73
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
74
75
if (ctx->screen->info.chip_class >= GFX10)
76
rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
77
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
78
else
79
rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
80
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
81
82
LLVMValueRef desc_elems[] = {desc0, desc1,
83
LLVMConstInt(ctx->ac.i32, sel->info.constbuf0_num_slots * 16, 0),
84
LLVMConstInt(ctx->ac.i32, rsrc3, false)};
85
86
return ac_build_gather_values(&ctx->ac, desc_elems, 4);
87
}
88
89
/**
 * ABI callback returning the descriptor of the constant buffer selected by
 * \p index.
 *
 * \p desc_set, \p binding and \p valid_binding are part of the callback
 * signature but are not used here.
 *
 * When the shader has exactly one UBO and no SSBOs, the descriptor is built
 * by load_const_buffer_desc_fast_path(). Otherwise \p index is clamped to
 * num_const_buffers, offset by SI_NUM_SHADER_BUFFERS and used to load the
 * descriptor from the const_and_shader_buffers list.
 */
static LLVMValueRef load_ubo(struct ac_shader_abi *abi,
                             unsigned desc_set, unsigned binding,
                             bool valid_binding, LLVMValueRef index)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   struct si_shader_selector *sel = ctx->shader->selector;
   LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers);

   bool single_ubo_only = sel->info.base.num_ubos == 1 && sel->info.base.num_ssbos == 0;
   if (single_ubo_only)
      return load_const_buffer_desc_fast_path(ctx);

   /* Constant buffers are addressed after the SI_NUM_SHADER_BUFFERS slots. */
   LLVMValueRef slot = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
   LLVMValueRef first_const_slot = LLVMConstInt(ctx->ac.i32, SI_NUM_SHADER_BUFFERS, 0);
   slot = LLVMBuildAdd(ctx->ac.builder, slot, first_const_slot, "");

   return ac_build_load_to_sgpr(&ctx->ac, list, slot);
}
108
109
/**
 * ABI callback returning the descriptor of the shader buffer selected by
 * \p index.
 *
 * \p write and \p non_uniform are part of the callback signature but are not
 * used here.
 *
 * A constant index below cs_num_shaderbufs_in_user_sgprs is served straight
 * from the matching cs_shaderbuf argument. Every other index is clamped to
 * num_shader_buffers and the descriptor is loaded from slot
 * (SI_NUM_SHADER_BUFFERS - 1 - index) of the const_and_shader_buffers list.
 */
static LLVMValueRef load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write, bool non_uniform)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);

   /* Fast path: the shader buffer descriptor is already in user SGPRs. */
   if (LLVMIsConstant(index)) {
      unsigned long long const_slot = LLVMConstIntGetZExtValue(index);

      if (const_slot < ctx->shader->selector->cs_num_shaderbufs_in_user_sgprs)
         return ac_get_arg(&ctx->ac, ctx->cs_shaderbuf[const_slot]);
   }

   LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers);

   /* Shader buffers are addressed downwards from the last shader buffer slot. */
   LLVMValueRef slot = si_llvm_bound_index(ctx, index, ctx->num_shader_buffers);
   LLVMValueRef last_slot = LLVMConstInt(ctx->ac.i32, SI_NUM_SHADER_BUFFERS - 1, 0);
   slot = LLVMBuildSub(ctx->ac.builder, last_slot, slot, "");

   return ac_build_load_to_sgpr(&ctx->ac, list, slot);
}
126
127
/**
128
* Given a 256-bit resource descriptor, force the DCC enable bit to off.
129
*
130
* At least on Tonga, executing image stores on images with DCC enabled and
131
* non-trivial can eventually lead to lockups. This can occur when an
132
* application binds an image as read-only but then uses a shader that writes
133
* to it. The OpenGL spec allows almost arbitrarily bad behavior (including
134
* program termination) in this case, but it doesn't cost much to be a bit
135
* nicer: disabling DCC in the shader still leads to undefined results but
136
* avoids the lockup.
137
*/
138
static LLVMValueRef force_dcc_off(struct si_shader_context *ctx, LLVMValueRef rsrc)
139
{
140
if (ctx->screen->info.chip_class <= GFX7) {
141
return rsrc;
142
} else {
143
LLVMValueRef i32_6 = LLVMConstInt(ctx->ac.i32, 6, 0);
144
LLVMValueRef i32_C = LLVMConstInt(ctx->ac.i32, C_008F28_COMPRESSION_EN, 0);
145
LLVMValueRef tmp;
146
147
tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
148
tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
149
return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
150
}
151
}
152
153
/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
154
* adjust "index" to point to FMASK. */
155
static LLVMValueRef si_load_image_desc(struct si_shader_context *ctx, LLVMValueRef list,
156
LLVMValueRef index, enum ac_descriptor_type desc_type,
157
bool uses_store, bool bindless)
158
{
159
LLVMBuilderRef builder = ctx->ac.builder;
160
LLVMValueRef rsrc;
161
162
if (desc_type == AC_DESC_BUFFER) {
163
index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 2, 0), ctx->ac.i32_1);
164
list = LLVMBuildPointerCast(builder, list, ac_array_in_const32_addr_space(ctx->ac.v4i32), "");
165
} else {
166
assert(desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_FMASK);
167
}
168
169
if (bindless)
170
rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index);
171
else
172
rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
173
174
if (desc_type == AC_DESC_IMAGE && uses_store && ctx->ac.chip_class <= GFX9)
175
rsrc = force_dcc_off(ctx, rsrc);
176
return rsrc;
177
}
178
179
/**
180
* Load an image view, fmask view. or sampler state descriptor.
181
*/
182
static LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx, LLVMValueRef list,
183
LLVMValueRef index, enum ac_descriptor_type type)
184
{
185
LLVMBuilderRef builder = ctx->ac.builder;
186
187
switch (type) {
188
case AC_DESC_IMAGE:
189
/* The image is at [0:7]. */
190
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, 2, 0), "");
191
break;
192
case AC_DESC_BUFFER:
193
/* The buffer is in [4:7]. */
194
index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 4, 0), ctx->ac.i32_1);
195
list = LLVMBuildPointerCast(builder, list, ac_array_in_const32_addr_space(ctx->ac.v4i32), "");
196
break;
197
case AC_DESC_FMASK:
198
/* The FMASK is at [8:15]. */
199
index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 2, 0), ctx->ac.i32_1);
200
break;
201
case AC_DESC_SAMPLER:
202
/* The sampler state is at [12:15]. */
203
index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 4, 0),
204
LLVMConstInt(ctx->ac.i32, 3, 0));
205
list = LLVMBuildPointerCast(builder, list, ac_array_in_const32_addr_space(ctx->ac.v4i32), "");
206
break;
207
case AC_DESC_PLANE_0:
208
case AC_DESC_PLANE_1:
209
case AC_DESC_PLANE_2:
210
/* Only used for the multiplane image support for Vulkan. Should
211
* never be reached in radeonsi.
212
*/
213
unreachable("Plane descriptor requested in radeonsi.");
214
}
215
216
return ac_build_load_to_sgpr(&ctx->ac, list, index);
217
}
218
219
/**
 * ABI callback that loads an image, FMASK, buffer, or sampler state
 * descriptor.
 *
 * \param abi             shader ABI; used to recover the si_shader_context
 * \param descriptor_set  must be 0 (asserted)
 * \param base_index      added to \p constant_index to form the constant slot
 * \param constant_index  constant part of the slot index
 * \param dynamic_index   for bindless access: the bindless handle (it is
 *                        multiplied by an i64 constant, so it must be i64);
 *                        otherwise an optional i32 dynamic offset added to
 *                        the constant slot, or NULL
 * \param desc_type       descriptor kind; must be <= AC_DESC_BUFFER (asserted)
 * \param image           true for image slots, false for sampler slots
 * \param write           forwarded to si_load_image_desc() as "uses_store"
 * \param bindless        true to load from the bindless descriptor list
 * \return the loaded descriptor value
 */
static LLVMValueRef si_nir_load_sampler_desc(struct ac_shader_abi *abi, unsigned descriptor_set,
                                             unsigned base_index, unsigned constant_index,
                                             LLVMValueRef dynamic_index,
                                             enum ac_descriptor_type desc_type, bool image,
                                             bool write, bool bindless)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   LLVMBuilderRef builder = ctx->ac.builder;
   unsigned const_index = base_index + constant_index;

   assert(!descriptor_set);
   assert(desc_type <= AC_DESC_BUFFER);

   if (bindless) {
      LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->bindless_samplers_and_images);

      /* dynamic_index is the bindless handle */
      if (image) {
         /* Bindless image descriptors use 16-dword slots. */
         dynamic_index =
            LLVMBuildMul(ctx->ac.builder, dynamic_index, LLVMConstInt(ctx->ac.i64, 2, 0), "");
         /* FMASK is right after the image.
          *
          * The handle was just multiplied by an i64 constant, so the
          * increment must be an i64 constant as well: LLVM binary operators
          * require both operands to have the same type.
          */
         if (desc_type == AC_DESC_FMASK) {
            dynamic_index = LLVMBuildAdd(ctx->ac.builder, dynamic_index,
                                         LLVMConstInt(ctx->ac.i64, 1, 0), "");
         }

         return si_load_image_desc(ctx, list, dynamic_index, desc_type, write, true);
      }

      /* Since bindless handle arithmetic can contain an unsigned integer
       * wraparound and si_load_sampler_desc assumes there isn't any,
       * use GEP without "inbounds" (inside ac_build_pointer_add)
       * to prevent incorrect code generation and hangs.
       */
      dynamic_index =
         LLVMBuildMul(ctx->ac.builder, dynamic_index, LLVMConstInt(ctx->ac.i64, 2, 0), "");
      list = ac_build_pointer_add(&ctx->ac, list, dynamic_index);
      return si_load_sampler_desc(ctx, list, ctx->ac.i32_0, desc_type);
   }

   unsigned num_slots = image ? ctx->num_images : ctx->num_samplers;
   assert(const_index < num_slots || dynamic_index);

   LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->samplers_and_images);
   LLVMValueRef index = LLVMConstInt(ctx->ac.i32, const_index, false);

   if (dynamic_index) {
      index = LLVMBuildAdd(builder, index, dynamic_index, "");

      /* From the GL_ARB_shader_image_load_store extension spec:
       *
       *    If a shader performs an image load, store, or atomic
       *    operation using an image variable declared as an array,
       *    and if the index used to select an individual element is
       *    negative or greater than or equal to the size of the
       *    array, the results of the operation are undefined but may
       *    not lead to termination.
       */
      index = si_llvm_bound_index(ctx, index, num_slots);
   }

   if (image) {
      /* Fast path if the image is in user SGPRs. */
      if (!dynamic_index &&
          const_index < ctx->shader->selector->cs_num_images_in_user_sgprs &&
          (desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_BUFFER))
         return ac_get_arg(&ctx->ac, ctx->cs_image[const_index]);

      /* FMASKs are separate from images. */
      if (desc_type == AC_DESC_FMASK) {
         index =
            LLVMBuildAdd(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, SI_NUM_IMAGES, 0), "");
      }
      /* Image slots are addressed downwards from the last image slot. */
      index = LLVMBuildSub(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, SI_NUM_IMAGE_SLOTS - 1, 0),
                           index, "");
      return si_load_image_desc(ctx, list, index, desc_type, write, false);
   }

   /* Sampler slots are offset by SI_NUM_IMAGE_SLOTS / 2 in the list. */
   index = LLVMBuildAdd(ctx->ac.builder, index,
                        LLVMConstInt(ctx->ac.i32, SI_NUM_IMAGE_SLOTS / 2, 0), "");
   return si_load_sampler_desc(ctx, list, index, desc_type);
}
301
302
void si_llvm_init_resource_callbacks(struct si_shader_context *ctx)
303
{
304
ctx->abi.load_ubo = load_ubo;
305
ctx->abi.load_ssbo = load_ssbo;
306
ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
307
}
308
309