Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/compiler/nir/nir_lower_amul.c
4549 views
/*
 * Copyright © 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "nir.h"
#include "nir_vla.h"
/* Lowering for amul instructions, for drivers that support imul24.
 * This pass will analyze indirect derefs, and convert corresponding
 * amul instructions to either imul or imul24, depending on the
 * required range.
 *
 * 1) Analyze the uniform variables and build a table of UBOs and SSBOs
 *    that are either too large, or might be too large (unknown size),
 *    for imul24
 *
 * 2) Loop through all the intrinsics, finding dereferences of
 *    large variables, and recursively replacing all amul instructions
 *    used with imul
 *
 * 3) Finally loop again through all instructions replacing any remaining
 *    amul with imul24.  At this point any remaining amul instructions
 *    are not involved in calculating an offset into a large variable,
 *    thanks to the 2nd step, so they can be safely replaced with imul24.
 *
 * Using two passes over all the instructions lets us handle the case
 * where, due to CSE, an amul is used to calculate an offset into both
 * a large and small variable.
 */
/* Per-run state for the amul lowering pass. */
typedef struct {
   nir_shader *shader;

   /* Driver-provided callback that returns the size of a glsl_type
    * (same contract as the type_size callback used elsewhere in NIR
    * lowering); used to decide whether offsets into a variable can
    * exceed the imul24 range.
    */
   int (*type_size)(const struct glsl_type *, bool);

   /* Tables of UBOs and SSBOs mapping driver_location/base whether
    * they are too large to use imul24:
    */
   bool *large_ubos;
   bool *large_ssbos;

   /* for cases that we cannot determine UBO/SSBO index, track if *any*
    * UBO/SSBO is too large for imul24:
    */
   bool has_large_ubo;
   bool has_large_ssbo;

   /* NOTE(review): max_slot is not referenced anywhere in this file —
    * presumably a leftover or used by a newer revision of this pass;
    * confirm against upstream before removing.
    */
   unsigned max_slot;
} lower_state;
/* Lower 'amul's in offset src of large variables to 'imul': */
71
static bool
72
lower_large_src(nir_src *src, void *s)
73
{
74
lower_state *state = s;
75
76
assert(src->is_ssa);
77
78
nir_instr *parent = src->ssa->parent_instr;
79
80
/* No need to visit instructions we've already visited.. this also
81
* avoids infinite recursion when phi's are involved:
82
*/
83
if (parent->pass_flags)
84
return false;
85
86
bool progress = nir_foreach_src(parent, lower_large_src, state);
87
88
if (parent->type == nir_instr_type_alu) {
89
nir_alu_instr *alu = nir_instr_as_alu(parent);
90
if (alu->op == nir_op_amul) {
91
alu->op = nir_op_imul;
92
progress = true;
93
}
94
}
95
96
parent->pass_flags = 1;
97
98
return progress;
99
}
100
101
static bool
102
large_ubo(lower_state *state, nir_src src)
103
{
104
if (!nir_src_is_const(src))
105
return state->has_large_ubo;
106
unsigned idx = nir_src_as_uint(src);
107
assert(idx < state->shader->info.num_ubos);
108
return state->large_ubos[idx];
109
}
110
111
static bool
112
large_ssbo(lower_state *state, nir_src src)
113
{
114
if (!nir_src_is_const(src))
115
return state->has_large_ssbo;
116
unsigned idx = nir_src_as_uint(src);
117
assert(idx < state->shader->info.num_ssbos);
118
return state->large_ssbos[idx];
119
}
120
121
/* Dispatch on the intrinsic type: when the intrinsic addresses a UBO or
 * SSBO known (or assumed) to be too large for imul24, rewrite the 'amul's
 * feeding its offset source via lower_large_src().  Returns true if any
 * instruction was changed.
 */
static bool
lower_intrinsic(lower_state *state, nir_intrinsic_instr *intr)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_load_ubo:
      /* src[] = { buffer_index, offset }. */
      if (large_ubo(state, intr->src[0]))
         return lower_large_src(&intr->src[1], state);
      return false;

   case nir_intrinsic_load_ssbo:
      /* src[] = { buffer_index, offset }. */
      if (large_ssbo(state, intr->src[0]))
         return lower_large_src(&intr->src[1], state);
      return false;

   case nir_intrinsic_store_ssbo:
      /* src[] = { value, block_index, offset } */
      if (large_ssbo(state, intr->src[1]))
         return lower_large_src(&intr->src[2], state);
      return false;

   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
      /* 0: SSBO index
       * 1: offset
       */
      if (large_ssbo(state, intr->src[0]))
         return lower_large_src(&intr->src[1], state);
      return false;

   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_global_atomic_fmin:
   case nir_intrinsic_global_atomic_fmax:
   case nir_intrinsic_global_atomic_fcomp_swap:
      /* Global addresses have no size information here, so just assume
       * that 24b is not sufficient:
       */
      return lower_large_src(&intr->src[0], state);

   /* These should all be small enough to unconditionally use imul24: */
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
   case nir_intrinsic_shared_atomic_fadd:
   case nir_intrinsic_shared_atomic_fmin:
   case nir_intrinsic_shared_atomic_fmax:
   case nir_intrinsic_shared_atomic_fcomp_swap:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_store_output:
   default:
      return false;
   }
}
static bool
206
lower_instr(lower_state *state, nir_instr *instr)
207
{
208
bool progress = false;
209
210
if (instr->type == nir_instr_type_intrinsic) {
211
progress |= lower_intrinsic(state, nir_instr_as_intrinsic(instr));
212
}
213
214
return progress;
215
}
216
217
static bool
218
is_large(lower_state *state, nir_variable *var)
219
{
220
const struct glsl_type *type = glsl_without_array(var->type);
221
unsigned size = state->type_size(type, false);
222
223
/* if size is not known (ie. VLA) then assume the worst: */
224
if (!size)
225
return true;
226
227
return size >= (1 << 23);
228
}
229
230
/* Entry point: lower every 'amul' in the shader to either 'imul' (when it
 * may feed an offset into a large UBO/SSBO or a global address) or
 * 'imul24' (everywhere else).  'type_size' is the driver's glsl_type size
 * callback.  Returns true if any instruction was changed.
 */
bool
nir_lower_amul(nir_shader *shader,
               int (*type_size)(const struct glsl_type *, bool))
{
   /* This pass only makes sense for drivers that actually have imul24: */
   assert(shader->options->has_imul24);
   assert(type_size);

   /* Per-slot "too large for imul24" tables, zero-filled: */
   NIR_VLA_FILL(bool, large_ubos, shader->info.num_ubos, 0);
   NIR_VLA_FILL(bool, large_ssbos, shader->info.num_ssbos, 0);

   lower_state state = {
      .shader = shader,
      .type_size = type_size,
      .large_ubos = large_ubos,
      .large_ssbos = large_ssbos,
   };

   /* Figure out which UBOs or SSBOs are large enough to be
    * disqualified from imul24:
    */
   nir_foreach_variable_in_shader (var, shader) {
      if (var->data.mode == nir_var_mem_ubo) {
         if (is_large(&state, var)) {
            state.has_large_ubo = true;
            /* an array of blocks occupies one slot per element: */
            unsigned size = MAX2(1, glsl_array_size(var->type));
            /* NOTE(review): this indexes by data.binding while the table
             * is sized by info.num_ubos — if bindings are sparse or don't
             * start at 0 this can write out of bounds; confirm bindings
             * are dense (upstream later reworked this to driver_location).
             */
            for (unsigned i = 0; i < size; i++)
               state.large_ubos[var->data.binding + i] = true;
         }
      } else if (var->data.mode == nir_var_mem_ssbo) {
         if (is_large(&state, var)) {
            state.has_large_ssbo = true;
            unsigned size = MAX2(1, glsl_array_size(var->type));
            /* NOTE(review): same binding-vs-table-size concern as the
             * UBO case above.
             */
            for (unsigned i = 0; i < size; i++)
               state.large_ssbos[var->data.binding + i] = true;
         }
      }
   }

   /* clear pass flags — lower_large_src() uses pass_flags as its
    * visited-set, so they must start out zero:
    */
   nir_foreach_function(function, shader) {
      nir_function_impl *impl = function->impl;
      if (!impl)
         continue;

      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            instr->pass_flags = 0;
         }
      }
   }

   /* Pass 2: rewrite to 'imul' every amul reachable from an offset
    * source of a large variable:
    */
   bool progress = false;
   nir_foreach_function(function, shader) {
      nir_function_impl *impl = function->impl;

      if (!impl)
         continue;

      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            progress |= lower_instr(&state, instr);
         }
      }
   }

   /* At this point, all 'amul's used in calculating an offset into
    * a large variable have been replaced with 'imul'.  So remaining
    * 'amul's can be replaced with 'imul24':
    */
   nir_foreach_function(function, shader) {
      nir_function_impl *impl = function->impl;

      if (!impl)
         continue;

      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_alu)
               continue;

            nir_alu_instr *alu = nir_instr_as_alu(instr);
            if (alu->op != nir_op_amul)
               continue;

            alu->op = nir_op_imul24;
            progress |= true;
         }
      }

      /* Only opcodes changed — no CFG edits — so block indices and
       * dominance are still valid:
       */
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);

   }

   return progress;
}