Path: blob/21.2-virgl/src/compiler/nir/nir_lower_amul.c
/*
 * Copyright © 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "nir.h"
#include "nir_vla.h"

/* Lowering for amul instructions, for drivers that support imul24.
 * This pass will analyze indirect derefs, and convert corresponding
 * amul instructions to either imul or imul24, depending on the
 * required range.
 *
 * 1) Analyze the uniform variables and build a table of UBOs and SSBOs
 *    that are either too large, or might be too large (unknown size),
 *    for imul24.
 *
 * 2) Loop thru all the intrinsics, finding dereferences of large
 *    variables, and recursively replacing all amul instructions
 *    used with imul.
 *
 * 3) Finally loop again thru all instructions, replacing any remaining
 *    amul with imul24.  At this point any remaining amul instructions
 *    are not involved in calculating an offset into a large variable,
 *    thanks to the 2nd step, so they can be safely replaced with imul24.
 *
 * Using two passes over all the instructions lets us handle the case
 * where, due to CSE, an amul is used to calculate an offset into both
 * a large and a small variable.
 */

typedef struct {
   nir_shader *shader;

   int (*type_size)(const struct glsl_type *, bool);

   /* Tables of UBOs and SSBOs mapping driver_location/base to whether
    * they are too large to use imul24:
    */
   bool *large_ubos;
   bool *large_ssbos;

   /* For cases where we cannot determine the UBO/SSBO index, track if *any*
    * UBO/SSBO is too large for imul24:
    */
   bool has_large_ubo;
   bool has_large_ssbo;

   unsigned max_slot;
} lower_state;

/* Lower 'amul's in the offset src of large variables to 'imul': */
static bool
lower_large_src(nir_src *src, void *s)
{
   lower_state *state = s;

   assert(src->is_ssa);

   nir_instr *parent = src->ssa->parent_instr;

   /* No need to visit instructions we've already visited; this also
    * avoids infinite recursion when phis are involved:
    */
   if (parent->pass_flags)
      return false;

   bool progress = nir_foreach_src(parent, lower_large_src, state);

   if (parent->type == nir_instr_type_alu) {
      nir_alu_instr *alu = nir_instr_as_alu(parent);
      if (alu->op == nir_op_amul) {
         alu->op = nir_op_imul;
         progress = true;
      }
   }

   parent->pass_flags = 1;

   return progress;
}
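
/* For illustration, a hypothetical NIR snippet (the opcodes/intrinsics are
 * real, the SSA numbering is made up): an offset into a large SSBO computed
 * through an amul,
 *
 *    ssa_3 = amul ssa_1, ssa_2
 *    ssa_4 = iadd ssa_3, ssa_0
 *    ssa_5 = intrinsic load_ssbo (ssa_6, ssa_4)
 *
 * lower_large_src() gets called on the offset source (ssa_4 here) and walks
 * backwards through each parent instruction's sources via nir_foreach_src(),
 * so the amul defining ssa_3 is rewritten in place to:
 *
 *    ssa_3 = imul ssa_1, ssa_2
 *
 * The pass_flags marking in lower_large_src() above is what keeps the walk
 * from revisiting instructions, and from recursing forever through phi
 * cycles in loops.
 */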

static bool
large_ubo(lower_state *state, nir_src src)
{
   if (!nir_src_is_const(src))
      return state->has_large_ubo;
   unsigned idx = nir_src_as_uint(src);
   assert(idx < state->shader->info.num_ubos);
   return state->large_ubos[idx];
}

static bool
large_ssbo(lower_state *state, nir_src src)
{
   if (!nir_src_is_const(src))
      return state->has_large_ssbo;
   unsigned idx = nir_src_as_uint(src);
   assert(idx < state->shader->info.num_ssbos);
   return state->large_ssbos[idx];
}

static bool
lower_intrinsic(lower_state *state, nir_intrinsic_instr *intr)
{
   switch (intr->intrinsic) {
   case nir_intrinsic_load_ubo:
      /* src[] = { buffer_index, offset }. */
      if (large_ubo(state, intr->src[0]))
         return lower_large_src(&intr->src[1], state);
      return false;

   case nir_intrinsic_load_ssbo:
      /* src[] = { buffer_index, offset }. */
      if (large_ssbo(state, intr->src[0]))
         return lower_large_src(&intr->src[1], state);
      return false;

   case nir_intrinsic_store_ssbo:
      /* src[] = { value, block_index, offset }. */
      if (large_ssbo(state, intr->src[1]))
         return lower_large_src(&intr->src[2], state);
      return false;

   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
      /* 0: SSBO index
       * 1: offset
       */
      if (large_ssbo(state, intr->src[0]))
         return lower_large_src(&intr->src[1], state);
      return false;

   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_global_atomic_fmin:
   case nir_intrinsic_global_atomic_fmax:
   case nir_intrinsic_global_atomic_fcomp_swap:
      /* Just assume that 24b is not sufficient: */
      return lower_large_src(&intr->src[0], state);

   /* These should all be small enough to unconditionally use imul24: */
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
   case nir_intrinsic_shared_atomic_fadd:
   case nir_intrinsic_shared_atomic_fmin:
   case nir_intrinsic_shared_atomic_fmax:
   case nir_intrinsic_shared_atomic_fcomp_swap:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_store_output:
   default:
      return false;
   }
}

static bool
lower_instr(lower_state *state, nir_instr *instr)
{
   bool progress = false;

   if (instr->type == nir_instr_type_intrinsic) {
      progress |= lower_intrinsic(state, nir_instr_as_intrinsic(instr));
   }

   return progress;
}

static bool
is_large(lower_state *state, nir_variable *var)
{
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned size = state->type_size(type, false);

   /* If the size is not known (i.e. VLA) then assume the worst: */
   if (!size)
      return true;

   return size >= (1 << 23);
}
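
/* A note on the 1 << 23 threshold above (reasoning assumed, not stated in
 * the original): imul24 operates on signed 24-bit values, so an offset it
 * produces is only trustworthy below 2^23.  If every buffer the offset can
 * index is smaller than 8 MiB (1 << 23 bytes), any in-bounds byte offset
 * fits.  For example, a 1 MiB SSBO of vec4s spans 65536 slots * 16 bytes =
 * 2^20 addressable bytes, comfortably within range.
 */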

bool
nir_lower_amul(nir_shader *shader,
               int (*type_size)(const struct glsl_type *, bool))
{
   assert(shader->options->has_imul24);
   assert(type_size);

   NIR_VLA_FILL(bool, large_ubos, shader->info.num_ubos, 0);
   NIR_VLA_FILL(bool, large_ssbos, shader->info.num_ssbos, 0);

   lower_state state = {
      .shader = shader,
      .type_size = type_size,
      .large_ubos = large_ubos,
      .large_ssbos = large_ssbos,
   };

   /* Figure out which UBOs or SSBOs are large enough to be
    * disqualified from imul24:
    */
   nir_foreach_variable_in_shader (var, shader) {
      if (var->data.mode == nir_var_mem_ubo) {
         if (is_large(&state, var)) {
            state.has_large_ubo = true;
            unsigned size = MAX2(1, glsl_array_size(var->type));
            for (unsigned i = 0; i < size; i++)
               state.large_ubos[var->data.binding + i] = true;
         }
      } else if (var->data.mode == nir_var_mem_ssbo) {
         if (is_large(&state, var)) {
            state.has_large_ssbo = true;
            unsigned size = MAX2(1, glsl_array_size(var->type));
            for (unsigned i = 0; i < size; i++)
               state.large_ssbos[var->data.binding + i] = true;
         }
      }
   }

   /* clear pass flags: */
   nir_foreach_function(function, shader) {
      nir_function_impl *impl = function->impl;
      if (!impl)
         continue;

      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            instr->pass_flags = 0;
         }
      }
   }

   bool progress = false;
   nir_foreach_function(function, shader) {
      nir_function_impl *impl = function->impl;

      if (!impl)
         continue;

      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            progress |= lower_instr(&state, instr);
         }
      }
   }

   /* At this point, all 'amul's used in calculating an offset into
    * a large variable have been replaced with 'imul'.  So remaining
    * 'amul's can be replaced with 'imul24':
    */
   nir_foreach_function(function, shader) {
      nir_function_impl *impl = function->impl;

      if (!impl)
         continue;

      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_alu)
               continue;

            nir_alu_instr *alu = nir_instr_as_alu(instr);
            if (alu->op != nir_op_amul)
               continue;

            alu->op = nir_op_imul24;
            progress |= true;
         }
      }

      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   }

   return progress;
}
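
/* Usage sketch (hypothetical caller, not part of this file): a backend whose
 * compiler options advertise has_imul24 would typically run this late in its
 * NIR pipeline, passing the same type_size callback it already uses for i/o
 * lowering.  'my_glsl_type_size' below is an assumed placeholder name:
 *
 *    bool progress = false;
 *    if (nir->options->has_imul24)
 *       NIR_PASS(progress, nir, nir_lower_amul, my_glsl_type_size);
 */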