CoCalc -- vc4_nir_lower

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/vc4/vc4_nir_lower_io.c
⁴⁵⁷⁰ views
1
/*
2
 * Copyright © 2015 Broadcom
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
 * IN THE SOFTWARE.
22
 */
23

24
#include "vc4_qir.h"
25
#include "compiler/nir/nir_builder.h"
26
#include "util/format/u_format.h"
27
#include "util/u_helpers.h"
28

29
/**
30
 * Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
31
 * intrinsics into something amenable to the VC4 architecture.
32
 *
33
 * Currently, it splits VS inputs and uniforms into scalars, drops any
34
 * non-position outputs in coordinate shaders, and fixes up the addressing on
35
 * indirect uniform loads.  FS input and VS output scalarization is handled by
36
 * nir_lower_io_to_scalar().
37
 */
38

39
static void
40
replace_intrinsic_with_vec(nir_builder *b, nir_intrinsic_instr *intr,
41
                           nir_ssa_def **comps)
42
{
43

44
        /* Batch things back together into a vector.  This will get split by
45
         * the later ALU scalarization pass.
46
         */
47
        nir_ssa_def *vec = nir_vec(b, comps, intr->num_components);
48

49
        /* Replace the old intrinsic with a reference to our reconstructed
50
         * vector.
51
         */
52
        nir_ssa_def_rewrite_uses(&intr->dest.ssa, vec);
53
        nir_instr_remove(&intr->instr);
54
}
55

56
static nir_ssa_def *
57
vc4_nir_unpack_8i(nir_builder *b, nir_ssa_def *src, unsigned chan)
58
{
59
        return nir_ubitfield_extract(b,
60
                                     src,
61
                                     nir_imm_int(b, 8 * chan),
62
                                     nir_imm_int(b, 8));
63
}
64

65
/** Returns the 16 bit field as a sign-extended 32-bit value. */
66
static nir_ssa_def *
67
vc4_nir_unpack_16i(nir_builder *b, nir_ssa_def *src, unsigned chan)
68
{
69
        return nir_ibitfield_extract(b,
70
                                     src,
71
                                     nir_imm_int(b, 16 * chan),
72
                                     nir_imm_int(b, 16));
73
}
74

75
/** Returns the 16 bit field as an unsigned 32 bit value. */
76
static nir_ssa_def *
77
vc4_nir_unpack_16u(nir_builder *b, nir_ssa_def *src, unsigned chan)
78
{
79
        if (chan == 0) {
80
                return nir_iand(b, src, nir_imm_int(b, 0xffff));
81
        } else {
82
                return nir_ushr(b, src, nir_imm_int(b, 16));
83
        }
84
}
85

86
static nir_ssa_def *
87
vc4_nir_unpack_8f(nir_builder *b, nir_ssa_def *src, unsigned chan)
88
{
89
        return nir_channel(b, nir_unpack_unorm_4x8(b, src), chan);
90
}
91

92
static nir_ssa_def *
93
vc4_nir_get_vattr_channel_vpm(struct vc4_compile *c,
94
                              nir_builder *b,
95
                              nir_ssa_def **vpm_reads,
96
                              uint8_t swiz,
97
                              const struct util_format_description *desc)
98
{
99
        const struct util_format_channel_description *chan =
100
                &desc->channel[swiz];
101
        nir_ssa_def *temp;
102

103
        if (swiz > PIPE_SWIZZLE_W) {
104
                return vc4_nir_get_swizzled_channel(b, vpm_reads, swiz);
105
        } else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_FLOAT) {
106
                return vc4_nir_get_swizzled_channel(b, vpm_reads, swiz);
107
        } else if (chan->size == 32 && chan->type == UTIL_FORMAT_TYPE_SIGNED) {
108
                if (chan->normalized) {
109
                        return nir_fmul(b,
110
                                        nir_i2f32(b, vpm_reads[swiz]),
111
                                        nir_imm_float(b,
112
                                                      1.0 / 0x7fffffff));
113
                } else {
114
                        return nir_i2f32(b, vpm_reads[swiz]);
115
                }
116
        } else if (chan->size == 8 &&
117
                   (chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
118
                    chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
119
                nir_ssa_def *vpm = vpm_reads[0];
120
                if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
121
                        temp = nir_ixor(b, vpm, nir_imm_int(b, 0x80808080));
122
                        if (chan->normalized) {
123
                                return nir_fsub(b, nir_fmul(b,
124
                                                            vc4_nir_unpack_8f(b, temp, swiz),
125
                                                            nir_imm_float(b, 2.0)),
126
                                                nir_imm_float(b, 1.0));
127
                        } else {
128
                                return nir_fadd(b,
129
                                                nir_i2f32(b,
130
                                                          vc4_nir_unpack_8i(b, temp,
131
                                                                            swiz)),
132
                                                nir_imm_float(b, -128.0));
133
                        }
134
                } else {
135
                        if (chan->normalized) {
136
                                return vc4_nir_unpack_8f(b, vpm, swiz);
137
                        } else {
138
                                return nir_i2f32(b, vc4_nir_unpack_8i(b, vpm, swiz));
139
                        }
140
                }
141
        } else if (chan->size == 16 &&
142
                   (chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
143
                    chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
144
                nir_ssa_def *vpm = vpm_reads[swiz / 2];
145

146
                /* Note that UNPACK_16F eats a half float, not ints, so we use
147
                 * UNPACK_16_I for all of these.
148
                 */
149
                if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
150
                        temp = nir_i2f32(b, vc4_nir_unpack_16i(b, vpm, swiz & 1));
151
                        if (chan->normalized) {
152
                                return nir_fmul(b, temp,
153
                                                nir_imm_float(b, 1/32768.0f));
154
                        } else {
155
                                return temp;
156
                        }
157
                } else {
158
                        temp = nir_i2f32(b, vc4_nir_unpack_16u(b, vpm, swiz & 1));
159
                        if (chan->normalized) {
160
                                return nir_fmul(b, temp,
161
                                                nir_imm_float(b, 1 / 65535.0));
162
                        } else {
163
                                return temp;
164
                        }
165
                }
166
        } else {
167
                return NULL;
168
        }
169
}
170

171
static void
172
vc4_nir_lower_vertex_attr(struct vc4_compile *c, nir_builder *b,
173
                          nir_intrinsic_instr *intr)
174
{
175
        b->cursor = nir_before_instr(&intr->instr);
176

177
        int attr = nir_intrinsic_base(intr);
178
        enum pipe_format format = c->vs_key->attr_formats[attr];
179
        uint32_t attr_size = util_format_get_blocksize(format);
180

181
        /* We only accept direct outputs and TGSI only ever gives them to us
182
         * with an offset value of 0.
183
         */
184
        assert(nir_src_as_uint(intr->src[0]) == 0);
185

186
        /* Generate dword loads for the VPM values (Since these intrinsics may
187
         * be reordered, the actual reads will be generated at the top of the
188
         * shader by ntq_setup_inputs().
189
         */
190
        nir_ssa_def *vpm_reads[4];
191
        for (int i = 0; i < align(attr_size, 4) / 4; i++)
192
                vpm_reads[i] = nir_load_input(b, 1, 32, nir_imm_int(b, 0),
193
                                              .base = nir_intrinsic_base(intr),
194
                                              .component = i);
195

196
        bool format_warned = false;
197
        const struct util_format_description *desc =
198
                util_format_description(format);
199

200
        nir_ssa_def *dests[4];
201
        for (int i = 0; i < intr->num_components; i++) {
202
                uint8_t swiz = desc->swizzle[i];
203
                dests[i] = vc4_nir_get_vattr_channel_vpm(c, b, vpm_reads, swiz,
204
                                                         desc);
205

206
                if (!dests[i]) {
207
                        if (!format_warned) {
208
                                fprintf(stderr,
209
                                        "vtx element %d unsupported type: %s\n",
210
                                        attr, util_format_name(format));
211
                                format_warned = true;
212
                        }
213
                        dests[i] = nir_imm_float(b, 0.0);
214
                }
215
        }
216

217
        replace_intrinsic_with_vec(b, intr, dests);
218
}
219

220
static void
221
vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b,
222
                       nir_intrinsic_instr *intr)
223
{
224
        b->cursor = nir_after_instr(&intr->instr);
225

226
        if (nir_intrinsic_base(intr) >= VC4_NIR_TLB_COLOR_READ_INPUT &&
227
            nir_intrinsic_base(intr) < (VC4_NIR_TLB_COLOR_READ_INPUT +
228
                                        VC4_MAX_SAMPLES)) {
229
                /* This doesn't need any lowering. */
230
                return;
231
        }
232

233
        nir_variable *input_var =
234
                nir_find_variable_with_driver_location(c->s, nir_var_shader_in,
235
                                                       nir_intrinsic_base(intr));
236
        assert(input_var);
237

238
        int comp = nir_intrinsic_component(intr);
239

240
        /* Lower away point coordinates, and fix up PNTC. */
241
        if (util_varying_is_point_coord(input_var->data.location,
242
                                        c->fs_key->point_sprite_mask)) {
243
                assert(intr->num_components == 1);
244

245
                nir_ssa_def *result = &intr->dest.ssa;
246

247
                switch (comp) {
248
                case 0:
249
                case 1:
250
                        /* If we're not rendering points, we need to set a
251
                         * defined value for the input that would come from
252
                         * PNTC.
253
                         */
254
                        if (!c->fs_key->is_points)
255
                                result = nir_imm_float(b, 0.0);
256
                        break;
257
                case 2:
258
                        result = nir_imm_float(b, 0.0);
259
                        break;
260
                case 3:
261
                        result = nir_imm_float(b, 1.0);
262
                        break;
263
                }
264

265
                if (c->fs_key->point_coord_upper_left && comp == 1)
266
                        result = nir_fsub(b, nir_imm_float(b, 1.0), result);
267

268
                if (result != &intr->dest.ssa) {
269
                        nir_ssa_def_rewrite_uses_after(&intr->dest.ssa,
270
                                                       result,
271
                                                       result->parent_instr);
272
                }
273
        }
274
}
275

276
static void
277
vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
278
                     nir_intrinsic_instr *intr)
279
{
280
        nir_variable *output_var =
281
                nir_find_variable_with_driver_location(c->s, nir_var_shader_out,
282
                                                       nir_intrinsic_base(intr));
283
        assert(output_var);
284

285
        if (c->stage == QSTAGE_COORD &&
286
            output_var->data.location != VARYING_SLOT_POS &&
287
            output_var->data.location != VARYING_SLOT_PSIZ) {
288
                nir_instr_remove(&intr->instr);
289
                return;
290
        }
291
}
292

293
static void
294
vc4_nir_lower_uniform(struct vc4_compile *c, nir_builder *b,
295
                      nir_intrinsic_instr *intr)
296
{
297
        b->cursor = nir_before_instr(&intr->instr);
298

299
        /* Generate scalar loads equivalent to the original vector. */
300
        nir_ssa_def *dests[4];
301
        for (unsigned i = 0; i < intr->num_components; i++) {
302
                nir_intrinsic_instr *intr_comp =
303
                        nir_intrinsic_instr_create(c->s, intr->intrinsic);
304
                intr_comp->num_components = 1;
305
                nir_ssa_dest_init(&intr_comp->instr, &intr_comp->dest, 1,
306
                                  intr->dest.ssa.bit_size, NULL);
307

308
                /* Convert the uniform offset to bytes.  If it happens
309
                 * to be a constant, constant-folding will clean up
310
                 * the shift for us.
311
                 */
312
                nir_intrinsic_set_base(intr_comp,
313
                                       nir_intrinsic_base(intr) * 16 +
314
                                       i * 4);
315
                nir_intrinsic_set_range(intr_comp,
316
                                        nir_intrinsic_range(intr) * 16 - i * 4);
317

318
                intr_comp->src[0] =
319
                        nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
320
                                                 nir_imm_int(b, 4)));
321

322
                dests[i] = &intr_comp->dest.ssa;
323

324
                nir_builder_instr_insert(b, &intr_comp->instr);
325
        }
326

327
        replace_intrinsic_with_vec(b, intr, dests);
328
}
329

330
static void
331
vc4_nir_lower_io_instr(struct vc4_compile *c, nir_builder *b,
332
                       struct nir_instr *instr)
333
{
334
        if (instr->type != nir_instr_type_intrinsic)
335
                return;
336
        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
337

338
        switch (intr->intrinsic) {
339
        case nir_intrinsic_load_input:
340
                if (c->stage == QSTAGE_FRAG)
341
                        vc4_nir_lower_fs_input(c, b, intr);
342
                else
343
                        vc4_nir_lower_vertex_attr(c, b, intr);
344
                break;
345

346
        case nir_intrinsic_store_output:
347
                vc4_nir_lower_output(c, b, intr);
348
                break;
349

350
        case nir_intrinsic_load_uniform:
351
                vc4_nir_lower_uniform(c, b, intr);
352
                break;
353

354
        case nir_intrinsic_load_user_clip_plane:
355
        default:
356
                break;
357
        }
358
}
359

360
static bool
361
vc4_nir_lower_io_impl(struct vc4_compile *c, nir_function_impl *impl)
362
{
363
        nir_builder b;
364
        nir_builder_init(&b, impl);
365

366
        nir_foreach_block(block, impl) {
367
                nir_foreach_instr_safe(instr, block)
368
                        vc4_nir_lower_io_instr(c, &b, instr);
369
        }
370

371
        nir_metadata_preserve(impl, nir_metadata_block_index |
372
                              nir_metadata_dominance);
373

374
        return true;
375
}
376

377
void
378
vc4_nir_lower_io(nir_shader *s, struct vc4_compile *c)
379
{
380
        nir_foreach_function(function, s) {
381
                if (function->impl)
382
                        vc4_nir_lower_io_impl(c, function->impl);
383
        }
384
}
385

386
Product

Resources

Company