Path: blob/21.2-virgl/src/panfrost/midgard/mir_promote_uniforms.c
/*
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *    Alyssa Rosenzweig <[email protected]>
 */

#include "compiler.h"
#include "util/u_math.h"
#include "util/u_memory.h"

/* This pass promotes reads from UBOs to register-mapped uniforms. This saves
 * both instructions and work register pressure, but it reduces the work
 * registers available, requiring a balance.
 *
 * We use a heuristic to determine the ideal count, implemented by
 * mir_work_heuristic, which returns the ideal number of work registers.
 */

static bool
mir_is_ubo(midgard_instruction *ins)
{
        return (ins->type == TAG_LOAD_STORE_4) &&
                (OP_IS_UBO_READ(ins->op));
}

static bool
mir_is_direct_aligned_ubo(midgard_instruction *ins)
{
        return mir_is_ubo(ins) &&
                !(ins->constants.u32[0] & 0xF) &&
                (ins->src[1] == ~0) &&
                (ins->src[2] == ~0);
}

/* Represents use data for a single UBO */

#define MAX_UBO_QWORDS (65536 / 16)

struct mir_ubo_block {
        BITSET_DECLARE(uses, MAX_UBO_QWORDS);
        BITSET_DECLARE(pushed, MAX_UBO_QWORDS);
};

struct mir_ubo_analysis {
        /* Per block analysis */
        unsigned nr_blocks;
        struct mir_ubo_block *blocks;
};

static struct mir_ubo_analysis
mir_analyze_ranges(compiler_context *ctx)
{
        struct mir_ubo_analysis res = {
                .nr_blocks = ctx->nir->info.num_ubos + 1,
        };

        res.blocks = calloc(res.nr_blocks, sizeof(struct mir_ubo_block));

        mir_foreach_instr_global(ctx, ins) {
                if (!mir_is_direct_aligned_ubo(ins)) continue;

                unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store);
                unsigned offset = ins->constants.u32[0] / 16;

                assert(ubo < res.nr_blocks);

                if (offset < MAX_UBO_QWORDS)
                        BITSET_SET(res.blocks[ubo].uses, offset);
        }

        return res;
}
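/* For illustration: each mir_ubo_block above tracks one bit per 16-byte vec4
 * slot of a UBO, so a direct, aligned read of bytes 32..47 of UBO 1 sets bit 2
 * of blocks[1].uses. A hypothetical helper (not used by this pass) to query
 * the analysis could look like:
 *
 *    static inline bool
 *    mir_ubo_vec4_used(struct mir_ubo_analysis *a, unsigned ubo,
 *                      unsigned byte_offset)
 *    {
 *            return BITSET_TEST(a->blocks[ubo].uses, byte_offset / 16);
 *    }
 */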
/* Select UBO words to push. A sophisticated implementation would consider the
 * number of uses and perhaps the control flow to estimate benefit. This is not
 * sophisticated. Select from the last UBO first to prioritize sysvals.
 */

static void
mir_pick_ubo(struct panfrost_ubo_push *push, struct mir_ubo_analysis *analysis,
             unsigned max_qwords)
{
        unsigned max_words = MIN2(PAN_MAX_PUSH, max_qwords * 4);

        for (signed ubo = analysis->nr_blocks - 1; ubo >= 0; --ubo) {
                struct mir_ubo_block *block = &analysis->blocks[ubo];

                unsigned vec4;
                BITSET_FOREACH_SET(vec4, block->uses, MAX_UBO_QWORDS) {
                        /* Don't push more than possible */
                        if (push->count > max_words - 4)
                                return;

                        for (unsigned offs = 0; offs < 4; ++offs) {
                                struct panfrost_ubo_word word = {
                                        .ubo = ubo,
                                        .offset = (vec4 * 16) + (offs * 4)
                                };

                                push->words[push->count++] = word;
                        }

                        /* Mark it as pushed so we can rewrite */
                        BITSET_SET(block->pushed, vec4);
                }
        }
}

#if 0
static void
mir_dump_ubo_analysis(struct mir_ubo_analysis *res)
{
        printf("%u blocks\n", res->nr_blocks);

        for (unsigned i = 0; i < res->nr_blocks; ++i) {
                BITSET_WORD *uses = res->blocks[i].uses;
                BITSET_WORD *push = res->blocks[i].pushed;

                unsigned last = BITSET_LAST_BIT_SIZED(uses, BITSET_WORDS(MAX_UBO_QWORDS));

                printf("\t");

                for (unsigned j = 0; j < last; ++j) {
                        bool used = BITSET_TEST(uses, j);
                        bool pushed = BITSET_TEST(push, j);
                        assert(used || !pushed);

                        putchar(pushed ? '*' : used ? '-' : '_');
                }

                printf("\n");
        }
}
#endif

static unsigned
mir_promoteable_uniform_count(struct mir_ubo_analysis *analysis)
{
        unsigned count = 0;

        for (unsigned i = 0; i < analysis->nr_blocks; ++i) {
                BITSET_WORD *uses = analysis->blocks[i].uses;

                for (unsigned w = 0; w < BITSET_WORDS(MAX_UBO_QWORDS); ++w)
                        count += util_bitcount(uses[w]);
        }

        return count;
}

static unsigned
mir_count_live(uint16_t *live, unsigned temp_count)
{
        unsigned count = 0;

        for (unsigned i = 0; i < temp_count; ++i)
                count += util_bitcount(live[i]);

        return count;
}

static unsigned
mir_estimate_pressure(compiler_context *ctx)
{
        mir_invalidate_liveness(ctx);
        mir_compute_liveness(ctx);

        unsigned max_live = 0;

        mir_foreach_block(ctx, _block) {
                midgard_block *block = (midgard_block *) _block;
                uint16_t *live = mem_dup(block->base.live_out, ctx->temp_count * sizeof(uint16_t));

                mir_foreach_instr_in_block_rev(block, ins) {
                        unsigned count = mir_count_live(live, ctx->temp_count);
                        max_live = MAX2(max_live, count);
                        mir_liveness_ins_update(live, ins, ctx->temp_count);
                }

                free(live);
        }

        return DIV_ROUND_UP(max_live, 16);
}

static unsigned
mir_work_heuristic(compiler_context *ctx, struct mir_ubo_analysis *analysis)
{
        unsigned uniform_count = mir_promoteable_uniform_count(analysis);

        /* If there are 8 or fewer uniforms, it doesn't matter what we do, so
         * allow as many work registers as needed */

        if (uniform_count <= 8)
                return 16;

        /* Otherwise, estimate the register pressure */

        unsigned pressure = mir_estimate_pressure(ctx);

        /* Prioritize not spilling above all else. The relation between the
         * pressure estimate and the actual register pressure is a little
         * murkier than we might like (due to scheduling, pipeline registers,
         * failure to pack vector registers, load/store registers, texture
         * registers...), hence why this is a heuristic parameter */

        if (pressure > 6)
                return 16;

        /* If there's no chance of spilling, prioritize UBOs and thread count */

        return 8;
}
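/* A sketch of the arithmetic implied by the heuristic: work registers and
 * pushed uniforms share a budget of 24 vec4 slots (promoted_count = 24 -
 * work_count in midgard_promote_uniforms below), so the two possible outcomes
 * are:
 *
 *    work_count = 16  ->  promoted_count = 8  pushed vec4s
 *    work_count = 8   ->  promoted_count = 16 pushed vec4s
 *
 * The vec4 pushed at word address 4*i of the push table is later read back
 * from the register-mapped uniform SSA_FIXED_REGISTER(23 - i). */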
/* Bitset of indices that will be used as a special register -- inputs to a
 * non-ALU op. We precompute this set so that testing is efficient, otherwise
 * we end up with O(mn) behaviour for n instructions and m uniform reads */

static BITSET_WORD *
mir_special_indices(compiler_context *ctx)
{
        mir_compute_temp_count(ctx);
        BITSET_WORD *bset = calloc(BITSET_WORDS(ctx->temp_count), sizeof(BITSET_WORD));

        mir_foreach_instr_global(ctx, ins) {
                /* Look for special instructions */
                bool is_ldst = ins->type == TAG_LOAD_STORE_4;
                bool is_tex = ins->type == TAG_TEXTURE_4;
                bool is_writeout = ins->compact_branch && ins->writeout;

                if (!(is_ldst || is_tex || is_writeout))
                        continue;

                /* Anything read by a special instruction is itself special */
                mir_foreach_src(ins, i) {
                        unsigned idx = ins->src[i];

                        if (idx < ctx->temp_count)
                                BITSET_SET(bset, idx);
                }
        }

        return bset;
}

void
midgard_promote_uniforms(compiler_context *ctx)
{
        if (ctx->inputs->no_ubo_to_push) {
                /* If nothing is pushed, all UBOs need to be uploaded
                 * conventionally */
                ctx->ubo_mask = ~0;
                return;
        }

        struct mir_ubo_analysis analysis = mir_analyze_ranges(ctx);

        unsigned work_count = mir_work_heuristic(ctx, &analysis);
        unsigned promoted_count = 24 - work_count;

        /* Ensure we are 16 byte aligned to avoid underallocations */
        mir_pick_ubo(&ctx->info->push, &analysis, promoted_count);
        ctx->info->push.count = ALIGN_POT(ctx->info->push.count, 4);

        /* First, figure out special indices a priori so we don't recompute a lot */
        BITSET_WORD *special = mir_special_indices(ctx);

        ctx->ubo_mask = 0;

        mir_foreach_instr_global_safe(ctx, ins) {
                if (!mir_is_ubo(ins)) continue;

                unsigned ubo = midgard_unpack_ubo_index_imm(ins->load_store);
                unsigned qword = ins->constants.u32[0] / 16;

                if (!mir_is_direct_aligned_ubo(ins)) {
                        if (ins->src[1] == ~0)
                                ctx->ubo_mask |= BITSET_BIT(ubo);
                        else
                                ctx->ubo_mask = ~0;

                        continue;
                }

                /* Check if we decided to push this */
                assert(ubo < analysis.nr_blocks);
                if (!BITSET_TEST(analysis.blocks[ubo].pushed, qword)) {
                        ctx->ubo_mask |= BITSET_BIT(ubo);
                        continue;
                }

                /* Find where we pushed to, TODO: unaligned pushes to pack */
                unsigned base = pan_lookup_pushed_ubo(&ctx->info->push, ubo, qword * 16);
                assert((base & 0x3) == 0);

                unsigned address = base / 4;
                unsigned uniform_reg = 23 - address;

                /* Should've been taken into account when pushing */
                assert(address < promoted_count);
                unsigned promoted = SSA_FIXED_REGISTER(uniform_reg);

                /* We do need the move for safety for a non-SSA dest, or if
                 * we're being fed into a special class */

                bool needs_move = ins->dest & PAN_IS_REG || ins->dest == ctx->blend_src1;

                if (ins->dest < ctx->temp_count)
                        needs_move |= BITSET_TEST(special, ins->dest);

                if (needs_move) {
                        unsigned type_size = nir_alu_type_get_type_size(ins->dest_type);
                        midgard_instruction mov = v_mov(promoted, ins->dest);
                        mov.dest_type = nir_type_uint | type_size;
                        mov.src_types[1] = mov.dest_type;

                        uint16_t rounded = mir_round_bytemask_up(mir_bytemask(ins), type_size);
                        mir_set_bytemask(&mov, rounded);
                        mir_insert_instruction_before(ctx, ins, mov);
                } else {
                        mir_rewrite_index_src(ctx, ins->dest, promoted);
                }

                mir_remove_instruction(ins);
        }

        free(special);
        free(analysis.blocks);
}
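#if 0
/* Debugging aid in the spirit of mir_dump_ubo_analysis above: dump the final
 * push table after picking. Illustrative only (hence #if 0); the register
 * naming assumes the 23 - (word / 4) mapping used when rewriting loads. */
static void
mir_dump_push_words(struct panfrost_ubo_push *push)
{
        printf("%u pushed words\n", push->count);

        for (unsigned i = 0; i < push->count; ++i) {
                printf("\tword %u: r%u.%c <- ubo %u, offset %u\n",
                       i, 23 - (i / 4), "xyzw"[i % 4],
                       (unsigned) push->words[i].ubo,
                       (unsigned) push->words[i].offset);
        }
}
#endif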