CoCalc -- midgard

GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/panfrost/midgard/midgard_emit.c
⁴⁵⁶⁴ views
1
/*
2
 * Copyright (C) 2018-2019 Alyssa Rosenzweig <[email protected]>
3
 * Copyright (C) 2019-2020 Collabora, Ltd.
4
 *
5
 * Permission is hereby granted, free of charge, to any person obtaining a
6
 * copy of this software and associated documentation files (the "Software"),
7
 * to deal in the Software without restriction, including without limitation
8
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
 * and/or sell copies of the Software, and to permit persons to whom the
10
 * Software is furnished to do so, subject to the following conditions:
11
 *
12
 * The above copyright notice and this permission notice (including the next
13
 * paragraph) shall be included in all copies or substantial portions of the
14
 * Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
 * SOFTWARE.
23
 */
24

25
#include "compiler.h"
26
#include "midgard_ops.h"
27
#include "midgard_quirks.h"
28

29
static midgard_int_mod
30
mir_get_imod(bool shift, nir_alu_type T, bool half, bool scalar)
31
{
32
        if (!half) {
33
                assert(!shift);
34
                /* Doesn't matter, src mods are only used when expanding */
35
                return midgard_int_sign_extend;
36
        }
37

38
        if (shift)
39
                return midgard_int_left_shift;
40

41
        if (nir_alu_type_get_base_type(T) == nir_type_int)
42
                return midgard_int_sign_extend;
43
        else
44
                return midgard_int_zero_extend;
45
}
46

47
void
48
midgard_pack_ubo_index_imm(midgard_load_store_word *word, unsigned index)
49
{
50
        word->arg_comp = index & 0x3;
51
        word->arg_reg = (index >> 2) & 0x7;
52
        word->bitsize_toggle = (index >> 5) & 0x1;
53
        word->index_format = (index >> 6) & 0x3;
54
}
55

56
unsigned
57
midgard_unpack_ubo_index_imm(midgard_load_store_word word)
58
{
59
        unsigned ubo = word.arg_comp |
60
                       (word.arg_reg << 2)  |
61
                       (word.bitsize_toggle << 5) |
62
                       (word.index_format << 6);
63

64
        return ubo;
65
}
66

67
void midgard_pack_varying_params(midgard_load_store_word *word, midgard_varying_params p)
68
{
69
        /* Currently these parameters are not supported. */
70
        assert(p.direct_sample_pos_x == 0 && p.direct_sample_pos_y == 0);
71

72
        unsigned u;
73
        memcpy(&u, &p, sizeof(p));
74

75
        word->signed_offset |= u & 0x1FF;
76
}
77

78
midgard_varying_params midgard_unpack_varying_params(midgard_load_store_word word)
79
{
80
        unsigned params = word.signed_offset & 0x1FF;
81

82
        midgard_varying_params p;
83
        memcpy(&p, &params, sizeof(p));
84

85
        return p;
86
}
87

88
unsigned
89
mir_pack_mod(midgard_instruction *ins, unsigned i, bool scalar)
90
{
91
        bool integer = midgard_is_integer_op(ins->op);
92
        unsigned base_size = max_bitsize_for_alu(ins);
93
        unsigned sz = nir_alu_type_get_type_size(ins->src_types[i]);
94
        bool half = (sz == (base_size >> 1));
95

96
        return integer ?
97
                mir_get_imod(ins->src_shift[i], ins->src_types[i], half, scalar) :
98
                ((ins->src_abs[i] << 0) |
99
                 ((ins->src_neg[i] << 1)));
100
}
101

102
/* Midgard IR only knows vector ALU types, but we sometimes need to actually
103
 * use scalar ALU instructions, for functional or performance reasons. To do
104
 * this, we just demote vector ALU payloads to scalar. */
105

106
static int
107
component_from_mask(unsigned mask)
108
{
109
        for (int c = 0; c < 8; ++c) {
110
                if (mask & (1 << c))
111
                        return c;
112
        }
113

114
        assert(0);
115
        return 0;
116
}
117

118
static unsigned
119
mir_pack_scalar_source(unsigned mod, bool is_full, unsigned component)
120
{
121
        midgard_scalar_alu_src s = {
122
                .mod = mod,
123
                .full = is_full,
124
                .component = component << (is_full ? 1 : 0)
125
        };
126

127
        unsigned o;
128
        memcpy(&o, &s, sizeof(s));
129

130
        return o & ((1 << 6) - 1);
131
}
132

133
static midgard_scalar_alu
134
vector_to_scalar_alu(midgard_vector_alu v, midgard_instruction *ins)
135
{
136
        bool is_full = nir_alu_type_get_type_size(ins->dest_type) == 32;
137

138
        bool half_0 = nir_alu_type_get_type_size(ins->src_types[0]) == 16;
139
        bool half_1 = nir_alu_type_get_type_size(ins->src_types[1]) == 16;
140
        unsigned comp = component_from_mask(ins->mask);
141

142
        unsigned packed_src[2] = {
143
                mir_pack_scalar_source(mir_pack_mod(ins, 0, true), !half_0, ins->swizzle[0][comp]),
144
                mir_pack_scalar_source(mir_pack_mod(ins, 1, true), !half_1, ins->swizzle[1][comp])
145
        };
146

147
        /* The output component is from the mask */
148
        midgard_scalar_alu s = {
149
                .op = v.op,
150
                .src1 = packed_src[0],
151
                .src2 = packed_src[1],
152
                .unknown = 0,
153
                .outmod = v.outmod,
154
                .output_full = is_full,
155
                .output_component = comp
156
        };
157

158
        /* Full components are physically spaced out */
159
        if (is_full) {
160
                assert(s.output_component < 4);
161
                s.output_component <<= 1;
162
        }
163

164
        /* Inline constant is passed along rather than trying to extract it
165
         * from v */
166

167
        if (ins->has_inline_constant) {
168
                uint16_t imm = 0;
169
                int lower_11 = ins->inline_constant & ((1 << 12) - 1);
170
                imm |= (lower_11 >> 9) & 3;
171
                imm |= (lower_11 >> 6) & 4;
172
                imm |= (lower_11 >> 2) & 0x38;
173
                imm |= (lower_11 & 63) << 6;
174

175
                s.src2 = imm;
176
        }
177

178
        return s;
179
}
180

181
/* 64-bit swizzles are super easy since there are 2 components of 2 components
182
 * in an 8-bit field ... lots of duplication to go around!
183
 *
184
 * Swizzles of 32-bit vectors accessed from 64-bit instructions are a little
185
 * funny -- pack them *as if* they were native 64-bit, using rep_* flags to
186
 * flag upper. For instance, xy would become 64-bit XY but that's just xyzw
187
 * native. Likewise, zz would become 64-bit XX with rep* so it would be xyxy
188
 * with rep. Pretty nifty, huh? */
189

190
static unsigned
191
mir_pack_swizzle_64(unsigned *swizzle, unsigned max_component)
192
{
193
        unsigned packed = 0;
194

195
        for (unsigned i = 0; i < 2; ++i) {
196
                assert(swizzle[i] <= max_component);
197

198
                unsigned a = (swizzle[i] & 1) ?
199
                        (COMPONENT_W << 2) | COMPONENT_Z :
200
                        (COMPONENT_Y << 2) | COMPONENT_X;
201

202
                packed |= a << (i * 4);
203
        }
204

205
        return packed;
206
}
207

208
static void
209
mir_pack_mask_alu(midgard_instruction *ins, midgard_vector_alu *alu)
210
{
211
        unsigned effective = ins->mask;
212

213
        /* If we have a destination override, we need to figure out whether to
214
         * override to the lower or upper half, shifting the effective mask in
215
         * the latter, so AAAA.... becomes AAAA */
216

217
        unsigned inst_size = max_bitsize_for_alu(ins);
218
        signed upper_shift = mir_upper_override(ins, inst_size);
219

220
        if (upper_shift >= 0) {
221
                effective >>= upper_shift;
222
                alu->shrink_mode = upper_shift ?
223
                        midgard_shrink_mode_upper :
224
                        midgard_shrink_mode_lower;
225
        } else {
226
                alu->shrink_mode = midgard_shrink_mode_none;
227
        }
228

229
        if (inst_size == 32)
230
                alu->mask = expand_writemask(effective, 2);
231
        else if (inst_size == 64)
232
                alu->mask = expand_writemask(effective, 1);
233
        else
234
                alu->mask = effective;
235
}
236

237
static unsigned
238
mir_pack_swizzle(unsigned mask, unsigned *swizzle,
239
                 unsigned sz, unsigned base_size,
240
                 bool op_channeled, midgard_src_expand_mode *expand_mode)
241
{
242
        unsigned packed = 0;
243

244
        *expand_mode = midgard_src_passthrough;
245

246
        midgard_reg_mode reg_mode = reg_mode_for_bitsize(base_size);
247

248
        if (reg_mode == midgard_reg_mode_64) {
249
                assert(sz == 64 || sz == 32);
250
                unsigned components = (sz == 32) ? 4 : 2;
251

252
                packed = mir_pack_swizzle_64(swizzle, components);
253

254
                if (sz == 32) {
255
                        bool lo = swizzle[0] >= COMPONENT_Z;
256
                        bool hi = swizzle[1] >= COMPONENT_Z;
257

258
                        if (mask & 0x1) {
259
                                /* We can't mix halves... */
260
                                if (mask & 2)
261
                                        assert(lo == hi);
262

263
                                *expand_mode = lo ? midgard_src_expand_high :
264
                                                    midgard_src_expand_low;
265
                        } else {
266
                                *expand_mode = hi ? midgard_src_expand_high :
267
                                                    midgard_src_expand_low;
268
                        }
269
                } else if (sz < 32) {
270
                        unreachable("Cannot encode 8/16 swizzle in 64-bit");
271
                }
272
        } else {
273
                /* For 32-bit, swizzle packing is stupid-simple. For 16-bit,
274
                 * the strategy is to check whether the nibble we're on is
275
                 * upper or lower. We need all components to be on the same
276
                 * "side"; that much is enforced by the ISA and should have
277
                 * been lowered. TODO: 8-bit packing. TODO: vec8 */
278

279
                unsigned first = mask ? ffs(mask) - 1 : 0;
280
                bool upper = swizzle[first] > 3;
281

282
                if (upper && mask)
283
                        assert(sz <= 16);
284

285
                bool dest_up = !op_channeled && (first >= 4);
286

287
                for (unsigned c = (dest_up ? 4 : 0); c < (dest_up ? 8 : 4); ++c) {
288
                        unsigned v = swizzle[c];
289

290
                        ASSERTED bool t_upper = v > 3;
291

292
                        /* Ensure we're doing something sane */
293

294
                        if (mask & (1 << c)) {
295
                                assert(t_upper == upper);
296
                                assert(v <= 7);
297
                        }
298

299
                        /* Use the non upper part */
300
                        v &= 0x3;
301

302
                        packed |= v << (2 * (c % 4));
303
                }
304

305

306
                /* Replicate for now.. should really pick a side for
307
                 * dot products */
308

309
                if (reg_mode == midgard_reg_mode_16 && sz == 16) {
310
                        *expand_mode = upper ? midgard_src_rep_high :
311
                                               midgard_src_rep_low;
312
                } else if (reg_mode == midgard_reg_mode_16 && sz == 8) {
313
                        if (base_size == 16) {
314
                                *expand_mode = upper ? midgard_src_expand_high :
315
                                                       midgard_src_expand_low;
316
                        } else if (upper) {
317
                                *expand_mode = midgard_src_swap;
318
                        }
319
                } else if (reg_mode == midgard_reg_mode_32 && sz == 16) {
320
                        *expand_mode = upper ? midgard_src_expand_high :
321
                                               midgard_src_expand_low;
322
                } else if (reg_mode == midgard_reg_mode_8) {
323
                        unreachable("Unhandled reg mode");
324
                }
325
        }
326

327
        return packed;
328
}
329

330
static void
331
mir_pack_vector_srcs(midgard_instruction *ins, midgard_vector_alu *alu)
332
{
333
        bool channeled = GET_CHANNEL_COUNT(alu_opcode_props[ins->op].props);
334

335
        unsigned base_size = max_bitsize_for_alu(ins);
336

337
        for (unsigned i = 0; i < 2; ++i) {
338
                if (ins->has_inline_constant && (i == 1))
339
                        continue;
340

341
                if (ins->src[i] == ~0)
342
                        continue;
343

344
                unsigned sz = nir_alu_type_get_type_size(ins->src_types[i]);
345
                assert((sz == base_size) || (sz == base_size / 2));
346

347
                midgard_src_expand_mode expand_mode = midgard_src_passthrough;
348
                unsigned swizzle = mir_pack_swizzle(ins->mask, ins->swizzle[i],
349
                                                    sz, base_size, channeled,
350
                                                    &expand_mode);
351

352
                midgard_vector_alu_src pack = {
353
                        .mod = mir_pack_mod(ins, i, false),
354
                        .expand_mode = expand_mode,
355
                        .swizzle = swizzle
356
                };
357

358
                unsigned p = vector_alu_srco_unsigned(pack);
359
                
360
                if (i == 0)
361
                        alu->src1 = p;
362
                else
363
                        alu->src2 = p;
364
        }
365
}
366

367
static void
368
mir_pack_swizzle_ldst(midgard_instruction *ins)
369
{
370
        /* TODO: non-32-bit, non-vec4 */
371
        for (unsigned c = 0; c < 4; ++c) {
372
                unsigned v = ins->swizzle[0][c];
373

374
                /* Check vec4 */
375
                assert(v <= 3);
376

377
                ins->load_store.swizzle |= v << (2 * c);
378
        }
379

380
        /* TODO: arg_1/2 */
381
}
382

383
static void
384
mir_pack_swizzle_tex(midgard_instruction *ins)
385
{
386
        for (unsigned i = 0; i < 2; ++i) {
387
                unsigned packed = 0;
388

389
                for (unsigned c = 0; c < 4; ++c) {
390
                        unsigned v = ins->swizzle[i][c];
391

392
                        /* Check vec4 */
393
                        assert(v <= 3);
394

395
                        packed |= v << (2 * c);
396
                }
397

398
                if (i == 0)
399
                        ins->texture.swizzle = packed;
400
                else
401
                        ins->texture.in_reg_swizzle = packed;
402
        }
403

404
        /* TODO: bias component */
405
}
406

407
/* Up to 3 { ALU, LDST } bundles can execute in parallel with a texture op.
408
 * Given a texture op, lookahead to see how many such bundles we can flag for
409
 * OoO execution */
410

411
static bool
412
mir_can_run_ooo(midgard_block *block, midgard_bundle *bundle,
413
                unsigned dependency)
414
{
415
        /* Don't read out of bounds */
416
        if (bundle >= (midgard_bundle *) ((char *) block->bundles.data + block->bundles.size))
417
                return false;
418

419
        /* Texture ops can't execute with other texture ops */
420
        if (!IS_ALU(bundle->tag) && bundle->tag != TAG_LOAD_STORE_4)
421
                return false;
422

423
        /* Ensure there is no read-after-write dependency */
424

425
        for (unsigned i = 0; i < bundle->instruction_count; ++i) {
426
                midgard_instruction *ins = bundle->instructions[i];
427

428
                mir_foreach_src(ins, s) {
429
                        if (ins->src[s] == dependency)
430
                                return false;
431
                }
432
        }
433

434
        /* Otherwise, we're okay */
435
        return true;
436
}
437

438
static void
439
mir_pack_tex_ooo(midgard_block *block, midgard_bundle *bundle, midgard_instruction *ins)
440
{
441
        unsigned count = 0;
442

443
        for (count = 0; count < 3; ++count) {
444
                if (!mir_can_run_ooo(block, bundle + count + 1, ins->dest))
445
                        break;
446
        }
447

448
        ins->texture.out_of_order = count;
449
}
450

451
/* Load store masks are 4-bits. Load/store ops pack for that.
452
 * For most operations, vec4 is the natural mask width; vec8 is constrained to
453
 * be in pairs, vec2 is duplicated. TODO: 8-bit?
454
 * For common stores (i.e. ST.*), each bit masks a single byte in the 32-bit
455
 * case, 2 bytes in the 64-bit case and 4 bytes in the 128-bit case.
456
 */
457

458
static unsigned
459
midgard_pack_common_store_mask(midgard_instruction *ins) {
460
        unsigned comp_sz = nir_alu_type_get_type_size(ins->dest_type);
461
        unsigned mask = ins->mask;
462
        unsigned packed = 0;
463
        unsigned nr_comp;
464

465
        switch (ins->op) {
466
                case midgard_op_st_u8:
467
                        packed |= mask & 1;
468
                        break;
469
                case midgard_op_st_u16:
470
                        nr_comp = 16 / comp_sz;
471
                        for (int i = 0; i < nr_comp; i++) {
472
                                if (mask & (1 << i)) {
473
                                        if (comp_sz == 16)
474
                                                packed |= 0x3;
475
                                        else if (comp_sz == 8)
476
                                                packed |= 1 << i;
477
                                }
478
                        }
479
                        break;
480
                case midgard_op_st_32:
481
                case midgard_op_st_64:
482
                case midgard_op_st_128: {
483
                        unsigned total_sz = 32;
484
                        if (ins->op == midgard_op_st_128)
485
                                total_sz = 128;
486
                        else if (ins->op == midgard_op_st_64)
487
                                total_sz = 64;
488

489
                        nr_comp = total_sz / comp_sz;
490

491
                        /* Each writemask bit masks 1/4th of the value to be stored. */
492
                        assert(comp_sz >= total_sz / 4);
493

494
                        for (int i = 0; i < nr_comp; i++) {
495
                                if (mask & (1 << i)) {
496
                                        if (comp_sz == total_sz)
497
                                                packed |= 0xF;
498
                                        else if (comp_sz == total_sz / 2)
499
                                                packed |= 0x3 << (i * 2);
500
                                        else if (comp_sz == total_sz / 4)
501
                                                packed |= 0x1 << i;
502
                                }
503
                        }
504
                        break;
505
                }
506
                default:
507
                        unreachable("unexpected ldst opcode");
508
        }
509

510
        return packed;
511
}
512

513
static void
514
mir_pack_ldst_mask(midgard_instruction *ins)
515
{
516
        unsigned sz = nir_alu_type_get_type_size(ins->dest_type);
517
        unsigned packed = ins->mask;
518

519
        if (OP_IS_COMMON_STORE(ins->op)) {
520
                packed = midgard_pack_common_store_mask(ins);
521
        } else {
522
                if (sz == 64) {
523
                        packed = ((ins->mask & 0x2) ? (0x8 | 0x4) : 0) |
524
                                ((ins->mask & 0x1) ? (0x2 | 0x1) : 0);
525
                } else if (sz == 16) {
526
                        packed = 0;
527

528
                        for (unsigned i = 0; i < 4; ++i) {
529
                                /* Make sure we're duplicated */
530
                                bool u = (ins->mask & (1 << (2*i + 0))) != 0;
531
                                ASSERTED bool v = (ins->mask & (1 << (2*i + 1))) != 0;
532
                                assert(u == v);
533

534
                                packed |= (u << i);
535
                        }
536
                } else {
537
                        assert(sz == 32);
538
                }
539
        }
540

541
        ins->load_store.mask = packed;
542
}
543

544
static void
545
mir_lower_inverts(midgard_instruction *ins)
546
{
547
        bool inv[3] = {
548
                ins->src_invert[0],
549
                ins->src_invert[1],
550
                ins->src_invert[2]
551
        };
552

553
        switch (ins->op) {
554
        case midgard_alu_op_iand:
555
                /* a & ~b = iandnot(a, b) */
556
                /* ~a & ~b = ~(a | b) = inor(a, b) */
557

558
                if (inv[0] && inv[1])
559
                        ins->op = midgard_alu_op_inor;
560
                else if (inv[1])
561
                        ins->op = midgard_alu_op_iandnot;
562

563
                break;
564
        case midgard_alu_op_ior:
565
                /*  a | ~b = iornot(a, b) */
566
                /* ~a | ~b = ~(a & b) = inand(a, b) */
567

568
                if (inv[0] && inv[1])
569
                        ins->op = midgard_alu_op_inand;
570
                else if (inv[1])
571
                        ins->op = midgard_alu_op_iornot;
572

573
                break;
574

575
        case midgard_alu_op_ixor:
576
                /* ~a ^ b = a ^ ~b = ~(a ^ b) = inxor(a, b) */
577
                /* ~a ^ ~b = a ^ b */
578

579
                if (inv[0] ^ inv[1])
580
                        ins->op = midgard_alu_op_inxor;
581

582
                break;
583

584
        default:
585
                break;
586
        }
587
}
588

589
/* Opcodes with ROUNDS are the base (rte/0) type so we can just add */
590

591
static void
592
mir_lower_roundmode(midgard_instruction *ins)
593
{
594
        if (alu_opcode_props[ins->op].props & MIDGARD_ROUNDS) {
595
                assert(ins->roundmode <= 0x3);
596
                ins->op += ins->roundmode;
597
        }
598
}
599

600
static midgard_load_store_word
601
load_store_from_instr(midgard_instruction *ins)
602
{
603
        midgard_load_store_word ldst = ins->load_store;
604
        ldst.op = ins->op;
605

606
        if (OP_IS_STORE(ldst.op)) {
607
                ldst.reg = SSA_REG_FROM_FIXED(ins->src[0]) & 1;
608
        } else {
609
                ldst.reg = SSA_REG_FROM_FIXED(ins->dest);
610
        }
611

612
        /* Atomic opcode swizzles have a special meaning:
613
         *   - The first two bits say which component of the implicit register should be used
614
         *   - The next two bits say if the implicit register is r26 or r27 */
615
        if (OP_IS_ATOMIC(ins->op)) {
616
                ldst.swizzle = 0;
617
                ldst.swizzle |= ins->swizzle[3][0] & 3;
618
                ldst.swizzle |= (SSA_REG_FROM_FIXED(ins->src[3]) & 1 ? 1 : 0) << 2;
619
        }
620

621
        if (ins->src[1] != ~0) {
622
                ldst.arg_reg = SSA_REG_FROM_FIXED(ins->src[1]) - REGISTER_LDST_BASE;
623
                unsigned sz = nir_alu_type_get_type_size(ins->src_types[1]);
624
                ldst.arg_comp = midgard_ldst_comp(ldst.arg_reg, ins->swizzle[1][0], sz);
625
        }
626

627
        if (ins->src[2] != ~0) {
628
                ldst.index_reg = SSA_REG_FROM_FIXED(ins->src[2]) - REGISTER_LDST_BASE;
629
                unsigned sz = nir_alu_type_get_type_size(ins->src_types[2]);
630
                ldst.index_comp = midgard_ldst_comp(ldst.index_reg, ins->swizzle[2][0], sz);
631
        }
632

633
        return ldst;
634
}
635

636
static midgard_texture_word
637
texture_word_from_instr(midgard_instruction *ins)
638
{
639
        midgard_texture_word tex = ins->texture;
640
        tex.op = ins->op;
641

642
        unsigned src1 = ins->src[1] == ~0 ? REGISTER_UNUSED : SSA_REG_FROM_FIXED(ins->src[1]);
643
        tex.in_reg_select = src1 & 1;
644

645
        unsigned dest = ins->dest == ~0 ? REGISTER_UNUSED : SSA_REG_FROM_FIXED(ins->dest);
646
        tex.out_reg_select = dest & 1;
647

648
        if (ins->src[2] != ~0) {
649
                midgard_tex_register_select sel = {
650
                        .select = SSA_REG_FROM_FIXED(ins->src[2]) & 1,
651
                        .full = 1,
652
                        .component = ins->swizzle[2][0]
653
                };
654
                uint8_t packed;
655
                memcpy(&packed, &sel, sizeof(packed));
656
                tex.bias = packed;
657
        }
658

659
        if (ins->src[3] != ~0) {
660
                unsigned x = ins->swizzle[3][0];
661
                unsigned y = x + 1;
662
                unsigned z = x + 2;
663

664
                /* Check range, TODO: half-registers */
665
                assert(z < 4);
666

667
                unsigned offset_reg = SSA_REG_FROM_FIXED(ins->src[3]);
668
                tex.offset =
669
                        (1)                   | /* full */
670
                        (offset_reg & 1) << 1 | /* select */
671
                        (0 << 2)              | /* upper */
672
                        (x << 3)              | /* swizzle */
673
                        (y << 5)              | /* swizzle */
674
                        (z << 7);               /* swizzle */
675
        }
676

677
        return tex;
678
}
679

680
static midgard_vector_alu
681
vector_alu_from_instr(midgard_instruction *ins)
682
{
683
        midgard_vector_alu alu = {
684
                .op = ins->op,
685
                .outmod = ins->outmod,
686
                .reg_mode = reg_mode_for_bitsize(max_bitsize_for_alu(ins))
687
        };
688

689
        if (ins->has_inline_constant) {
690
                /* Encode inline 16-bit constant. See disassembler for
691
                 * where the algorithm is from */
692

693
                int lower_11 = ins->inline_constant & ((1 << 12) - 1);
694
                uint16_t imm = ((lower_11 >> 8) & 0x7) |
695
                               ((lower_11 & 0xFF) << 3);
696

697
                alu.src2 = imm << 2;
698
        }
699

700
        return alu;
701
}
702

703
static midgard_branch_extended
704
midgard_create_branch_extended( midgard_condition cond,
705
                                midgard_jmp_writeout_op op,
706
                                unsigned dest_tag,
707
                                signed quadword_offset)
708
{
709
        /* The condition code is actually a LUT describing a function to
710
         * combine multiple condition codes. However, we only support a single
711
         * condition code at the moment, so we just duplicate over a bunch of
712
         * times. */
713

714
        uint16_t duplicated_cond =
715
                (cond << 14) |
716
                (cond << 12) |
717
                (cond << 10) |
718
                (cond << 8) |
719
                (cond << 6) |
720
                (cond << 4) |
721
                (cond << 2) |
722
                (cond << 0);
723

724
        midgard_branch_extended branch = {
725
                .op = op,
726
                .dest_tag = dest_tag,
727
                .offset = quadword_offset,
728
                .cond = duplicated_cond
729
        };
730

731
        return branch;
732
}
733

734
static void
735
emit_branch(midgard_instruction *ins,
736
            compiler_context *ctx,
737
            midgard_block *block,
738
            midgard_bundle *bundle,
739
            struct util_dynarray *emission)
740
{
741
        /* Parse some basic branch info */
742
        bool is_compact = ins->unit == ALU_ENAB_BR_COMPACT;
743
        bool is_conditional = ins->branch.conditional;
744
        bool is_inverted = ins->branch.invert_conditional;
745
        bool is_discard = ins->branch.target_type == TARGET_DISCARD;
746
        bool is_tilebuf_wait = ins->branch.target_type == TARGET_TILEBUF_WAIT;
747
        bool is_special = is_discard || is_tilebuf_wait;
748
        bool is_writeout = ins->writeout;
749

750
        /* Determine the block we're jumping to */
751
        int target_number = ins->branch.target_block;
752

753
        /* Report the destination tag */
754
        int dest_tag = is_discard ? 0 :
755
                is_tilebuf_wait ? bundle->tag :
756
                midgard_get_first_tag_from_block(ctx, target_number);
757

758
        /* Count up the number of quadwords we're
759
         * jumping over = number of quadwords until
760
         * (br_block_idx, target_number) */
761

762
        int quadword_offset = 0;
763

764
        if (is_discard) {
765
                /* Fixed encoding, not actually an offset */
766
                quadword_offset = 0x2;
767
        } else if (is_tilebuf_wait) {
768
                quadword_offset = -1;
769
        } else if (target_number > block->base.name) {
770
                /* Jump forward */
771

772
                for (int idx = block->base.name+1; idx < target_number; ++idx) {
773
                        midgard_block *blk = mir_get_block(ctx, idx);
774
                        assert(blk);
775

776
                        quadword_offset += blk->quadword_count;
777
                }
778
        } else {
779
                /* Jump backwards */
780

781
                for (int idx = block->base.name; idx >= target_number; --idx) {
782
                        midgard_block *blk = mir_get_block(ctx, idx);
783
                        assert(blk);
784

785
                        quadword_offset -= blk->quadword_count;
786
                }
787
        }
788

789
        /* Unconditional extended branches (far jumps)
790
         * have issues, so we always use a conditional
791
         * branch, setting the condition to always for
792
         * unconditional. For compact unconditional
793
         * branches, cond isn't used so it doesn't
794
         * matter what we pick. */
795

796
        midgard_condition cond =
797
                !is_conditional ? midgard_condition_always :
798
                is_inverted ? midgard_condition_false :
799
                midgard_condition_true;
800

801
        midgard_jmp_writeout_op op =
802
                is_discard ? midgard_jmp_writeout_op_discard :
803
                is_tilebuf_wait ? midgard_jmp_writeout_op_tilebuffer_pending :
804
                is_writeout ? midgard_jmp_writeout_op_writeout :
805
                (is_compact && !is_conditional) ?
806
                midgard_jmp_writeout_op_branch_uncond :
807
                midgard_jmp_writeout_op_branch_cond;
808

809
        if (is_compact) {
810
                unsigned size = sizeof(midgard_branch_cond);
811

812
                if (is_conditional || is_special) {
813
                        midgard_branch_cond branch = {
814
                                .op = op,
815
                                .dest_tag = dest_tag,
816
                                .offset = quadword_offset,
817
                                .cond = cond
818
                        };
819
                        memcpy(util_dynarray_grow_bytes(emission, size, 1), &branch, size);
820
                } else {
821
                        assert(op == midgard_jmp_writeout_op_branch_uncond);
822
                        midgard_branch_uncond branch = {
823
                                .op = op,
824
                                .dest_tag = dest_tag,
825
                                .offset = quadword_offset,
826
                                .unknown = 1
827
                        };
828
                        assert(branch.offset == quadword_offset);
829
                        memcpy(util_dynarray_grow_bytes(emission, size, 1), &branch, size);
830
                }
831
        } else { /* `ins->compact_branch`,  misnomer */
832
                unsigned size = sizeof(midgard_branch_extended);
833

834
                midgard_branch_extended branch =
835
                        midgard_create_branch_extended(
836
                                        cond, op,
837
                                        dest_tag,
838
                                        quadword_offset);
839

840
                memcpy(util_dynarray_grow_bytes(emission, size, 1), &branch, size);
841
        }
842
}
843

844
static void
845
emit_alu_bundle(compiler_context *ctx,
846
                midgard_block *block,
847
                midgard_bundle *bundle,
848
                struct util_dynarray *emission,
849
                unsigned lookahead)
850
{
851
        /* Emit the control word */
852
        util_dynarray_append(emission, uint32_t, bundle->control | lookahead);
853

854
        /* Next up, emit register words */
855
        for (unsigned i = 0; i < bundle->instruction_count; ++i) {
856
                midgard_instruction *ins = bundle->instructions[i];
857

858
                /* Check if this instruction has registers */
859
                if (ins->compact_branch) continue;
860

861
                unsigned src2_reg = REGISTER_UNUSED;
862
                if (ins->has_inline_constant)
863
                        src2_reg = ins->inline_constant >> 11;
864
                else if (ins->src[1] != ~0)
865
                        src2_reg = SSA_REG_FROM_FIXED(ins->src[1]);
866

867
                /* Otherwise, just emit the registers */
868
                uint16_t reg_word = 0;
869
                midgard_reg_info registers = {
870
                        .src1_reg = (ins->src[0] == ~0 ?
871
                                        REGISTER_UNUSED :
872
                                        SSA_REG_FROM_FIXED(ins->src[0])),
873
                        .src2_reg = src2_reg,
874
                        .src2_imm = ins->has_inline_constant,
875
                        .out_reg = (ins->dest == ~0 ?
876
                                        REGISTER_UNUSED :
877
                                        SSA_REG_FROM_FIXED(ins->dest)),
878
                };
879
                memcpy(&reg_word, &registers, sizeof(uint16_t));
880
                util_dynarray_append(emission, uint16_t, reg_word);
881
        }
882

883
        /* Now, we emit the body itself */
884
        for (unsigned i = 0; i < bundle->instruction_count; ++i) {
885
                midgard_instruction *ins = bundle->instructions[i];
886

887
                if (!ins->compact_branch) {
888
                        mir_lower_inverts(ins);
889
                        mir_lower_roundmode(ins);
890
                }
891

892
                if (midgard_is_branch_unit(ins->unit)) {
893
                        emit_branch(ins, ctx, block, bundle, emission);
894
                } else if (ins->unit & UNITS_ANY_VECTOR) {
895
                        midgard_vector_alu source = vector_alu_from_instr(ins);
896
                        mir_pack_mask_alu(ins, &source);
897
                        mir_pack_vector_srcs(ins, &source);
898
                        unsigned size = sizeof(source);
899
                        memcpy(util_dynarray_grow_bytes(emission, size, 1), &source, size);
900
                } else {
901
                        midgard_scalar_alu source = vector_to_scalar_alu(vector_alu_from_instr(ins), ins);
902
                        unsigned size = sizeof(source);
903
                        memcpy(util_dynarray_grow_bytes(emission, size, 1), &source, size);
904
                }
905
        }
906

907
        /* Emit padding (all zero) */
908
        if (bundle->padding) {
909
                memset(util_dynarray_grow_bytes(emission, bundle->padding, 1),
910
                                0, bundle->padding);
911
        }
912

913
        /* Tack on constants */
914

915
        if (bundle->has_embedded_constants)
916
                util_dynarray_append(emission, midgard_constants, bundle->constants);
917
}
918

919
/* Shift applied to the immediate used as an offset. Probably this is papering
920
 * over some other semantic distinction else well, but it unifies things in the
921
 * compiler so I don't mind. */
922

923
static void
924
mir_ldst_pack_offset(midgard_instruction *ins, int offset)
925
{
926
        /* These opcodes don't support offsets */
927
        assert(!OP_IS_REG2REG_LDST(ins->op) ||
928
               ins->op == midgard_op_lea    ||
929
               ins->op == midgard_op_lea_image);
930

931
        if (OP_IS_UBO_READ(ins->op))
932
                ins->load_store.signed_offset |= PACK_LDST_UBO_OFS(offset);
933
        else if (OP_IS_IMAGE(ins->op))
934
                ins->load_store.signed_offset |= PACK_LDST_ATTRIB_OFS(offset);
935
        else if (OP_IS_SPECIAL(ins->op))
936
                ins->load_store.signed_offset |= PACK_LDST_SELECTOR_OFS(offset);
937
        else
938
                ins->load_store.signed_offset |= PACK_LDST_MEM_OFS(offset);
939
}
940

941
static enum mali_sampler_type
942
midgard_sampler_type(nir_alu_type t) {
943
        switch (nir_alu_type_get_base_type(t))
944
        {
945
        case nir_type_float:
946
                return MALI_SAMPLER_FLOAT;
947
        case nir_type_int:
948
                return MALI_SAMPLER_SIGNED;
949
        case nir_type_uint:
950
                return MALI_SAMPLER_UNSIGNED;
951
        default:
952
                unreachable("Unknown sampler type");
953
        }
954
}
955

956
/* After everything is scheduled, emit whole bundles at a time */
957

958
void
959
emit_binary_bundle(compiler_context *ctx,
960
                   midgard_block *block,
961
                   midgard_bundle *bundle,
962
                   struct util_dynarray *emission,
963
                   int next_tag)
964
{
965
        int lookahead = next_tag << 4;
966

967
        switch (bundle->tag) {
968
        case TAG_ALU_4:
969
        case TAG_ALU_8:
970
        case TAG_ALU_12:
971
        case TAG_ALU_16:
972
        case TAG_ALU_4 + 4:
973
        case TAG_ALU_8 + 4:
974
        case TAG_ALU_12 + 4:
975
        case TAG_ALU_16 + 4:
976
                emit_alu_bundle(ctx, block, bundle, emission, lookahead);
977
                break;
978

979
        case TAG_LOAD_STORE_4: {
980
                /* One or two composing instructions */
981

982
                uint64_t current64, next64 = LDST_NOP;
983

984
                /* Copy masks */
985

986
                for (unsigned i = 0; i < bundle->instruction_count; ++i) {
987
                        midgard_instruction *ins = bundle->instructions[i];
988
                        mir_pack_ldst_mask(ins);
989

990
                        /* Atomic ops don't use this swizzle the same way as other ops */
991
                        if (!OP_IS_ATOMIC(ins->op))
992
                                mir_pack_swizzle_ldst(ins);
993

994
                        /* Apply a constant offset */
995
                        unsigned offset = ins->constants.u32[0];
996
                        if (offset)
997
                                mir_ldst_pack_offset(ins, offset);
998
                }
999

1000
                midgard_load_store_word ldst0 =
1001
                        load_store_from_instr(bundle->instructions[0]);
1002
                memcpy(&current64, &ldst0, sizeof(current64));
1003

1004
                if (bundle->instruction_count == 2) {
1005
                        midgard_load_store_word ldst1 =
1006
                                load_store_from_instr(bundle->instructions[1]);
1007
                        memcpy(&next64, &ldst1, sizeof(next64));
1008
                }
1009

1010
                midgard_load_store instruction = {
1011
                        .type = bundle->tag,
1012
                        .next_type = next_tag,
1013
                        .word1 = current64,
1014
                        .word2 = next64
1015
                };
1016

1017
                util_dynarray_append(emission, midgard_load_store, instruction);
1018

1019
                break;
1020
        }
1021

1022
        case TAG_TEXTURE_4:
1023
        case TAG_TEXTURE_4_VTX:
1024
        case TAG_TEXTURE_4_BARRIER: {
1025
                /* Texture instructions are easy, since there is no pipelining
1026
                 * nor VLIW to worry about. We may need to set .cont/.last
1027
                 * flags. */
1028

1029
                midgard_instruction *ins = bundle->instructions[0];
1030

1031
                ins->texture.type = bundle->tag;
1032
                ins->texture.next_type = next_tag;
1033

1034
                /* Nothing else to pack for barriers */
1035
                if (ins->op == midgard_tex_op_barrier) {
1036
                        ins->texture.cont = ins->texture.last = 1;
1037
                        ins->texture.op = ins->op;
1038
                        util_dynarray_append(emission, midgard_texture_word, ins->texture);
1039
                        return;
1040
                }
1041

1042
                signed override = mir_upper_override(ins, 32);
1043

1044
                ins->texture.mask = override > 0 ?
1045
                        ins->mask >> override :
1046
                        ins->mask;
1047

1048
                mir_pack_swizzle_tex(ins);
1049

1050
                if (!(ctx->quirks & MIDGARD_NO_OOO))
1051
                        mir_pack_tex_ooo(block, bundle, ins);
1052

1053
                unsigned osz = nir_alu_type_get_type_size(ins->dest_type);
1054
                unsigned isz = nir_alu_type_get_type_size(ins->src_types[1]);
1055

1056
                assert(osz == 32 || osz == 16);
1057
                assert(isz == 32 || isz == 16);
1058

1059
                ins->texture.out_full = (osz == 32);
1060
                ins->texture.out_upper = override > 0;
1061
                ins->texture.in_reg_full = (isz == 32);
1062
                ins->texture.sampler_type = midgard_sampler_type(ins->dest_type);
1063
                ins->texture.outmod = ins->outmod;
1064

1065
                if (mir_op_computes_derivatives(ctx->stage, ins->op)) {
1066
                        ins->texture.cont = !ins->helper_terminate;
1067
                        ins->texture.last = ins->helper_terminate || ins->helper_execute;
1068
                } else {
1069
                        ins->texture.cont = ins->texture.last = 1;
1070
                }
1071

1072
                midgard_texture_word texture = texture_word_from_instr(ins);
1073
                util_dynarray_append(emission, midgard_texture_word, texture);
1074
                break;
1075
        }
1076

1077
        default:
1078
                unreachable("Unknown midgard instruction type\n");
1079
        }
1080
}
1081

1082
Product

Resources

Company