GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/panfrost/bifrost/bi_pack.c

/*
 * Copyright (C) 2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "compiler.h"

/* This file contains the final passes of the compiler. Running after
 * scheduling and RA, the IR is now finalized, so we need to emit it to actual
 * bits on the wire (as well as fix up branches). */

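/* Every clause starts with a 64-bit header. Much of it describes the *next*
 * clause (prefetch target, message type) and the scoreboard dependencies to
 * wait on, which is why packing a single header needs visibility of up to two
 * successor clauses. */
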
static uint64_t
bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2)
{
        /* The dependency wait mask is the union of the successors'
         * dependency sets */

        unsigned dependency_wait = next_1 ? next_1->dependencies : 0;
        dependency_wait |= next_2 ? next_2->dependencies : 0;

        bool staging_barrier = next_1 ? next_1->staging_barrier : false;
        staging_barrier |= next_2 ? next_2->staging_barrier : false;

        struct bifrost_header header = {
                .flow_control =
                        (next_1 == NULL && next_2 == NULL) ?
                        BIFROST_FLOW_END : clause->flow_control,
                .terminate_discarded_threads = clause->td,
                .next_clause_prefetch = clause->next_clause_prefetch && next_1,
                .staging_barrier = staging_barrier,
                .staging_register = clause->staging_register,
                .dependency_wait = dependency_wait,
                .dependency_slot = clause->scoreboard_id,
                .message_type = clause->message_type,
                .next_message_type = next_1 ? next_1->message_type : 0,
        };

        uint64_t u = 0;
        memcpy(&u, &header, sizeof(header));
        return u;
}

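/* A tuple accesses the register file through four "slots": slots 0 and 1 are
 * read-only, slot 2 may read or write, and slot 3 is a write (carrying the
 * previous tuple's results forward). The helpers below assign registers to
 * those slots. */
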
/* Assigns a slot for reading, before anything is written */

static void
bi_assign_slot_read(bi_registers *regs, bi_index src)
{
        /* We only assign for registers */
        if (src.type != BI_INDEX_REGISTER)
                return;

        /* Check if we already assigned the slot */
        for (unsigned i = 0; i <= 1; ++i) {
                if (regs->slot[i] == src.value && regs->enabled[i])
                        return;
        }

        if (regs->slot[2] == src.value && regs->slot23.slot2 == BIFROST_OP_READ)
                return;

        /* Assign it now */

        for (unsigned i = 0; i <= 1; ++i) {
                if (!regs->enabled[i]) {
                        regs->slot[i] = src.value;
                        regs->enabled[i] = true;
                        return;
                }
        }

        if (!regs->slot23.slot3) {
                regs->slot[2] = src.value;
                regs->slot23.slot2 = BIFROST_OP_READ;
                return;
        }

        bi_print_slots(regs, stderr);
        unreachable("Failed to find a free slot for src");
}

static bi_registers
bi_assign_slots(bi_tuple *now, bi_tuple *prev)
{
        /* We assign slots for the main register mechanism. Special ops
         * use the data registers, which have their own mechanism entirely
         * and thus get skipped over here. */

        bool read_dreg = now->add && bi_opcode_props[now->add->op].sr_read;
        bool write_dreg = prev->add && bi_opcode_props[prev->add->op].sr_write;

        /* First, assign reads */

        if (now->fma)
                bi_foreach_src(now->fma, src)
                        bi_assign_slot_read(&now->regs, (now->fma)->src[src]);

        if (now->add) {
                bi_foreach_src(now->add, src) {
                        if (!(src == 0 && read_dreg))
                                bi_assign_slot_read(&now->regs, (now->add)->src[src]);
                }
        }

        /* Next, assign writes. Staging writes are assigned separately, but
         * +ATEST wants its destination written to both a staging register
         * _and_ a regular write, because it may not generate a message */

        if (prev->add && (!write_dreg || prev->add->op == BI_OPCODE_ATEST)) {
                bi_index idx = prev->add->dest[0];

                if (idx.type == BI_INDEX_REGISTER) {
                        now->regs.slot[3] = idx.value;
                        now->regs.slot23.slot3 = BIFROST_OP_WRITE;
                }
        }

        if (prev->fma) {
                bi_index idx = (prev->fma)->dest[0];

                if (idx.type == BI_INDEX_REGISTER) {
                        if (now->regs.slot23.slot3) {
                                /* Scheduler constraint: cannot read 3 and write 2 */
                                assert(!now->regs.slot23.slot2);
                                now->regs.slot[2] = idx.value;
                                now->regs.slot23.slot2 = BIFROST_OP_WRITE;
                        } else {
                                now->regs.slot[3] = idx.value;
                                now->regs.slot23.slot3 = BIFROST_OP_WRITE;
                                now->regs.slot23.slot3_fma = true;
                        }
                }
        }

        return now->regs;
}

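/* The slot 2/3 read-write configuration must match one of the register modes
 * the hardware can encode: the idle modes are special-cased, and everything
 * else is found by a linear search of bifrost_reg_ctrl_lut. */
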
static enum bifrost_reg_mode
bi_pack_register_mode(bi_registers r)
{
        /* Handle idle as a special case */
        if (!(r.slot23.slot2 | r.slot23.slot3))
                return r.first_instruction ? BIFROST_IDLE_1 : BIFROST_IDLE;

        /* Otherwise, use the LUT */
        for (unsigned i = 0; i < ARRAY_SIZE(bifrost_reg_ctrl_lut); ++i) {
                if (memcmp(bifrost_reg_ctrl_lut + i, &r.slot23, sizeof(r.slot23)) == 0)
                        return i;
        }

        bi_print_slots(&r, stderr);
        unreachable("Invalid slot assignment");
}

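/* Packs the 64-bit register control word for a tuple: the two read slots
 * (reg0/reg1 plus the ctrl mode bits), the slot 2/3 registers, and the FAU
 * index. To illustrate the 63-x trick used below: reading r40 and r50 stores
 * 23 (= 63 - 40) and 13 (= 63 - 50) instead, and the resulting inverted
 * ordering of the two fields is presumably what lets the decoder detect the
 * transform (see docs/disasm). */
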
static uint64_t
bi_pack_registers(bi_registers regs)
{
        enum bifrost_reg_mode mode = bi_pack_register_mode(regs);
        struct bifrost_regs s = { 0 };
        uint64_t packed = 0;

        /* Need to pack the 5-bit mode into a 4-bit field. The decoder moves
         * bit 3 to bit 4 for the first instruction and adds 16 when
         * reg 2 == reg 3 */

        unsigned ctrl;
        bool r2_equals_r3 = false;

        if (regs.first_instruction) {
                /* Bit 3 implicitly must be clear for first instructions.
                 * The affected patterns all write both ADD/FMA, but that
                 * is forbidden for the last instruction (whose writes are
                 * encoded by the first), so this does not add additional
                 * encoding constraints */
                assert(!(mode & 0x8));

                /* Move bit 4 to bit 3, since bit 3 is clear */
                ctrl = (mode & 0x7) | ((mode & 0x10) >> 1);

                /* If r2 is allowed to equal r3, we must force it, or the
                 * hardware raises INSTR_INVALID_ENC (it's unclear why). */
                if (!(regs.slot23.slot2 && regs.slot23.slot3))
                        r2_equals_r3 = true;
        } else {
                /* The upper bit of the mode is encoded by whether we force
                 * r2 = r3 */
                ctrl = (mode & 0xF);
                r2_equals_r3 = (mode & 0x10);
        }

        if (regs.enabled[1]) {
                /* The ordering saves an encoding bit and is required by the
                 * 63-x trick */
                assert(regs.slot[1] > regs.slot[0]);
                assert(regs.enabled[0]);

                /* Do the 63-x trick, see docs/disasm */
                if (regs.slot[0] > 31) {
                        regs.slot[0] = 63 - regs.slot[0];
                        regs.slot[1] = 63 - regs.slot[1];
                }

                assert(regs.slot[0] <= 31);
                assert(regs.slot[1] <= 63);

                s.ctrl = ctrl;
                s.reg1 = regs.slot[1];
                s.reg0 = regs.slot[0];
        } else {
                /* Slot 1 disabled, so set to zero and use slot 1 for ctrl */
                s.ctrl = 0;
                s.reg1 = ctrl << 2;

                if (regs.enabled[0]) {
                        /* Bit 0 is the upper bit of slot 0 */
                        s.reg1 |= (regs.slot[0] >> 5);

                        /* Rest of slot 0 in usual spot */
                        s.reg0 = (regs.slot[0] & 0b11111);
                } else {
                        /* Bit 1 set if slot 0 also disabled */
                        s.reg1 |= (1 << 1);
                }
        }

        /* Force r2 = r3 or r2 != r3 as needed */
        if (r2_equals_r3) {
                assert(regs.slot[3] == regs.slot[2] || !(regs.slot23.slot2 && regs.slot23.slot3));

                if (regs.slot23.slot2)
                        regs.slot[3] = regs.slot[2];
                else
                        regs.slot[2] = regs.slot[3];
        } else if (!regs.first_instruction) {
                /* Enforced by the encoding anyway */
                assert(regs.slot[2] != regs.slot[3]);
        }

        s.reg2 = regs.slot[2];
        s.reg3 = regs.slot[3];
        s.fau_idx = regs.fau_idx;

        memcpy(&packed, &s, sizeof(s));
        return packed;
}

/* We must ensure slot 1 > slot 0 for the 63-x trick to function, so we fix
 * this up at pack time. (Scheduling doesn't care.) */

static void
bi_flip_slots(bi_registers *regs)
{
        if (regs->enabled[0] && regs->enabled[1] && regs->slot[1] < regs->slot[0]) {
                unsigned temp = regs->slot[0];
                regs->slot[0] = regs->slot[1];
                regs->slot[1] = temp;
        }
}

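/* Looks up the port a register was assigned to by the slot allocation above;
 * sources in the packed encoding name ports rather than register numbers. */
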
static inline enum bifrost_packed_src
bi_get_src_slot(bi_registers *regs, unsigned reg)
{
        if (regs->slot[0] == reg && regs->enabled[0])
                return BIFROST_SRC_PORT0;
        else if (regs->slot[1] == reg && regs->enabled[1])
                return BIFROST_SRC_PORT1;
        else if (regs->slot[2] == reg && regs->slot23.slot2 == BIFROST_OP_READ)
                return BIFROST_SRC_PORT2;
        else
                unreachable("Tried to access register with no port");
}

static inline enum bifrost_packed_src
bi_get_src_new(bi_instr *ins, bi_registers *regs, unsigned s)
{
        if (!ins)
                return 0;

        bi_index src = ins->src[s];

        if (src.type == BI_INDEX_REGISTER)
                return bi_get_src_slot(regs, src.value);
        else if (src.type == BI_INDEX_PASS)
                return src.value;
        else if (bi_is_null(src) && ins->op == BI_OPCODE_ZS_EMIT && s < 2)
                return BIFROST_SRC_STAGE;
        else {
                /* TODO make safer */
                return BIFROST_SRC_STAGE;
        }
}

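/* Packs one tuple into its 78-bit encoding: the 64-bit lo word carries the
 * register control block, the FMA opcode bits (shifted to bit 35), and the
 * low 6 bits of the ADD; the hi word carries the remaining ADD bits. Also
 * records the clause's staging register when the ADD stages a read or
 * write. */
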
static struct bi_packed_tuple
bi_pack_tuple(bi_clause *clause, bi_tuple *tuple, bi_tuple *prev, bool first_tuple, gl_shader_stage stage)
{
        bi_assign_slots(tuple, prev);
        tuple->regs.fau_idx = tuple->fau_idx;
        tuple->regs.first_instruction = first_tuple;

        bi_flip_slots(&tuple->regs);

        bool sr_read = tuple->add &&
                bi_opcode_props[(tuple->add)->op].sr_read;

        uint64_t reg = bi_pack_registers(tuple->regs);
        uint64_t fma = bi_pack_fma(tuple->fma,
                        bi_get_src_new(tuple->fma, &tuple->regs, 0),
                        bi_get_src_new(tuple->fma, &tuple->regs, 1),
                        bi_get_src_new(tuple->fma, &tuple->regs, 2),
                        bi_get_src_new(tuple->fma, &tuple->regs, 3));

        uint64_t add = bi_pack_add(tuple->add,
                        bi_get_src_new(tuple->add, &tuple->regs, sr_read + 0),
                        bi_get_src_new(tuple->add, &tuple->regs, sr_read + 1),
                        bi_get_src_new(tuple->add, &tuple->regs, sr_read + 2),
                        0);

        if (tuple->add) {
                bi_instr *add = tuple->add;

                bool sr_write = bi_opcode_props[add->op].sr_write &&
                        !bi_is_null(add->dest[0]);

                if (sr_read && !bi_is_null(add->src[0])) {
                        assert(add->src[0].type == BI_INDEX_REGISTER);
                        clause->staging_register = add->src[0].value;

                        if (sr_write)
                                assert(bi_is_equiv(add->src[0], add->dest[0]));
                } else if (sr_write) {
                        assert(add->dest[0].type == BI_INDEX_REGISTER);
                        clause->staging_register = add->dest[0].value;
                }
        }

        struct bi_packed_tuple packed = {
                .lo = reg | (fma << 35) | ((add & 0b111111) << 58),
                .hi = add >> 6
        };

        return packed;
}

/* A block contains at most one PC-relative constant, from a terminal branch.
 * Find the last instruction and if it is a relative branch, fix up the
 * PC-relative constant to contain the absolute offset. This occurs at pack
 * time instead of schedule time because the number of quadwords between each
 * block is not known until after all other passes have finished.
 */

static void
bi_assign_branch_offset(bi_context *ctx, bi_block *block)
{
        if (list_is_empty(&block->clauses))
                return;

        bi_clause *clause = list_last_entry(&block->clauses, bi_clause, link);
        bi_instr *br = bi_last_instr_in_clause(clause);

        if (!br->branch_target)
                return;

        /* Compute the offset in bytes (quadwords are 16 bytes) */
        int32_t qwords = bi_block_offset(ctx, clause, br->branch_target);
        int32_t bytes = qwords * 16;

        /* Copy so we can toy with the sign without undefined behaviour */
        uint32_t raw = 0;
        memcpy(&raw, &bytes, sizeof(raw));

        /* Clear off the top bits, which are used for the A1/B1 bits */
        raw &= ~0xF0000000;

        /* Put it in the top 32 bits of the constant */
        assert(clause->pcrel_idx < 8);
        clause->constants[clause->pcrel_idx] |= ((uint64_t) raw) << 32ull;
}

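/* Constants ride along in the clause as pairs of 60-bit immediates ("quads").
 * Where a quad lands in the clause depends on the tuple count, hence the
 * position lookup table below. */
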
static void
bi_pack_constants(unsigned tuple_count, uint64_t *constants,
                unsigned word_idx, unsigned constant_words, bool ec0_packed,
                struct util_dynarray *emission)
{
        unsigned index = (word_idx << 1) + ec0_packed;

        /* Do more constants follow? */
        bool more = (word_idx + 1) < constant_words;

        /* Indexed first by tuple count and second by constant word number;
         * gives the position in the clause */
        unsigned pos_lookup[8][3] = {
                { 0 },
                { 1 },
                { 3 },
                { 2, 5 },
                { 4, 8 },
                { 7, 11, 14 },
                { 6, 10, 13 },
                { 9, 12 }
        };

        /* Compute the pos, and check everything is reasonable */
        assert((tuple_count - 1) < 8);
        assert(word_idx < 3);
        unsigned pos = pos_lookup[tuple_count - 1][word_idx];
        assert(pos != 0 || (tuple_count == 1 && word_idx == 0));

        struct bifrost_fmt_constant quad = {
                .pos = pos,
                .tag = more ? BIFROST_FMTC_CONSTANTS : BIFROST_FMTC_FINAL,
                .imm_1 = constants[index + 0] >> 4,
                .imm_2 = constants[index + 1] >> 4,
        };

        util_dynarray_append(emission, struct bifrost_fmt_constant, quad);
}

static inline uint8_t
bi_pack_literal(enum bi_clause_subword literal)
{
        assert(literal >= BI_CLAUSE_SUBWORD_LITERAL_0);
        assert(literal <= BI_CLAUSE_SUBWORD_LITERAL_7);

        return (literal - BI_CLAUSE_SUBWORD_LITERAL_0);
}

static inline uint8_t
bi_clause_upper(unsigned val,
                struct bi_packed_tuple *tuples,
                ASSERTED unsigned tuple_count)
{
        assert(val < tuple_count);

        /* Top 3 bits of the 78-bit tuple: tuple >> 75 == (tuple >> 64) >> 11 */
        struct bi_packed_tuple tuple = tuples[val];
        return (tuple.hi >> 11);
}

static inline uint8_t
bi_pack_upper(enum bi_clause_subword upper,
              struct bi_packed_tuple *tuples,
              ASSERTED unsigned tuple_count)
{
        assert(upper >= BI_CLAUSE_SUBWORD_UPPER_0);
        assert(upper <= BI_CLAUSE_SUBWORD_UPPER_7);

        return bi_clause_upper(upper - BI_CLAUSE_SUBWORD_UPPER_0, tuples,
                               tuple_count);
}

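/* Extracts an arbitrary bitfield from a packed 78-bit tuple, which is stored
 * as a {lo, hi} pair of 64-bit words. */
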
static inline uint64_t
bi_pack_tuple_bits(enum bi_clause_subword idx,
                   struct bi_packed_tuple *tuples,
                   ASSERTED unsigned tuple_count,
                   unsigned offset, unsigned nbits)
{
        assert(idx >= BI_CLAUSE_SUBWORD_TUPLE_0);
        assert(idx <= BI_CLAUSE_SUBWORD_TUPLE_7);

        unsigned val = (idx - BI_CLAUSE_SUBWORD_TUPLE_0);
        assert(val < tuple_count);

        struct bi_packed_tuple tuple = tuples[val];

        assert(offset + nbits < 78);
        assert(nbits <= 64);

        /* (X >> start) & m
         * = (((hi << 64) | lo) >> start) & m
         * = (((hi << 64) >> start) | (lo >> start)) & m
         * = { ((hi << (64 - start)) | (lo >> start)) & m if start <= 64
         *   { ((hi >> (start - 64)) | (lo >> start)) & m if start >= 64
         * = { ((hi << (64 - start)) & m) | ((lo >> start) & m) if start <= 64
         *   { ((hi >> (start - 64)) & m) | ((lo >> start) & m) if start >= 64
         *
         * By setting m = 2^64 - 1, we justify doing the respective shifts as
         * 64-bit integers. Zero is special-cased to avoid undefined behaviour.
         */

        uint64_t lo = (tuple.lo >> offset);
        uint64_t hi = (offset == 0) ? 0
                : (offset > 64) ? (tuple.hi >> (offset - 64))
                : (tuple.hi << (64 - offset));

        return (lo | hi) & ((1ULL << nbits) - 1);
}

static inline uint16_t
bi_pack_lu(enum bi_clause_subword word,
           struct bi_packed_tuple *tuples,
           ASSERTED unsigned tuple_count)
{
        return (word >= BI_CLAUSE_SUBWORD_UPPER_0) ?
                bi_pack_upper(word, tuples, tuple_count) :
                bi_pack_literal(word);
}

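/* The sync byte at the bottom of each 128-bit word packs three small
 * subwords: t3 in bits 0-2, t2 in bits 3-5, and either a literal or the Z
 * (end-of-clause) flag starting at bit 6. */
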
static uint8_t
bi_pack_sync(enum bi_clause_subword t1,
             enum bi_clause_subword t2,
             enum bi_clause_subword t3,
             struct bi_packed_tuple *tuples,
             ASSERTED unsigned tuple_count,
             bool z)
{
        uint8_t sync =
                (bi_pack_lu(t3, tuples, tuple_count) << 0) |
                (bi_pack_lu(t2, tuples, tuple_count) << 3);

        if (t1 == BI_CLAUSE_SUBWORD_Z)
                sync |= z << 6;
        else
                sync |= bi_pack_literal(t1) << 6;

        return sync;
}

static inline uint64_t
bi_pack_t_ec(enum bi_clause_subword word,
             struct bi_packed_tuple *tuples,
             ASSERTED unsigned tuple_count,
             uint64_t ec0)
{
        if (word == BI_CLAUSE_SUBWORD_CONSTANT)
                return ec0;
        else
                return bi_pack_tuple_bits(word, tuples, tuple_count, 0, 60);
}

static uint32_t
bi_pack_subwords_56(enum bi_clause_subword t,
                    struct bi_packed_tuple *tuples,
                    ASSERTED unsigned tuple_count,
                    uint64_t header, uint64_t ec0,
                    unsigned tuple_subword)
{
        switch (t) {
        case BI_CLAUSE_SUBWORD_HEADER:
                return (header & ((1 << 30) - 1));
        case BI_CLAUSE_SUBWORD_RESERVED:
                return 0;
        case BI_CLAUSE_SUBWORD_CONSTANT:
                return (ec0 >> 15) & ((1 << 30) - 1);
        default:
                return bi_pack_tuple_bits(t, tuples, tuple_count, tuple_subword * 15, 30);
        }
}

static uint16_t
bi_pack_subword(enum bi_clause_subword t, unsigned format,
                struct bi_packed_tuple *tuples,
                ASSERTED unsigned tuple_count,
                uint64_t header, uint64_t ec0, unsigned m0,
                unsigned tuple_subword)
{
        switch (t) {
        case BI_CLAUSE_SUBWORD_HEADER:
                return header >> 30;
        case BI_CLAUSE_SUBWORD_M:
                return m0;
        case BI_CLAUSE_SUBWORD_CONSTANT:
                return (format == 5 || format == 10) ?
                        (ec0 & ((1 << 15) - 1)) :
                        (ec0 >> (15 + 30));
        case BI_CLAUSE_SUBWORD_UPPER_23:
                return (bi_clause_upper(2, tuples, tuple_count) << 12) |
                       (bi_clause_upper(3, tuples, tuple_count) << 9);
        case BI_CLAUSE_SUBWORD_UPPER_56:
                return (bi_clause_upper(5, tuples, tuple_count) << 12) |
                       (bi_clause_upper(6, tuples, tuple_count) << 9);
        case BI_CLAUSE_SUBWORD_UPPER_0 ... BI_CLAUSE_SUBWORD_UPPER_7:
                return bi_pack_upper(t, tuples, tuple_count) << 12;
        default:
                return bi_pack_tuple_bits(t, tuples, tuple_count, tuple_subword * 15, 15);
        }
}

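/* Assembles one 128-bit beat of the clause from a format descriptor: a sync
 * byte, then subwords drawn from the tuples, the header, the embedded
 * constant (EC0), and the M0 field, spliced together per bi_clause_formats. */
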
/* EC0 is 60-bits (bottom 4 already shifted off) */
void
bi_pack_format(struct util_dynarray *emission,
               unsigned index,
               struct bi_packed_tuple *tuples,
               ASSERTED unsigned tuple_count,
               uint64_t header, uint64_t ec0,
               unsigned m0, bool z)
{
        struct bi_clause_format format = bi_clause_formats[index];

        uint8_t sync = bi_pack_sync(format.tag_1, format.tag_2, format.tag_3,
                                    tuples, tuple_count, z);

        uint64_t s0_s3 = bi_pack_t_ec(format.s0_s3, tuples, tuple_count, ec0);

        uint16_t s4 = bi_pack_subword(format.s4, format.format, tuples, tuple_count, header, ec0, m0, 4);

        uint32_t s5_s6 = bi_pack_subwords_56(format.s5_s6,
                        tuples, tuple_count, header, ec0,
                        (format.format == 2 || format.format == 7) ? 0 : 3);

        uint64_t s7 = bi_pack_subword(format.s7, format.format, tuples, tuple_count, header, ec0, m0, 2);

        /* Now that subwords are packed, split into 64-bit halves and emit */
        uint64_t lo = sync | ((s0_s3 & ((1ull << 56) - 1)) << 8);
        uint64_t hi = (s0_s3 >> 56) | ((uint64_t) s4 << 4) | ((uint64_t) s5_s6 << 19) | ((uint64_t) s7 << 49);

        util_dynarray_append(emission, uint64_t, lo);
        util_dynarray_append(emission, uint64_t, hi);
}

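/* Emits a whole clause: one 128-bit format word per entry in the indices
 * table for this tuple count, followed by any constant quads that are not
 * embedded in the clause body (EC0 may be packed directly). */
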
static void
bi_pack_clause(bi_context *ctx, bi_clause *clause,
               bi_clause *next_1, bi_clause *next_2,
               struct util_dynarray *emission, gl_shader_stage stage)
{
        struct bi_packed_tuple ins[8] = { 0 };

        for (unsigned i = 0; i < clause->tuple_count; ++i) {
                unsigned prev = ((i == 0) ? clause->tuple_count : i) - 1;
                ins[i] = bi_pack_tuple(clause, &clause->tuples[i],
                                &clause->tuples[prev], i == 0, stage);
        }

        bool ec0_packed = bi_ec0_packed(clause->tuple_count);

        if (ec0_packed)
                clause->constant_count = MAX2(clause->constant_count, 1);

        unsigned constant_quads =
                DIV_ROUND_UP(clause->constant_count - (ec0_packed ? 1 : 0), 2);

        uint64_t header = bi_pack_header(clause, next_1, next_2);
        uint64_t ec0 = (clause->constants[0] >> 4);
        unsigned m0 = (clause->pcrel_idx == 0) ? 4 : 0;

        unsigned counts[8] = {
                1, 2, 3, 3, 4, 5, 5, 6
        };

        /* Indices into bi_clause_formats, selected by tuple count */
        unsigned indices[8][6] = {
                { 1 },
                { 0, 2 },
                { 0, 3, 4 },
                { 0, 3, 6 },
                { 0, 3, 7, 8 },
                { 0, 3, 5, 9, 10 },
                { 0, 3, 5, 9, 11 },
                { 0, 3, 5, 9, 12, 13 },
        };

        unsigned count = counts[clause->tuple_count - 1];

        for (unsigned pos = 0; pos < count; ++pos) {
                ASSERTED unsigned idx = indices[clause->tuple_count - 1][pos];
                assert(bi_clause_formats[idx].pos == pos);
                assert((bi_clause_formats[idx].tag_1 == BI_CLAUSE_SUBWORD_Z) ==
                       (pos == count - 1));

                /* Whether to end the clause immediately after the last tuple */
                bool z = (constant_quads == 0);

                bi_pack_format(emission, indices[clause->tuple_count - 1][pos],
                               ins, clause->tuple_count, header, ec0, m0,
                               z);
        }

        /* Pack the remaining constants */

        for (unsigned pos = 0; pos < constant_quads; ++pos) {
                bi_pack_constants(clause->tuple_count, clause->constants,
                                  pos, constant_quads, ec0_packed, emission);
        }
}

static void
bi_collect_blend_ret_addr(bi_context *ctx, struct util_dynarray *emission,
                          const bi_clause *clause)
{
        /* No need to collect return addresses when we're in a blend shader. */
        if (ctx->inputs->is_blend)
                return;

        const bi_tuple *tuple = &clause->tuples[clause->tuple_count - 1];
        const bi_instr *ins = tuple->add;

        if (!ins || ins->op != BI_OPCODE_BLEND)
                return;

        unsigned loc = tuple->regs.fau_idx - BIR_FAU_BLEND_0;
        assert(loc < ARRAY_SIZE(ctx->info->bifrost.blend));
        assert(!ctx->info->bifrost.blend[loc].return_offset);
        ctx->info->bifrost.blend[loc].return_offset =
                util_dynarray_num_elements(emission, uint8_t);
        assert(!(ctx->info->bifrost.blend[loc].return_offset & 0x7));
}

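/* Packing is the final step: fix up branch offsets per block, then pack each
 * clause with a view of its successors. Returns the number of bytes emitted
 * for the final clause. */
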
unsigned
bi_pack(bi_context *ctx, struct util_dynarray *emission)
{
        unsigned previous_size = emission->size;

        bi_foreach_block(ctx, _block) {
                bi_block *block = (bi_block *) _block;

                bi_assign_branch_offset(ctx, block);

                bi_foreach_clause_in_block(block, clause) {
                        bool is_last = (clause->link.next == &block->clauses);

                        /* Get the succeeding clauses, either two successors of
                         * the block for the last clause in the block or just
                         * the next clause within the block */

                        bi_clause *next = NULL, *next_2 = NULL;

                        if (is_last) {
                                next = bi_next_clause(ctx, block->base.successors[0], NULL);
                                next_2 = bi_next_clause(ctx, block->base.successors[1], NULL);
                        } else {
                                next = bi_next_clause(ctx, _block, clause);
                        }

                        previous_size = emission->size;

                        bi_pack_clause(ctx, clause, next, next_2, emission, ctx->stage);

                        if (!is_last)
                                bi_collect_blend_ret_addr(ctx, emission, clause);
                }
        }

        return emission->size - previous_size;
}

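/* Self-tests for the subword packing helpers. These are compiled out of
 * release (NDEBUG) builds and exercised via bi_test_packing(). */
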
#ifndef NDEBUG

static void
bi_test_pack_literal(void)
{
        for (unsigned x = 0; x <= 7; ++x)
                assert(bi_pack_literal(BI_CLAUSE_SUBWORD_LITERAL_0 + x) == x);
}

static void
bi_test_pack_upper(void)
{
        struct bi_packed_tuple tuples[] = {
                { 0, 0x3 << (75 - 64) },
                { 0, 0x1 << (75 - 64) },
                { 0, 0x7 << (75 - 64) },
                { 0, 0x0 << (75 - 64) },
                { 0, 0x2 << (75 - 64) },
                { 0, 0x6 << (75 - 64) },
                { 0, 0x5 << (75 - 64) },
                { 0, 0x4 << (75 - 64) },
        };

        assert(bi_pack_upper(BI_CLAUSE_SUBWORD_UPPER_0 + 0, tuples, 8) == 3);
        assert(bi_pack_upper(BI_CLAUSE_SUBWORD_UPPER_0 + 1, tuples, 8) == 1);
        assert(bi_pack_upper(BI_CLAUSE_SUBWORD_UPPER_0 + 2, tuples, 8) == 7);
        assert(bi_pack_upper(BI_CLAUSE_SUBWORD_UPPER_0 + 3, tuples, 8) == 0);
        assert(bi_pack_upper(BI_CLAUSE_SUBWORD_UPPER_0 + 4, tuples, 8) == 2);
        assert(bi_pack_upper(BI_CLAUSE_SUBWORD_UPPER_0 + 5, tuples, 8) == 6);
        assert(bi_pack_upper(BI_CLAUSE_SUBWORD_UPPER_0 + 6, tuples, 8) == 5);
        assert(bi_pack_upper(BI_CLAUSE_SUBWORD_UPPER_0 + 7, tuples, 8) == 4);
}

static void
bi_test_pack_tuple_bits(void)
{
        struct bi_packed_tuple tuples[] = {
                { 0x1234567801234567, 0x3A },
                { 0x9876543299999999, 0x1B },
                { 0xABCDEF0101234567, 0x7C },
        };

        assert(bi_pack_tuple_bits(BI_CLAUSE_SUBWORD_TUPLE_0 + 0, tuples, 8, 0, 30) == 0x01234567);
        assert(bi_pack_tuple_bits(BI_CLAUSE_SUBWORD_TUPLE_0 + 1, tuples, 8, 10, 30) == 0xca66666);
        assert(bi_pack_tuple_bits(BI_CLAUSE_SUBWORD_TUPLE_0 + 2, tuples, 8, 40, 15) == 0x4def);
}

#define L(x) (BI_CLAUSE_SUBWORD_LITERAL_0 + x)
#define U(x) (BI_CLAUSE_SUBWORD_UPPER_0 + x)
#define Z BI_CLAUSE_SUBWORD_Z

static void
bi_test_pack_sync(void)
{
        struct bi_packed_tuple tuples[] = {
                { 0, 0x3 << (75 - 64) },
                { 0, 0x5 << (75 - 64) },
                { 0, 0x7 << (75 - 64) },
                { 0, 0x0 << (75 - 64) },
                { 0, 0x2 << (75 - 64) },
                { 0, 0x6 << (75 - 64) },
                { 0, 0x5 << (75 - 64) },
                { 0, 0x4 << (75 - 64) },
        };

        assert(bi_pack_sync(L(3), L(1), L(7), tuples, 8, false) == 0xCF);
        assert(bi_pack_sync(L(3), L(1), U(7), tuples, 8, false) == 0xCC);
        assert(bi_pack_sync(L(3), U(1), U(7), tuples, 8, false) == 0xEC);
        assert(bi_pack_sync(Z, U(1), U(7), tuples, 8, false) == 0x2C);
        assert(bi_pack_sync(Z, U(1), U(7), tuples, 8, true) == 0x6C);
}

int bi_test_packing(void)
{
        bi_test_pack_literal();
        bi_test_pack_upper();
        bi_test_pack_tuple_bits();
        bi_test_pack_sync();

        return 0;
}
#endif