Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/vc4/vc4_qpu.c
4570 views
1
/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23
24
#include <stdbool.h>
25
#include "util/ralloc.h"
26
#include "vc4_qir.h"
27
#include "vc4_qpu.h"
28
29
/*
 * Builds the bits that select `mux` as an ALU input in the mux field
 * `muxfield`.
 *
 * A small-immediate source is emitted as QPU_MUX_B: set_src_raddr() stores
 * the immediate itself in the raddr B field.
 *
 * Note: `mux` is evaluated twice, so it must not have side effects.
 */
#define QPU_MUX(mux, muxfield)                                          \
        QPU_SET_FIELD((mux) != QPU_MUX_SMALL_IMM ? (mux) : QPU_MUX_B,   \
                      muxfield)
31
32
static uint64_t
33
set_src_raddr(uint64_t inst, struct qpu_reg src)
34
{
35
if (src.mux == QPU_MUX_A) {
36
assert(QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_NOP ||
37
QPU_GET_FIELD(inst, QPU_RADDR_A) == src.addr);
38
return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_A);
39
}
40
41
if (src.mux == QPU_MUX_B) {
42
assert((QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP ||
43
QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr) &&
44
QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM);
45
return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_B);
46
}
47
48
if (src.mux == QPU_MUX_SMALL_IMM) {
49
if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) {
50
assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr);
51
} else {
52
inst = qpu_set_sig(inst, QPU_SIG_SMALL_IMM);
53
assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP);
54
}
55
return ((inst & ~QPU_RADDR_B_MASK) |
56
QPU_SET_FIELD(src.addr, QPU_RADDR_B));
57
}
58
59
return inst;
60
}
61
62
uint64_t
63
qpu_NOP()
64
{
65
uint64_t inst = 0;
66
67
inst |= QPU_SET_FIELD(QPU_A_NOP, QPU_OP_ADD);
68
inst |= QPU_SET_FIELD(QPU_M_NOP, QPU_OP_MUL);
69
70
/* Note: These field values are actually non-zero */
71
inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
72
inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
73
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
74
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
75
inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
76
77
return inst;
78
}
79
80
static uint64_t
81
qpu_a_dst(struct qpu_reg dst)
82
{
83
uint64_t inst = 0;
84
85
if (dst.mux <= QPU_MUX_R5) {
86
/* Translate the mux to the ACCn values. */
87
inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_ADD);
88
} else {
89
inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_ADD);
90
if (dst.mux == QPU_MUX_B)
91
inst |= QPU_WS;
92
}
93
94
return inst;
95
}
96
97
static uint64_t
98
qpu_m_dst(struct qpu_reg dst)
99
{
100
uint64_t inst = 0;
101
102
if (dst.mux <= QPU_MUX_R5) {
103
/* Translate the mux to the ACCn values. */
104
inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_MUL);
105
} else {
106
inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_MUL);
107
if (dst.mux == QPU_MUX_A)
108
inst |= QPU_WS;
109
}
110
111
return inst;
112
}
113
114
uint64_t
115
qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src)
116
{
117
uint64_t inst = 0;
118
119
inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
120
inst |= QPU_SET_FIELD(QPU_A_OR, QPU_OP_ADD);
121
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
122
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
123
inst |= qpu_a_dst(dst);
124
inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
125
inst |= QPU_MUX(src.mux, QPU_ADD_A);
126
inst |= QPU_MUX(src.mux, QPU_ADD_B);
127
inst = set_src_raddr(inst, src);
128
inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
129
130
return inst;
131
}
132
133
uint64_t
134
qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src)
135
{
136
uint64_t inst = 0;
137
138
inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
139
inst |= QPU_SET_FIELD(QPU_M_V8MIN, QPU_OP_MUL);
140
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
141
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
142
inst |= qpu_m_dst(dst);
143
inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
144
inst |= QPU_MUX(src.mux, QPU_MUL_A);
145
inst |= QPU_MUX(src.mux, QPU_MUL_B);
146
inst = set_src_raddr(inst, src);
147
inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
148
149
return inst;
150
}
151
152
uint64_t
153
qpu_load_imm_ui(struct qpu_reg dst, uint32_t val)
154
{
155
uint64_t inst = 0;
156
157
inst |= qpu_a_dst(dst);
158
inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
159
inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
160
inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
161
inst |= QPU_SET_FIELD(QPU_SIG_LOAD_IMM, QPU_SIG);
162
inst |= val;
163
164
return inst;
165
}
166
167
uint64_t
168
qpu_load_imm_u2(struct qpu_reg dst, uint32_t val)
169
{
170
return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_U2,
171
QPU_LOAD_IMM_MODE);
172
}
173
174
uint64_t
175
qpu_load_imm_i2(struct qpu_reg dst, uint32_t val)
176
{
177
return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_I2,
178
QPU_LOAD_IMM_MODE);
179
}
180
181
uint64_t
182
qpu_branch(uint32_t cond, uint32_t target)
183
{
184
uint64_t inst = 0;
185
186
inst |= qpu_a_dst(qpu_ra(QPU_W_NOP));
187
inst |= qpu_m_dst(qpu_rb(QPU_W_NOP));
188
inst |= QPU_SET_FIELD(cond, QPU_BRANCH_COND);
189
inst |= QPU_SET_FIELD(QPU_SIG_BRANCH, QPU_SIG);
190
inst |= QPU_SET_FIELD(target, QPU_BRANCH_TARGET);
191
192
return inst;
193
}
194
195
uint64_t
196
qpu_a_alu2(enum qpu_op_add op,
197
struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
198
{
199
uint64_t inst = 0;
200
201
inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
202
inst |= QPU_SET_FIELD(op, QPU_OP_ADD);
203
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
204
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
205
inst |= qpu_a_dst(dst);
206
inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);
207
inst |= QPU_MUX(src0.mux, QPU_ADD_A);
208
inst = set_src_raddr(inst, src0);
209
inst |= QPU_MUX(src1.mux, QPU_ADD_B);
210
inst = set_src_raddr(inst, src1);
211
inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);
212
213
return inst;
214
}
215
216
uint64_t
217
qpu_m_alu2(enum qpu_op_mul op,
218
struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)
219
{
220
uint64_t inst = 0;
221
222
inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);
223
inst |= QPU_SET_FIELD(op, QPU_OP_MUL);
224
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
225
inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);
226
inst |= qpu_m_dst(dst);
227
inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);
228
inst |= QPU_MUX(src0.mux, QPU_MUL_A);
229
inst = set_src_raddr(inst, src0);
230
inst |= QPU_MUX(src1.mux, QPU_MUL_B);
231
inst = set_src_raddr(inst, src1);
232
inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);
233
234
return inst;
235
}
236
237
uint64_t
238
qpu_m_rot(struct qpu_reg dst, struct qpu_reg src0, int rot)
239
{
240
uint64_t inst = 0;
241
inst = qpu_m_alu2(QPU_M_V8MIN, dst, src0, src0);
242
243
inst = QPU_UPDATE_FIELD(inst, QPU_SIG_SMALL_IMM, QPU_SIG);
244
inst = QPU_UPDATE_FIELD(inst, QPU_SMALL_IMM_MUL_ROT + rot,
245
QPU_SMALL_IMM);
246
247
return inst;
248
}
249
250
/**
 * Reconciles one bit-field of two instruction words.
 *
 * `mask` selects the field and `ignore` is the (already shifted) field
 * value that means "don't care".  If a's field equals `ignore`, the field in
 * *merge is replaced by b's; otherwise if b's field equals `ignore`, it is
 * replaced by a's.  If neither is ignorable the two must be identical, and
 * *merge is left untouched.
 *
 * Returns false only when both fields are significant and differ.
 */
static bool
merge_fields(uint64_t *merge,
             uint64_t a, uint64_t b,
             uint64_t mask, uint64_t ignore)
{
        const uint64_t a_field = a & mask;
        const uint64_t b_field = b & mask;

        if (a_field == ignore) {
                *merge = (*merge & ~mask) | b_field;
                return true;
        }

        if (b_field == ignore) {
                *merge = (*merge & ~mask) | a_field;
                return true;
        }

        return a_field == b_field;
}
266
267
int
268
qpu_num_sf_accesses(uint64_t inst)
269
{
270
int accesses = 0;
271
static const uint32_t specials[] = {
272
QPU_W_TLB_COLOR_MS,
273
QPU_W_TLB_COLOR_ALL,
274
QPU_W_TLB_Z,
275
QPU_W_TMU0_S,
276
QPU_W_TMU0_T,
277
QPU_W_TMU0_R,
278
QPU_W_TMU0_B,
279
QPU_W_TMU1_S,
280
QPU_W_TMU1_T,
281
QPU_W_TMU1_R,
282
QPU_W_TMU1_B,
283
QPU_W_SFU_RECIP,
284
QPU_W_SFU_RECIPSQRT,
285
QPU_W_SFU_EXP,
286
QPU_W_SFU_LOG,
287
};
288
uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
289
uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
290
uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
291
uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
292
293
for (int j = 0; j < ARRAY_SIZE(specials); j++) {
294
if (waddr_add == specials[j])
295
accesses++;
296
if (waddr_mul == specials[j])
297
accesses++;
298
}
299
300
if (raddr_a == QPU_R_MUTEX_ACQUIRE)
301
accesses++;
302
if (raddr_b == QPU_R_MUTEX_ACQUIRE &&
303
QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM)
304
accesses++;
305
306
/* XXX: semaphore, combined color read/write? */
307
switch (QPU_GET_FIELD(inst, QPU_SIG)) {
308
case QPU_SIG_COLOR_LOAD:
309
case QPU_SIG_COLOR_LOAD_END:
310
case QPU_SIG_LOAD_TMU0:
311
case QPU_SIG_LOAD_TMU1:
312
accesses++;
313
}
314
315
return accesses;
316
}
317
318
static bool
319
qpu_waddr_ignores_ws(uint32_t waddr)
320
{
321
switch(waddr) {
322
case QPU_W_ACC0:
323
case QPU_W_ACC1:
324
case QPU_W_ACC2:
325
case QPU_W_ACC3:
326
case QPU_W_NOP:
327
case QPU_W_TLB_Z:
328
case QPU_W_TLB_COLOR_MS:
329
case QPU_W_TLB_COLOR_ALL:
330
case QPU_W_TLB_ALPHA_MASK:
331
case QPU_W_VPM:
332
case QPU_W_SFU_RECIP:
333
case QPU_W_SFU_RECIPSQRT:
334
case QPU_W_SFU_EXP:
335
case QPU_W_SFU_LOG:
336
case QPU_W_TMU0_S:
337
case QPU_W_TMU0_T:
338
case QPU_W_TMU0_R:
339
case QPU_W_TMU0_B:
340
case QPU_W_TMU1_S:
341
case QPU_W_TMU1_T:
342
case QPU_W_TMU1_R:
343
case QPU_W_TMU1_B:
344
return true;
345
}
346
347
return false;
348
}
349
350
static void
351
swap_ra_file_mux_helper(uint64_t *merge, uint64_t *a, uint32_t mux_shift)
352
{
353
uint64_t mux_mask = (uint64_t)0x7 << mux_shift;
354
uint64_t mux_a_val = (uint64_t)QPU_MUX_A << mux_shift;
355
uint64_t mux_b_val = (uint64_t)QPU_MUX_B << mux_shift;
356
357
if ((*a & mux_mask) == mux_a_val) {
358
*a = (*a & ~mux_mask) | mux_b_val;
359
*merge = (*merge & ~mux_mask) | mux_b_val;
360
}
361
}
362
363
static bool
364
try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b)
365
{
366
uint32_t raddr_a_a = QPU_GET_FIELD(*a, QPU_RADDR_A);
367
uint32_t raddr_a_b = QPU_GET_FIELD(*a, QPU_RADDR_B);
368
uint32_t raddr_b_a = QPU_GET_FIELD(*b, QPU_RADDR_A);
369
uint32_t raddr_b_b = QPU_GET_FIELD(*b, QPU_RADDR_B);
370
371
if (raddr_a_b != QPU_R_NOP)
372
return false;
373
374
switch (raddr_a_a) {
375
case QPU_R_UNIF:
376
case QPU_R_VARY:
377
break;
378
default:
379
return false;
380
}
381
382
if (!(*merge & QPU_PM) &&
383
QPU_GET_FIELD(*merge, QPU_UNPACK) != QPU_UNPACK_NOP) {
384
return false;
385
}
386
387
if (raddr_b_b != QPU_R_NOP &&
388
raddr_b_b != raddr_a_a)
389
return false;
390
391
/* Move raddr A to B in instruction a. */
392
*a = (*a & ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
393
*a = (*a & ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, QPU_RADDR_B);
394
*merge = QPU_UPDATE_FIELD(*merge, raddr_b_a, QPU_RADDR_A);
395
*merge = QPU_UPDATE_FIELD(*merge, raddr_a_a, QPU_RADDR_B);
396
swap_ra_file_mux_helper(merge, a, QPU_ADD_A_SHIFT);
397
swap_ra_file_mux_helper(merge, a, QPU_ADD_B_SHIFT);
398
swap_ra_file_mux_helper(merge, a, QPU_MUL_A_SHIFT);
399
swap_ra_file_mux_helper(merge, a, QPU_MUL_B_SHIFT);
400
401
return true;
402
}
403
404
static bool
405
convert_mov(uint64_t *inst)
406
{
407
uint32_t add_a = QPU_GET_FIELD(*inst, QPU_ADD_A);
408
uint32_t waddr_add = QPU_GET_FIELD(*inst, QPU_WADDR_ADD);
409
uint32_t cond_add = QPU_GET_FIELD(*inst, QPU_COND_ADD);
410
411
/* Is it a MOV? */
412
if (QPU_GET_FIELD(*inst, QPU_OP_ADD) != QPU_A_OR ||
413
(add_a != QPU_GET_FIELD(*inst, QPU_ADD_B))) {
414
return false;
415
}
416
417
if (QPU_GET_FIELD(*inst, QPU_SIG) != QPU_SIG_NONE)
418
return false;
419
420
/* We could maybe support this in the .8888 and .8a-.8d cases. */
421
if (*inst & QPU_PM)
422
return false;
423
424
*inst = QPU_UPDATE_FIELD(*inst, QPU_A_NOP, QPU_OP_ADD);
425
*inst = QPU_UPDATE_FIELD(*inst, QPU_M_V8MIN, QPU_OP_MUL);
426
427
*inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_A);
428
*inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_B);
429
*inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_A);
430
*inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_B);
431
432
*inst = QPU_UPDATE_FIELD(*inst, waddr_add, QPU_WADDR_MUL);
433
*inst = QPU_UPDATE_FIELD(*inst, QPU_W_NOP, QPU_WADDR_ADD);
434
435
*inst = QPU_UPDATE_FIELD(*inst, cond_add, QPU_COND_MUL);
436
*inst = QPU_UPDATE_FIELD(*inst, QPU_COND_NEVER, QPU_COND_ADD);
437
438
if (!qpu_waddr_ignores_ws(waddr_add))
439
*inst ^= QPU_WS;
440
441
return true;
442
}
443
444
static bool
445
writes_a_file(uint64_t inst)
446
{
447
if (!(inst & QPU_WS))
448
return QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32;
449
else
450
return QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32;
451
}
452
453
static bool
454
reads_r4(uint64_t inst)
455
{
456
return (QPU_GET_FIELD(inst, QPU_ADD_A) == QPU_MUX_R4 ||
457
QPU_GET_FIELD(inst, QPU_ADD_B) == QPU_MUX_R4 ||
458
QPU_GET_FIELD(inst, QPU_MUL_A) == QPU_MUX_R4 ||
459
QPU_GET_FIELD(inst, QPU_MUL_B) == QPU_MUX_R4);
460
}
461
462
uint64_t
463
qpu_merge_inst(uint64_t a, uint64_t b)
464
{
465
uint64_t merge = a | b;
466
bool ok = true;
467
uint32_t a_sig = QPU_GET_FIELD(a, QPU_SIG);
468
uint32_t b_sig = QPU_GET_FIELD(b, QPU_SIG);
469
470
if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP &&
471
QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP) {
472
if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP ||
473
QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP ||
474
!(convert_mov(&a) || convert_mov(&b))) {
475
return 0;
476
} else {
477
merge = a | b;
478
}
479
}
480
481
if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP &&
482
QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
483
return 0;
484
485
if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b))
486
return 0;
487
488
if (a_sig == QPU_SIG_LOAD_IMM ||
489
b_sig == QPU_SIG_LOAD_IMM ||
490
a_sig == QPU_SIG_SMALL_IMM ||
491
b_sig == QPU_SIG_SMALL_IMM ||
492
a_sig == QPU_SIG_BRANCH ||
493
b_sig == QPU_SIG_BRANCH) {
494
return 0;
495
}
496
497
ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK,
498
QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
499
500
/* Misc fields that have to match exactly. */
501
ok = ok && merge_fields(&merge, a, b, QPU_SF, ~0);
502
503
if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK,
504
QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) {
505
/* Since we tend to use regfile A by default both for register
506
* allocation and for our special values (uniforms and
507
* varyings), try swapping uniforms and varyings to regfile B
508
* to resolve raddr A conflicts.
509
*/
510
if (!try_swap_ra_file(&merge, &a, &b) &&
511
!try_swap_ra_file(&merge, &b, &a)) {
512
return 0;
513
}
514
}
515
516
ok = ok && merge_fields(&merge, a, b, QPU_RADDR_B_MASK,
517
QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B));
518
519
ok = ok && merge_fields(&merge, a, b, QPU_WADDR_ADD_MASK,
520
QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD));
521
ok = ok && merge_fields(&merge, a, b, QPU_WADDR_MUL_MASK,
522
QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL));
523
524
/* Allow disagreement on WS (swapping A vs B physical reg file as the
525
* destination for ADD/MUL) if one of the original instructions
526
* ignores it (probably because it's just writing to accumulators).
527
*/
528
if (qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_ADD)) &&
529
qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_MUL))) {
530
merge = (merge & ~QPU_WS) | (b & QPU_WS);
531
} else if (qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_ADD)) &&
532
qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_MUL))) {
533
merge = (merge & ~QPU_WS) | (a & QPU_WS);
534
} else {
535
if ((a & QPU_WS) != (b & QPU_WS))
536
return 0;
537
}
538
539
if (!merge_fields(&merge, a, b, QPU_PM, ~0)) {
540
/* If one instruction has PM bit set and the other not, the
541
* one without PM shouldn't do packing/unpacking, and we
542
* have to make sure non-NOP packing/unpacking from PM
543
* instruction aren't added to it.
544
*/
545
uint64_t temp;
546
547
/* Let a be the one with PM bit */
548
if (!(a & QPU_PM)) {
549
temp = a;
550
a = b;
551
b = temp;
552
}
553
554
if ((b & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0)
555
return 0;
556
557
if ((a & QPU_PACK_MASK) != 0 &&
558
QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
559
return 0;
560
561
if ((a & QPU_UNPACK_MASK) != 0 && reads_r4(b))
562
return 0;
563
} else {
564
/* packing: Make sure that non-NOP packs agree, then deal with
565
* special-case failing of adding a non-NOP pack to something
566
* with a NOP pack.
567
*/
568
if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0))
569
return 0;
570
bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) !=
571
QPU_GET_FIELD(merge, QPU_PACK));
572
bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) !=
573
QPU_GET_FIELD(merge, QPU_PACK));
574
if (!(merge & QPU_PM)) {
575
/* Make sure we're not going to be putting a new
576
* a-file packing on either half.
577
*/
578
if (new_a_pack && writes_a_file(a))
579
return 0;
580
581
if (new_b_pack && writes_a_file(b))
582
return 0;
583
} else {
584
/* Make sure we're not going to be putting new MUL
585
* packing on either half.
586
*/
587
if (new_a_pack &&
588
QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP)
589
return 0;
590
591
if (new_b_pack &&
592
QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
593
return 0;
594
}
595
596
/* unpacking: Make sure that non-NOP unpacks agree, then deal
597
* with special-case failing of adding a non-NOP unpack to
598
* something with a NOP unpack.
599
*/
600
if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0))
601
return 0;
602
bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) !=
603
QPU_GET_FIELD(merge, QPU_UNPACK));
604
bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) !=
605
QPU_GET_FIELD(merge, QPU_UNPACK));
606
if (!(merge & QPU_PM)) {
607
/* Make sure we're not going to be putting a new
608
* a-file packing on either half.
609
*/
610
if (new_a_unpack &&
611
QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP)
612
return 0;
613
614
if (new_b_unpack &&
615
QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP)
616
return 0;
617
} else {
618
/* Make sure we're not going to be putting new r4
619
* unpack on either half.
620
*/
621
if (new_a_unpack && reads_r4(a))
622
return 0;
623
624
if (new_b_unpack && reads_r4(b))
625
return 0;
626
}
627
}
628
629
if (ok)
630
return merge;
631
else
632
return 0;
633
}
634
635
uint64_t
636
qpu_set_sig(uint64_t inst, uint32_t sig)
637
{
638
assert(QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_NONE);
639
return QPU_UPDATE_FIELD(inst, sig, QPU_SIG);
640
}
641
642
uint64_t
643
qpu_set_cond_add(uint64_t inst, uint32_t cond)
644
{
645
assert(QPU_GET_FIELD(inst, QPU_COND_ADD) == QPU_COND_ALWAYS);
646
return QPU_UPDATE_FIELD(inst, cond, QPU_COND_ADD);
647
}
648
649
uint64_t
650
qpu_set_cond_mul(uint64_t inst, uint32_t cond)
651
{
652
assert(QPU_GET_FIELD(inst, QPU_COND_MUL) == QPU_COND_ALWAYS);
653
return QPU_UPDATE_FIELD(inst, cond, QPU_COND_MUL);
654
}
655
656
bool
657
qpu_waddr_is_tlb(uint32_t waddr)
658
{
659
switch (waddr) {
660
case QPU_W_TLB_COLOR_ALL:
661
case QPU_W_TLB_COLOR_MS:
662
case QPU_W_TLB_Z:
663
return true;
664
default:
665
return false;
666
}
667
}
668
669
bool
670
qpu_inst_is_tlb(uint64_t inst)
671
{
672
uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
673
674
return (qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) ||
675
qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_MUL)) ||
676
sig == QPU_SIG_COLOR_LOAD ||
677
sig == QPU_SIG_WAIT_FOR_SCOREBOARD);
678
}
679
680
/**
 * Returns the small immediate value to be encoded in to the raddr b field if
 * the argument can be represented as one, or ~0 otherwise.
 *
 * `i` is the raw 32-bit pattern of the constant.  Encodings produced:
 *
 *   integers 0 .. 15                      ->  0 .. 15
 *   integers -16 .. -1 (two's complement) -> 16 .. 31
 *   floats 1.0, 2.0, ... 128.0            -> 32 .. 39
 *   floats 1/256, 1/128, ... 1/2          -> 40 .. 47
 */
uint32_t
qpu_encode_small_immediate(uint32_t i)
{
        /* IEEE-754 single-precision bit patterns, in encoding order
         * starting at 32.
         */
        static const uint32_t float_patterns[] = {
                0x3f800000, /* 1.0 */
                0x40000000, /* 2.0 */
                0x40800000, /* 4.0 */
                0x41000000, /* 8.0 */
                0x41800000, /* 16.0 */
                0x42000000, /* 32.0 */
                0x42800000, /* 64.0 */
                0x43000000, /* 128.0 */
                0x3b800000, /* 1/256 */
                0x3c000000, /* 1/128 */
                0x3c800000, /* 1/64 */
                0x3d000000, /* 1/32 */
                0x3d800000, /* 1/16 */
                0x3e000000, /* 1/8 */
                0x3e800000, /* 1/4 */
                0x3f000000, /* 1/2 */
        };
        const uint32_t num_patterns =
                sizeof(float_patterns) / sizeof(float_patterns[0]);

        if (i <= 15)
                return i;

        /* -16 .. -1.  Compared in unsigned space so that no
         * implementation-defined unsigned-to-signed conversion is needed;
         * the addition wraps modulo 2^32 and lands on 16 .. 31.
         */
        if (i >= (uint32_t)-16)
                return i + 32;

        for (uint32_t n = 0; n < num_patterns; n++) {
                if (i == float_patterns[n])
                        return 32 + n;
        }

        return ~(uint32_t)0;
}
729
730
void
731
qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst)
732
{
733
if (c->qpu_inst_count >= c->qpu_inst_size) {
734
c->qpu_inst_size = MAX2(16, c->qpu_inst_size * 2);
735
c->qpu_insts = reralloc(c, c->qpu_insts,
736
uint64_t, c->qpu_inst_size);
737
}
738
c->qpu_insts[c->qpu_inst_count++] = inst;
739
}
740
741