Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/broadcom/qpu/qpu_instr.c
4560 views
1
/*
2
* Copyright © 2016 Broadcom
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*/
23
24
#include <stdlib.h>
25
#include <string.h>
26
#include "util/macros.h"
27
#include "broadcom/common/v3d_device_info.h"
28
#include "qpu_instr.h"
29
30
const char *
31
v3d_qpu_magic_waddr_name(const struct v3d_device_info *devinfo,
32
enum v3d_qpu_waddr waddr)
33
{
34
/* V3D 4.x UNIFA aliases TMU in V3D 3.x in the table below */
35
if (devinfo->ver < 40 && waddr == V3D_QPU_WADDR_TMU)
36
return "tmu";
37
38
static const char *waddr_magic[] = {
39
[V3D_QPU_WADDR_R0] = "r0",
40
[V3D_QPU_WADDR_R1] = "r1",
41
[V3D_QPU_WADDR_R2] = "r2",
42
[V3D_QPU_WADDR_R3] = "r3",
43
[V3D_QPU_WADDR_R4] = "r4",
44
[V3D_QPU_WADDR_R5] = "r5",
45
[V3D_QPU_WADDR_NOP] = "-",
46
[V3D_QPU_WADDR_TLB] = "tlb",
47
[V3D_QPU_WADDR_TLBU] = "tlbu",
48
[V3D_QPU_WADDR_UNIFA] = "unifa",
49
[V3D_QPU_WADDR_TMUL] = "tmul",
50
[V3D_QPU_WADDR_TMUD] = "tmud",
51
[V3D_QPU_WADDR_TMUA] = "tmua",
52
[V3D_QPU_WADDR_TMUAU] = "tmuau",
53
[V3D_QPU_WADDR_VPM] = "vpm",
54
[V3D_QPU_WADDR_VPMU] = "vpmu",
55
[V3D_QPU_WADDR_SYNC] = "sync",
56
[V3D_QPU_WADDR_SYNCU] = "syncu",
57
[V3D_QPU_WADDR_SYNCB] = "syncb",
58
[V3D_QPU_WADDR_RECIP] = "recip",
59
[V3D_QPU_WADDR_RSQRT] = "rsqrt",
60
[V3D_QPU_WADDR_EXP] = "exp",
61
[V3D_QPU_WADDR_LOG] = "log",
62
[V3D_QPU_WADDR_SIN] = "sin",
63
[V3D_QPU_WADDR_RSQRT2] = "rsqrt2",
64
[V3D_QPU_WADDR_TMUC] = "tmuc",
65
[V3D_QPU_WADDR_TMUS] = "tmus",
66
[V3D_QPU_WADDR_TMUT] = "tmut",
67
[V3D_QPU_WADDR_TMUR] = "tmur",
68
[V3D_QPU_WADDR_TMUI] = "tmui",
69
[V3D_QPU_WADDR_TMUB] = "tmub",
70
[V3D_QPU_WADDR_TMUDREF] = "tmudref",
71
[V3D_QPU_WADDR_TMUOFF] = "tmuoff",
72
[V3D_QPU_WADDR_TMUSCM] = "tmuscm",
73
[V3D_QPU_WADDR_TMUSF] = "tmusf",
74
[V3D_QPU_WADDR_TMUSLOD] = "tmuslod",
75
[V3D_QPU_WADDR_TMUHS] = "tmuhs",
76
[V3D_QPU_WADDR_TMUHSCM] = "tmuscm",
77
[V3D_QPU_WADDR_TMUHSF] = "tmuhsf",
78
[V3D_QPU_WADDR_TMUHSLOD] = "tmuhslod",
79
[V3D_QPU_WADDR_R5REP] = "r5rep",
80
};
81
82
return waddr_magic[waddr];
83
}
84
85
const char *
86
v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
87
{
88
static const char *op_names[] = {
89
[V3D_QPU_A_FADD] = "fadd",
90
[V3D_QPU_A_FADDNF] = "faddnf",
91
[V3D_QPU_A_VFPACK] = "vfpack",
92
[V3D_QPU_A_ADD] = "add",
93
[V3D_QPU_A_SUB] = "sub",
94
[V3D_QPU_A_FSUB] = "fsub",
95
[V3D_QPU_A_MIN] = "min",
96
[V3D_QPU_A_MAX] = "max",
97
[V3D_QPU_A_UMIN] = "umin",
98
[V3D_QPU_A_UMAX] = "umax",
99
[V3D_QPU_A_SHL] = "shl",
100
[V3D_QPU_A_SHR] = "shr",
101
[V3D_QPU_A_ASR] = "asr",
102
[V3D_QPU_A_ROR] = "ror",
103
[V3D_QPU_A_FMIN] = "fmin",
104
[V3D_QPU_A_FMAX] = "fmax",
105
[V3D_QPU_A_VFMIN] = "vfmin",
106
[V3D_QPU_A_AND] = "and",
107
[V3D_QPU_A_OR] = "or",
108
[V3D_QPU_A_XOR] = "xor",
109
[V3D_QPU_A_VADD] = "vadd",
110
[V3D_QPU_A_VSUB] = "vsub",
111
[V3D_QPU_A_NOT] = "not",
112
[V3D_QPU_A_NEG] = "neg",
113
[V3D_QPU_A_FLAPUSH] = "flapush",
114
[V3D_QPU_A_FLBPUSH] = "flbpush",
115
[V3D_QPU_A_FLPOP] = "flpop",
116
[V3D_QPU_A_RECIP] = "recip",
117
[V3D_QPU_A_SETMSF] = "setmsf",
118
[V3D_QPU_A_SETREVF] = "setrevf",
119
[V3D_QPU_A_NOP] = "nop",
120
[V3D_QPU_A_TIDX] = "tidx",
121
[V3D_QPU_A_EIDX] = "eidx",
122
[V3D_QPU_A_LR] = "lr",
123
[V3D_QPU_A_VFLA] = "vfla",
124
[V3D_QPU_A_VFLNA] = "vflna",
125
[V3D_QPU_A_VFLB] = "vflb",
126
[V3D_QPU_A_VFLNB] = "vflnb",
127
[V3D_QPU_A_FXCD] = "fxcd",
128
[V3D_QPU_A_XCD] = "xcd",
129
[V3D_QPU_A_FYCD] = "fycd",
130
[V3D_QPU_A_YCD] = "ycd",
131
[V3D_QPU_A_MSF] = "msf",
132
[V3D_QPU_A_REVF] = "revf",
133
[V3D_QPU_A_VDWWT] = "vdwwt",
134
[V3D_QPU_A_IID] = "iid",
135
[V3D_QPU_A_SAMPID] = "sampid",
136
[V3D_QPU_A_BARRIERID] = "barrierid",
137
[V3D_QPU_A_TMUWT] = "tmuwt",
138
[V3D_QPU_A_VPMSETUP] = "vpmsetup",
139
[V3D_QPU_A_VPMWT] = "vpmwt",
140
[V3D_QPU_A_FLAFIRST] = "flafirst",
141
[V3D_QPU_A_FLNAFIRST] = "flnafirst",
142
[V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in",
143
[V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out",
144
[V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in",
145
[V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out",
146
[V3D_QPU_A_LDVPMP] = "ldvpmp",
147
[V3D_QPU_A_RSQRT] = "rsqrt",
148
[V3D_QPU_A_EXP] = "exp",
149
[V3D_QPU_A_LOG] = "log",
150
[V3D_QPU_A_SIN] = "sin",
151
[V3D_QPU_A_RSQRT2] = "rsqrt2",
152
[V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in",
153
[V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out",
154
[V3D_QPU_A_FCMP] = "fcmp",
155
[V3D_QPU_A_VFMAX] = "vfmax",
156
[V3D_QPU_A_FROUND] = "fround",
157
[V3D_QPU_A_FTOIN] = "ftoin",
158
[V3D_QPU_A_FTRUNC] = "ftrunc",
159
[V3D_QPU_A_FTOIZ] = "ftoiz",
160
[V3D_QPU_A_FFLOOR] = "ffloor",
161
[V3D_QPU_A_FTOUZ] = "ftouz",
162
[V3D_QPU_A_FCEIL] = "fceil",
163
[V3D_QPU_A_FTOC] = "ftoc",
164
[V3D_QPU_A_FDX] = "fdx",
165
[V3D_QPU_A_FDY] = "fdy",
166
[V3D_QPU_A_STVPMV] = "stvpmv",
167
[V3D_QPU_A_STVPMD] = "stvpmd",
168
[V3D_QPU_A_STVPMP] = "stvpmp",
169
[V3D_QPU_A_ITOF] = "itof",
170
[V3D_QPU_A_CLZ] = "clz",
171
[V3D_QPU_A_UTOF] = "utof",
172
};
173
174
if (op >= ARRAY_SIZE(op_names))
175
return NULL;
176
177
return op_names[op];
178
}
179
180
const char *
181
v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)
182
{
183
static const char *op_names[] = {
184
[V3D_QPU_M_ADD] = "add",
185
[V3D_QPU_M_SUB] = "sub",
186
[V3D_QPU_M_UMUL24] = "umul24",
187
[V3D_QPU_M_VFMUL] = "vfmul",
188
[V3D_QPU_M_SMUL24] = "smul24",
189
[V3D_QPU_M_MULTOP] = "multop",
190
[V3D_QPU_M_FMOV] = "fmov",
191
[V3D_QPU_M_MOV] = "mov",
192
[V3D_QPU_M_NOP] = "nop",
193
[V3D_QPU_M_FMUL] = "fmul",
194
};
195
196
if (op >= ARRAY_SIZE(op_names))
197
return NULL;
198
199
return op_names[op];
200
}
201
202
const char *
203
v3d_qpu_cond_name(enum v3d_qpu_cond cond)
204
{
205
switch (cond) {
206
case V3D_QPU_COND_NONE:
207
return "";
208
case V3D_QPU_COND_IFA:
209
return ".ifa";
210
case V3D_QPU_COND_IFB:
211
return ".ifb";
212
case V3D_QPU_COND_IFNA:
213
return ".ifna";
214
case V3D_QPU_COND_IFNB:
215
return ".ifnb";
216
default:
217
unreachable("bad cond value");
218
}
219
}
220
221
const char *
222
v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond)
223
{
224
switch (cond) {
225
case V3D_QPU_BRANCH_COND_ALWAYS:
226
return "";
227
case V3D_QPU_BRANCH_COND_A0:
228
return ".a0";
229
case V3D_QPU_BRANCH_COND_NA0:
230
return ".na0";
231
case V3D_QPU_BRANCH_COND_ALLA:
232
return ".alla";
233
case V3D_QPU_BRANCH_COND_ANYNA:
234
return ".anyna";
235
case V3D_QPU_BRANCH_COND_ANYA:
236
return ".anya";
237
case V3D_QPU_BRANCH_COND_ALLNA:
238
return ".allna";
239
default:
240
unreachable("bad branch cond value");
241
}
242
}
243
244
const char *
245
v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign)
246
{
247
switch (msfign) {
248
case V3D_QPU_MSFIGN_NONE:
249
return "";
250
case V3D_QPU_MSFIGN_P:
251
return "p";
252
case V3D_QPU_MSFIGN_Q:
253
return "q";
254
default:
255
unreachable("bad branch cond value");
256
}
257
}
258
259
const char *
260
v3d_qpu_pf_name(enum v3d_qpu_pf pf)
261
{
262
switch (pf) {
263
case V3D_QPU_PF_NONE:
264
return "";
265
case V3D_QPU_PF_PUSHZ:
266
return ".pushz";
267
case V3D_QPU_PF_PUSHN:
268
return ".pushn";
269
case V3D_QPU_PF_PUSHC:
270
return ".pushc";
271
default:
272
unreachable("bad pf value");
273
}
274
}
275
276
const char *
277
v3d_qpu_uf_name(enum v3d_qpu_uf uf)
278
{
279
switch (uf) {
280
case V3D_QPU_UF_NONE:
281
return "";
282
case V3D_QPU_UF_ANDZ:
283
return ".andz";
284
case V3D_QPU_UF_ANDNZ:
285
return ".andnz";
286
case V3D_QPU_UF_NORZ:
287
return ".norz";
288
case V3D_QPU_UF_NORNZ:
289
return ".nornz";
290
case V3D_QPU_UF_ANDN:
291
return ".andn";
292
case V3D_QPU_UF_ANDNN:
293
return ".andnn";
294
case V3D_QPU_UF_NORN:
295
return ".norn";
296
case V3D_QPU_UF_NORNN:
297
return ".nornn";
298
case V3D_QPU_UF_ANDC:
299
return ".andc";
300
case V3D_QPU_UF_ANDNC:
301
return ".andnc";
302
case V3D_QPU_UF_NORC:
303
return ".norc";
304
case V3D_QPU_UF_NORNC:
305
return ".nornc";
306
default:
307
unreachable("bad pf value");
308
}
309
}
310
311
const char *
312
v3d_qpu_pack_name(enum v3d_qpu_output_pack pack)
313
{
314
switch (pack) {
315
case V3D_QPU_PACK_NONE:
316
return "";
317
case V3D_QPU_PACK_L:
318
return ".l";
319
case V3D_QPU_PACK_H:
320
return ".h";
321
default:
322
unreachable("bad pack value");
323
}
324
}
325
326
const char *
327
v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack)
328
{
329
switch (unpack) {
330
case V3D_QPU_UNPACK_NONE:
331
return "";
332
case V3D_QPU_UNPACK_L:
333
return ".l";
334
case V3D_QPU_UNPACK_H:
335
return ".h";
336
case V3D_QPU_UNPACK_ABS:
337
return ".abs";
338
case V3D_QPU_UNPACK_REPLICATE_32F_16:
339
return ".ff";
340
case V3D_QPU_UNPACK_REPLICATE_L_16:
341
return ".ll";
342
case V3D_QPU_UNPACK_REPLICATE_H_16:
343
return ".hh";
344
case V3D_QPU_UNPACK_SWAP_16:
345
return ".swp";
346
default:
347
unreachable("bad unpack value");
348
}
349
}
350
351
#define D 1
352
#define A 2
353
#define B 4
354
static const uint8_t add_op_args[] = {
355
[V3D_QPU_A_FADD] = D | A | B,
356
[V3D_QPU_A_FADDNF] = D | A | B,
357
[V3D_QPU_A_VFPACK] = D | A | B,
358
[V3D_QPU_A_ADD] = D | A | B,
359
[V3D_QPU_A_VFPACK] = D | A | B,
360
[V3D_QPU_A_SUB] = D | A | B,
361
[V3D_QPU_A_VFPACK] = D | A | B,
362
[V3D_QPU_A_FSUB] = D | A | B,
363
[V3D_QPU_A_MIN] = D | A | B,
364
[V3D_QPU_A_MAX] = D | A | B,
365
[V3D_QPU_A_UMIN] = D | A | B,
366
[V3D_QPU_A_UMAX] = D | A | B,
367
[V3D_QPU_A_SHL] = D | A | B,
368
[V3D_QPU_A_SHR] = D | A | B,
369
[V3D_QPU_A_ASR] = D | A | B,
370
[V3D_QPU_A_ROR] = D | A | B,
371
[V3D_QPU_A_FMIN] = D | A | B,
372
[V3D_QPU_A_FMAX] = D | A | B,
373
[V3D_QPU_A_VFMIN] = D | A | B,
374
375
[V3D_QPU_A_AND] = D | A | B,
376
[V3D_QPU_A_OR] = D | A | B,
377
[V3D_QPU_A_XOR] = D | A | B,
378
379
[V3D_QPU_A_VADD] = D | A | B,
380
[V3D_QPU_A_VSUB] = D | A | B,
381
[V3D_QPU_A_NOT] = D | A,
382
[V3D_QPU_A_NEG] = D | A,
383
[V3D_QPU_A_FLAPUSH] = D | A,
384
[V3D_QPU_A_FLBPUSH] = D | A,
385
[V3D_QPU_A_FLPOP] = D | A,
386
[V3D_QPU_A_RECIP] = D | A,
387
[V3D_QPU_A_SETMSF] = D | A,
388
[V3D_QPU_A_SETREVF] = D | A,
389
[V3D_QPU_A_NOP] = 0,
390
[V3D_QPU_A_TIDX] = D,
391
[V3D_QPU_A_EIDX] = D,
392
[V3D_QPU_A_LR] = D,
393
[V3D_QPU_A_VFLA] = D,
394
[V3D_QPU_A_VFLNA] = D,
395
[V3D_QPU_A_VFLB] = D,
396
[V3D_QPU_A_VFLNB] = D,
397
398
[V3D_QPU_A_FXCD] = D,
399
[V3D_QPU_A_XCD] = D,
400
[V3D_QPU_A_FYCD] = D,
401
[V3D_QPU_A_YCD] = D,
402
403
[V3D_QPU_A_MSF] = D,
404
[V3D_QPU_A_REVF] = D,
405
[V3D_QPU_A_VDWWT] = D,
406
[V3D_QPU_A_IID] = D,
407
[V3D_QPU_A_SAMPID] = D,
408
[V3D_QPU_A_BARRIERID] = D,
409
[V3D_QPU_A_TMUWT] = D,
410
[V3D_QPU_A_VPMWT] = D,
411
[V3D_QPU_A_FLAFIRST] = D,
412
[V3D_QPU_A_FLNAFIRST] = D,
413
414
[V3D_QPU_A_VPMSETUP] = D | A,
415
416
[V3D_QPU_A_LDVPMV_IN] = D | A,
417
[V3D_QPU_A_LDVPMV_OUT] = D | A,
418
[V3D_QPU_A_LDVPMD_IN] = D | A,
419
[V3D_QPU_A_LDVPMD_OUT] = D | A,
420
[V3D_QPU_A_LDVPMP] = D | A,
421
[V3D_QPU_A_RSQRT] = D | A,
422
[V3D_QPU_A_EXP] = D | A,
423
[V3D_QPU_A_LOG] = D | A,
424
[V3D_QPU_A_SIN] = D | A,
425
[V3D_QPU_A_RSQRT2] = D | A,
426
[V3D_QPU_A_LDVPMG_IN] = D | A | B,
427
[V3D_QPU_A_LDVPMG_OUT] = D | A | B,
428
429
/* FIXME: MOVABSNEG */
430
431
[V3D_QPU_A_FCMP] = D | A | B,
432
[V3D_QPU_A_VFMAX] = D | A | B,
433
434
[V3D_QPU_A_FROUND] = D | A,
435
[V3D_QPU_A_FTOIN] = D | A,
436
[V3D_QPU_A_FTRUNC] = D | A,
437
[V3D_QPU_A_FTOIZ] = D | A,
438
[V3D_QPU_A_FFLOOR] = D | A,
439
[V3D_QPU_A_FTOUZ] = D | A,
440
[V3D_QPU_A_FCEIL] = D | A,
441
[V3D_QPU_A_FTOC] = D | A,
442
443
[V3D_QPU_A_FDX] = D | A,
444
[V3D_QPU_A_FDY] = D | A,
445
446
[V3D_QPU_A_STVPMV] = A | B,
447
[V3D_QPU_A_STVPMD] = A | B,
448
[V3D_QPU_A_STVPMP] = A | B,
449
450
[V3D_QPU_A_ITOF] = D | A,
451
[V3D_QPU_A_CLZ] = D | A,
452
[V3D_QPU_A_UTOF] = D | A,
453
};
454
455
static const uint8_t mul_op_args[] = {
456
[V3D_QPU_M_ADD] = D | A | B,
457
[V3D_QPU_M_SUB] = D | A | B,
458
[V3D_QPU_M_UMUL24] = D | A | B,
459
[V3D_QPU_M_VFMUL] = D | A | B,
460
[V3D_QPU_M_SMUL24] = D | A | B,
461
[V3D_QPU_M_MULTOP] = D | A | B,
462
[V3D_QPU_M_FMOV] = D | A,
463
[V3D_QPU_M_NOP] = 0,
464
[V3D_QPU_M_MOV] = D | A,
465
[V3D_QPU_M_FMUL] = D | A | B,
466
};
467
468
bool
469
v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op)
470
{
471
assert(op < ARRAY_SIZE(add_op_args));
472
473
return add_op_args[op] & D;
474
}
475
476
bool
477
v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op)
478
{
479
assert(op < ARRAY_SIZE(mul_op_args));
480
481
return mul_op_args[op] & D;
482
}
483
484
int
485
v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op)
486
{
487
assert(op < ARRAY_SIZE(add_op_args));
488
489
uint8_t args = add_op_args[op];
490
if (args & B)
491
return 2;
492
else if (args & A)
493
return 1;
494
else
495
return 0;
496
}
497
498
int
499
v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)
500
{
501
assert(op < ARRAY_SIZE(mul_op_args));
502
503
uint8_t args = mul_op_args[op];
504
if (args & B)
505
return 2;
506
else if (args & A)
507
return 1;
508
else
509
return 0;
510
}
511
512
enum v3d_qpu_cond
513
v3d_qpu_cond_invert(enum v3d_qpu_cond cond)
514
{
515
switch (cond) {
516
case V3D_QPU_COND_IFA:
517
return V3D_QPU_COND_IFNA;
518
case V3D_QPU_COND_IFNA:
519
return V3D_QPU_COND_IFA;
520
case V3D_QPU_COND_IFB:
521
return V3D_QPU_COND_IFNB;
522
case V3D_QPU_COND_IFNB:
523
return V3D_QPU_COND_IFB;
524
default:
525
unreachable("Non-invertible cond");
526
}
527
}
528
529
bool
530
v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)
531
{
532
switch (waddr) {
533
case V3D_QPU_WADDR_RECIP:
534
case V3D_QPU_WADDR_RSQRT:
535
case V3D_QPU_WADDR_EXP:
536
case V3D_QPU_WADDR_LOG:
537
case V3D_QPU_WADDR_SIN:
538
case V3D_QPU_WADDR_RSQRT2:
539
return true;
540
default:
541
return false;
542
}
543
}
544
545
bool
546
v3d_qpu_magic_waddr_is_tmu(const struct v3d_device_info *devinfo,
547
enum v3d_qpu_waddr waddr)
548
{
549
if (devinfo->ver >= 40) {
550
return ((waddr >= V3D_QPU_WADDR_TMUD &&
551
waddr <= V3D_QPU_WADDR_TMUAU) ||
552
(waddr >= V3D_QPU_WADDR_TMUC &&
553
waddr <= V3D_QPU_WADDR_TMUHSLOD));
554
} else {
555
return ((waddr >= V3D_QPU_WADDR_TMU &&
556
waddr <= V3D_QPU_WADDR_TMUAU) ||
557
(waddr >= V3D_QPU_WADDR_TMUC &&
558
waddr <= V3D_QPU_WADDR_TMUHSLOD));
559
}
560
}
561
562
bool
563
v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst)
564
{
565
return (inst->sig.ldtmu ||
566
(inst->type == V3D_QPU_INSTR_TYPE_ALU &&
567
inst->alu.add.op == V3D_QPU_A_TMUWT));
568
}
569
570
bool
571
v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)
572
{
573
return (waddr == V3D_QPU_WADDR_TLB ||
574
waddr == V3D_QPU_WADDR_TLBU);
575
}
576
577
bool
578
v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr)
579
{
580
return (waddr == V3D_QPU_WADDR_VPM ||
581
waddr == V3D_QPU_WADDR_VPMU);
582
}
583
584
bool
585
v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
586
{
587
return (waddr == V3D_QPU_WADDR_SYNC ||
588
waddr == V3D_QPU_WADDR_SYNCB ||
589
waddr == V3D_QPU_WADDR_SYNCU);
590
}
591
592
bool
593
v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr)
594
{
595
switch (waddr) {
596
case V3D_QPU_WADDR_VPMU:
597
case V3D_QPU_WADDR_TLBU:
598
case V3D_QPU_WADDR_TMUAU:
599
case V3D_QPU_WADDR_SYNCU:
600
return true;
601
default:
602
return false;
603
}
604
}
605
606
static bool
607
v3d_qpu_add_op_reads_vpm(enum v3d_qpu_add_op op)
608
{
609
switch (op) {
610
case V3D_QPU_A_VPMSETUP:
611
case V3D_QPU_A_LDVPMV_IN:
612
case V3D_QPU_A_LDVPMV_OUT:
613
case V3D_QPU_A_LDVPMD_IN:
614
case V3D_QPU_A_LDVPMD_OUT:
615
case V3D_QPU_A_LDVPMP:
616
case V3D_QPU_A_LDVPMG_IN:
617
case V3D_QPU_A_LDVPMG_OUT:
618
return true;
619
default:
620
return false;
621
}
622
}
623
624
static bool
625
v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op)
626
{
627
switch (op) {
628
case V3D_QPU_A_VPMSETUP:
629
case V3D_QPU_A_STVPMV:
630
case V3D_QPU_A_STVPMD:
631
case V3D_QPU_A_STVPMP:
632
return true;
633
default:
634
return false;
635
}
636
}
637
638
bool
639
v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)
640
{
641
if (inst->sig.ldtlb ||
642
inst->sig.ldtlbu)
643
return true;
644
645
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
646
if (inst->alu.add.magic_write &&
647
v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr)) {
648
return true;
649
}
650
651
if (inst->alu.mul.magic_write &&
652
v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr)) {
653
return true;
654
}
655
}
656
657
return false;
658
}
659
660
bool
661
v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)
662
{
663
if (v3d_qpu_instr_is_sfu(inst))
664
return true;
665
666
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
667
if (inst->alu.add.magic_write &&
668
v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) {
669
return true;
670
}
671
672
if (inst->alu.mul.magic_write &&
673
v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) {
674
return true;
675
}
676
}
677
678
return false;
679
}
680
681
bool
682
v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst)
683
{
684
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
685
switch (inst->alu.add.op) {
686
case V3D_QPU_A_RECIP:
687
case V3D_QPU_A_RSQRT:
688
case V3D_QPU_A_EXP:
689
case V3D_QPU_A_LOG:
690
case V3D_QPU_A_SIN:
691
case V3D_QPU_A_RSQRT2:
692
return true;
693
default:
694
return false;
695
}
696
}
697
return false;
698
}
699
700
bool
701
v3d_qpu_writes_tmu(const struct v3d_device_info *devinfo,
702
const struct v3d_qpu_instr *inst)
703
{
704
return (inst->type == V3D_QPU_INSTR_TYPE_ALU &&
705
((inst->alu.add.magic_write &&
706
v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.add.waddr)) ||
707
(inst->alu.mul.magic_write &&
708
v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.mul.waddr))));
709
}
710
711
bool
712
v3d_qpu_writes_tmu_not_tmuc(const struct v3d_device_info *devinfo,
713
const struct v3d_qpu_instr *inst)
714
{
715
return v3d_qpu_writes_tmu(devinfo, inst) &&
716
(!inst->alu.add.magic_write ||
717
inst->alu.add.waddr != V3D_QPU_WADDR_TMUC) &&
718
(!inst->alu.mul.magic_write ||
719
inst->alu.mul.waddr != V3D_QPU_WADDR_TMUC);
720
}
721
722
bool
723
v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst)
724
{
725
if (inst->sig.ldvpm)
726
return true;
727
728
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
729
if (v3d_qpu_add_op_reads_vpm(inst->alu.add.op))
730
return true;
731
}
732
733
return false;
734
}
735
736
bool
737
v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst)
738
{
739
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
740
if (v3d_qpu_add_op_writes_vpm(inst->alu.add.op))
741
return true;
742
743
if (inst->alu.add.magic_write &&
744
v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) {
745
return true;
746
}
747
748
if (inst->alu.mul.magic_write &&
749
v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) {
750
return true;
751
}
752
}
753
754
return false;
755
}
756
757
bool
758
v3d_qpu_writes_unifa(const struct v3d_device_info *devinfo,
759
const struct v3d_qpu_instr *inst)
760
{
761
if (devinfo->ver < 40)
762
return false;
763
764
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
765
if (inst->alu.add.op != V3D_QPU_A_NOP &&
766
inst->alu.add.magic_write &&
767
inst->alu.add.waddr == V3D_QPU_WADDR_UNIFA) {
768
return true;
769
}
770
771
if (inst->alu.mul.op != V3D_QPU_M_NOP &&
772
inst->alu.mul.magic_write &&
773
inst->alu.mul.waddr == V3D_QPU_WADDR_UNIFA) {
774
return true;
775
}
776
}
777
778
return false;
779
}
780
781
static bool
782
v3d_qpu_waits_vpm(const struct v3d_qpu_instr *inst)
783
{
784
return inst->type == V3D_QPU_INSTR_TYPE_ALU &&
785
inst->alu.add.op == V3D_QPU_A_VPMWT;
786
}
787
788
/*
 * True when either v3d_qpu_reads_vpm() or v3d_qpu_writes_vpm() accepts
 * the instruction.
 */
bool
v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst)
{
        if (v3d_qpu_reads_vpm(inst))
                return true;

        return v3d_qpu_writes_vpm(inst);
}
793
794
/*
 * True when the instruction reads the VPM, writes the VPM, or waits on it
 * (per v3d_qpu_reads_vpm(), v3d_qpu_writes_vpm() and v3d_qpu_waits_vpm()).
 */
bool
v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst)
{
        if (v3d_qpu_reads_vpm(inst))
                return true;

        if (v3d_qpu_writes_vpm(inst))
                return true;

        return v3d_qpu_waits_vpm(inst);
}
801
802
static bool
803
qpu_writes_magic_waddr_explicitly(const struct v3d_device_info *devinfo,
804
const struct v3d_qpu_instr *inst,
805
uint32_t waddr)
806
{
807
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
808
if (inst->alu.add.magic_write && inst->alu.add.waddr == waddr)
809
return true;
810
811
if (inst->alu.mul.magic_write && inst->alu.mul.waddr == waddr)
812
return true;
813
}
814
815
if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
816
inst->sig_magic && inst->sig_addr == waddr) {
817
return true;
818
}
819
820
return false;
821
}
822
823
bool
824
v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
825
const struct v3d_qpu_instr *inst)
826
{
827
if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R3))
828
return true;
829
830
return (devinfo->ver < 41 && inst->sig.ldvary) || inst->sig.ldvpm;
831
}
832
833
bool
834
v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
835
const struct v3d_qpu_instr *inst)
836
{
837
if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
838
if (inst->alu.add.magic_write &&
839
(inst->alu.add.waddr == V3D_QPU_WADDR_R4 ||
840
v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))) {
841
return true;
842
}
843
844
if (inst->alu.mul.magic_write &&
845
(inst->alu.mul.waddr == V3D_QPU_WADDR_R4 ||
846
v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))) {
847
return true;
848
}
849
}
850
851
if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
852
if (inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4)
853
return true;
854
} else if (inst->sig.ldtmu) {
855
return true;
856
}
857
858
return false;
859
}
860
861
bool
862
v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
863
const struct v3d_qpu_instr *inst)
864
{
865
if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R5))
866
return true;
867
868
return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa;
869
}
870
871
bool
872
v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,
873
const struct v3d_qpu_instr *inst)
874
{
875
if (v3d_qpu_writes_r5(devinfo, inst))
876
return true;
877
if (v3d_qpu_writes_r4(devinfo, inst))
878
return true;
879
if (v3d_qpu_writes_r3(devinfo, inst))
880
return true;
881
if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R2))
882
return true;
883
if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R1))
884
return true;
885
if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R0))
886
return true;
887
888
return false;
889
}
890
891
bool
892
v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)
893
{
894
int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);
895
int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);
896
897
return ((add_nsrc > 0 && inst->alu.add.a == mux) ||
898
(add_nsrc > 1 && inst->alu.add.b == mux) ||
899
(mul_nsrc > 0 && inst->alu.mul.a == mux) ||
900
(mul_nsrc > 1 && inst->alu.mul.b == mux));
901
}
902
903
bool
904
v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
905
const struct v3d_qpu_sig *sig)
906
{
907
if (devinfo->ver < 41)
908
return false;
909
910
return (sig->ldunifrf ||
911
sig->ldunifarf ||
912
sig->ldvary ||
913
sig->ldtmu ||
914
sig->ldtlb ||
915
sig->ldtlbu);
916
}
917
918
bool
919
v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst)
920
{
921
if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {
922
return inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS;
923
} else if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
924
if (inst->flags.ac != V3D_QPU_COND_NONE ||
925
inst->flags.mc != V3D_QPU_COND_NONE ||
926
inst->flags.auf != V3D_QPU_UF_NONE ||
927
inst->flags.muf != V3D_QPU_UF_NONE)
928
return true;
929
930
switch (inst->alu.add.op) {
931
case V3D_QPU_A_VFLA:
932
case V3D_QPU_A_VFLNA:
933
case V3D_QPU_A_VFLB:
934
case V3D_QPU_A_VFLNB:
935
case V3D_QPU_A_FLAPUSH:
936
case V3D_QPU_A_FLBPUSH:
937
case V3D_QPU_A_FLAFIRST:
938
case V3D_QPU_A_FLNAFIRST:
939
return true;
940
default:
941
break;
942
}
943
}
944
945
return false;
946
}
947
948
bool
949
v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst)
950
{
951
if (inst->flags.apf != V3D_QPU_PF_NONE ||
952
inst->flags.mpf != V3D_QPU_PF_NONE ||
953
inst->flags.auf != V3D_QPU_UF_NONE ||
954
inst->flags.muf != V3D_QPU_UF_NONE) {
955
return true;
956
}
957
958
return false;
959
}
960
961
bool
962
v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst)
963
{
964
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
965
return false;
966
967
switch (inst->alu.add.op) {
968
case V3D_QPU_A_FADD:
969
case V3D_QPU_A_FADDNF:
970
case V3D_QPU_A_FSUB:
971
case V3D_QPU_A_FMIN:
972
case V3D_QPU_A_FMAX:
973
case V3D_QPU_A_FCMP:
974
case V3D_QPU_A_FROUND:
975
case V3D_QPU_A_FTRUNC:
976
case V3D_QPU_A_FFLOOR:
977
case V3D_QPU_A_FCEIL:
978
case V3D_QPU_A_FDX:
979
case V3D_QPU_A_FDY:
980
case V3D_QPU_A_FTOIN:
981
case V3D_QPU_A_FTOIZ:
982
case V3D_QPU_A_FTOUZ:
983
case V3D_QPU_A_FTOC:
984
case V3D_QPU_A_VFPACK:
985
return true;
986
break;
987
default:
988
break;
989
}
990
991
switch (inst->alu.mul.op) {
992
case V3D_QPU_M_FMOV:
993
case V3D_QPU_M_FMUL:
994
return true;
995
break;
996
default:
997
break;
998
}
999
1000
return false;
1001
}
1002
bool
1003
v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst)
1004
{
1005
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
1006
return false;
1007
1008
switch (inst->alu.add.op) {
1009
case V3D_QPU_A_VFMIN:
1010
case V3D_QPU_A_VFMAX:
1011
return true;
1012
break;
1013
default:
1014
break;
1015
}
1016
1017
switch (inst->alu.mul.op) {
1018
case V3D_QPU_M_VFMUL:
1019
return true;
1020
break;
1021
default:
1022
break;
1023
}
1024
1025
return false;
1026
}
1027
1028
bool
1029
v3d_qpu_is_nop(struct v3d_qpu_instr *inst)
1030
{
1031
static const struct v3d_qpu_sig nosig = { 0 };
1032
1033
if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
1034
return false;
1035
if (inst->alu.add.op != V3D_QPU_A_NOP)
1036
return false;
1037
if (inst->alu.mul.op != V3D_QPU_M_NOP)
1038
return false;
1039
if (memcmp(&inst->sig, &nosig, sizeof(nosig)))
1040
return false;
1041
return true;
1042
}
1043
1044