GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/broadcom/qpu/qpu_pack.c
/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <string.h>
#include "util/macros.h"
#include "util/bitscan.h"

#include "broadcom/common/v3d_device_info.h"
#include "qpu_instr.h"

#ifndef QPU_MASK
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
        })

#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))

#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
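
/*
 * Worked example for the helpers above: V3D_QPU_SIG below occupies
 * instruction bits 57:53, so QPU_SET_FIELD(5, V3D_QPU_SIG) evaluates to
 * (uint64_t)5 << 53, and QPU_GET_FIELD() applied to that word recovers 5.
 * The assert() in QPU_SET_FIELD() catches values too wide for their field.
 */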

#define V3D_QPU_OP_MUL_SHIFT 58
#define V3D_QPU_OP_MUL_MASK QPU_MASK(63, 58)

#define V3D_QPU_SIG_SHIFT 53
#define V3D_QPU_SIG_MASK QPU_MASK(57, 53)

#define V3D_QPU_COND_SHIFT 46
#define V3D_QPU_COND_MASK QPU_MASK(52, 46)
#define V3D_QPU_COND_SIG_MAGIC_ADDR (1 << 6)

#define V3D_QPU_MM QPU_MASK(45, 45)
#define V3D_QPU_MA QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT 38
#define V3D_QPU_WADDR_M_MASK QPU_MASK(43, 38)

#define V3D_QPU_BRANCH_ADDR_LOW_SHIFT 35
#define V3D_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT 32
#define V3D_QPU_WADDR_A_MASK QPU_MASK(37, 32)

#define V3D_QPU_BRANCH_COND_SHIFT 32
#define V3D_QPU_BRANCH_COND_MASK QPU_MASK(34, 32)

#define V3D_QPU_BRANCH_ADDR_HIGH_SHIFT 24
#define V3D_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24)

#define V3D_QPU_OP_ADD_SHIFT 24
#define V3D_QPU_OP_ADD_MASK QPU_MASK(31, 24)

#define V3D_QPU_MUL_B_SHIFT 21
#define V3D_QPU_MUL_B_MASK QPU_MASK(23, 21)

#define V3D_QPU_BRANCH_MSFIGN_SHIFT 21
#define V3D_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21)

#define V3D_QPU_MUL_A_SHIFT 18
#define V3D_QPU_MUL_A_MASK QPU_MASK(20, 18)

#define V3D_QPU_ADD_B_SHIFT 15
#define V3D_QPU_ADD_B_MASK QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_BDU_SHIFT 15
#define V3D_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_UB QPU_MASK(14, 14)

#define V3D_QPU_ADD_A_SHIFT 12
#define V3D_QPU_ADD_A_MASK QPU_MASK(14, 12)

#define V3D_QPU_BRANCH_BDI_SHIFT 12
#define V3D_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12)

#define V3D_QPU_RADDR_A_SHIFT 6
#define V3D_QPU_RADDR_A_MASK QPU_MASK(11, 6)

#define V3D_QPU_RADDR_B_SHIFT 0
#define V3D_QPU_RADDR_B_MASK QPU_MASK(5, 0)
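
/*
 * Note that the branch fields above deliberately reuse bit ranges of the
 * ALU encoding (e.g. V3D_QPU_BRANCH_ADDR_HIGH and V3D_QPU_OP_ADD are both
 * bits 31:24).  Which interpretation applies is decided when a word is
 * unpacked, in v3d_qpu_instr_unpack() near the bottom of this file.
 */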

#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true

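/*
 * The tables below map each 5-bit packed signal value to the set of
 * signals it implies, one table per V3D generation.  Indices without an
 * initializer are reserved encodings: they unpack as all-false, which is
 * what v3d_qpu_sig_unpack() uses to reject them.
 */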
static const struct v3d_qpu_sig v33_sig_map[] = {
        /*      MISC    R3      R4      R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDVARY, LDTMU, },
        [13] = { THRSW, LDVARY, LDTMU, },
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        /* 18-21 reserved */
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = { LDVPM, },
        [25] = { THRSW, LDVPM, },
        [26] = { LDVPM, LDUNIF },
        [27] = { THRSW, LDVPM, LDUNIF },
        [28] = { LDVPM, LDTMU, },
        [29] = { THRSW, LDVPM, LDTMU, },
        [30] = { SMIMM, LDVPM, },
        [31] = { SMIMM, },
};

static const struct v3d_qpu_sig v40_sig_map[] = {
        /*      MISC    R3      R4      R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        /* 12-13 reserved */
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        [18] = { WRTMUC },
        [19] = { THRSW, WRTMUC },
        [20] = { LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        /* 24-30 reserved */
        [31] = { SMIMM, LDTMU, },
};

static const struct v3d_qpu_sig v41_sig_map[] = {
        /*      MISC    phys    R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDUNIFRF },
        [13] = { THRSW, LDUNIFRF },
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        [18] = { WRTMUC },
        [19] = { THRSW, WRTMUC },
        [20] = { LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = { LDUNIFA },
        [25] = { LDUNIFARF },
        /* 26-30 reserved */
        [31] = { SMIMM, LDTMU, },
};

bool
v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
                   uint32_t packed_sig,
                   struct v3d_qpu_sig *sig)
{
        if (packed_sig >= ARRAY_SIZE(v33_sig_map))
                return false;

        if (devinfo->ver >= 41)
                *sig = v41_sig_map[packed_sig];
        else if (devinfo->ver == 40)
                *sig = v40_sig_map[packed_sig];
        else
                *sig = v33_sig_map[packed_sig];

        /* Signals with zeroed unpacked contents after element 0 are reserved. */
        return (packed_sig == 0 ||
                memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
}

bool
v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_sig *sig,
                 uint32_t *packed_sig)
{
        const struct v3d_qpu_sig *map;

        if (devinfo->ver >= 41)
                map = v41_sig_map;
        else if (devinfo->ver == 40)
                map = v40_sig_map;
        else
                map = v33_sig_map;

        for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
                if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
                        *packed_sig = i;
                        return true;
                }
        }

        return false;
}
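
/*
 * Packing is the inverse lookup: a linear scan for a bit-identical struct.
 * Because the comparison is a memcmp(), callers must pass a fully
 * zero-initialized v3d_qpu_sig so that padding and unused fields match the
 * designated initializers in the tables above.
 */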

static inline unsigned
fui(float f)
{
        union {float f; unsigned ui;} fi;
        fi.f = f;
        return fi.ui;
}
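
/*
 * fui() returns the IEEE-754 bit pattern of a float, e.g.
 * fui(0.125f) == 0x3e000000, matching the 2.0^-3 entry in the small
 * immediates table below.
 */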

static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};
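
/*
 * The small immediate field therefore encodes, in order: the integers
 * 0..15, the integers -16..-1, and the float powers of two 2^-8..2^7.
 * For example, packed value 40 decodes to 0x3f800000 (1.0f).
 */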

bool
v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
                         uint32_t packed_small_immediate,
                         uint32_t *small_immediate)
{
        if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
                return false;

        *small_immediate = small_immediates[packed_small_immediate];
        return true;
}

bool
v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
                       uint32_t value,
                       uint32_t *packed_small_immediate)
{
        STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);

        for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
                if (small_immediates[i] == value) {
                        *packed_small_immediate = i;
                        return true;
                }
        }

        return false;
}

bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                return true;
        } else if (packed_cond >> 2 == 0) {
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}

bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
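
        /*
         * Each row of flags_table names a representable combination of
         * add/mul conditions and flag updates, paired with the base bits of
         * its encoding; the field values themselves are OR'd in below.
         * Combinations with no row (say, AUF together with MUF) cannot be
         * encoded and make this function return false.
         */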
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0, 0 },
                { APF, 0 },
                { AUF, 0 },
                { MPF, (1 << 4) },
                { MUF, (1 << 4) },
                { AC, (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC, (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC, (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                if (flags_present & AC)
                        *packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << 2;

                if (flags_present & MC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}

/* Make a mapping of the table of opcodes in the spec. The opcode is
 * determined by a combination of the opcode field, and in the case of 0 or
 * 1-arg opcodes, the mux_b field as well.
 */
#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1))
#define ANYMUX MUX_MASK(0, 7)
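
/*
 * MUX_MASK(bot, top) builds a contiguous bit mask over the mux values
 * bot..top: MUX_MASK(0, 2) == 0x07, and ANYMUX == 0xff, i.e. all eight mux
 * encodings are accepted.
 */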

struct opcode_desc {
        uint8_t opcode_first;
        uint8_t opcode_last;
        uint8_t mux_b_mask;
        uint8_t mux_a_mask;
        uint8_t op;
        /* 0 if it's the same across V3D versions, or a specific V3D version. */
        uint8_t ver;
};

static const struct opcode_desc add_ops[] = {
        /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
        { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADD },
        { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
        { 53, 55, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 56, 56, ANYMUX, ANYMUX, V3D_QPU_A_ADD },
        { 57, 59, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 60, 60, ANYMUX, ANYMUX, V3D_QPU_A_SUB },
        { 61, 63, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 64, 111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
        /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP },
        { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP },
        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },

        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },

        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 },
        { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 },
        { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 },
        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },
        { 187, 187, 1 << 2, 1 << 7, V3D_QPU_A_FLAFIRST, 41 },
        { 187, 187, 1 << 3, 1 << 0, V3D_QPU_A_FLNAFIRST, 41 },
        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },

        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 },
        { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 },
        { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 },
        { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 },
        { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 },
        { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
};

static const struct opcode_desc mul_ops[] = {
        { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD },
        { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB },
        { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
        { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
        { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
        { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
        { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
};

static const struct opcode_desc *
lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes,
              uint32_t opcode, uint32_t mux_a, uint32_t mux_b)
{
        for (int i = 0; i < num_opcodes; i++) {
                const struct opcode_desc *op_desc = &opcodes[i];

                if (opcode < op_desc->opcode_first ||
                    opcode > op_desc->opcode_last)
                        continue;

                if (!(op_desc->mux_b_mask & (1 << mux_b)))
                        continue;

                if (!(op_desc->mux_a_mask & (1 << mux_a)))
                        continue;

                return op_desc;
        }

        return NULL;
}
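
/*
 * Note that lookup_opcode() returns the first matching row.  Some add_ops
 * ranges intentionally overlap (FADD/FADDNF and FMIN/FMAX share opcode
 * ranges), so the unpack code below fixes up which member of each pair was
 * really encoded by looking at the operand order.
 */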

static bool
v3d_qpu_float32_unpack_unpack(uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_ABS;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_L;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_H;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_ABS:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_NONE:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_L:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_H:
                *packed = 3;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_unpack(uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
                return true;
        case 4:
                *unpacked = V3D_QPU_UNPACK_SWAP_16;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_32F_16:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_L_16:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_H_16:
                *packed = 3;
                return true;
        case V3D_QPU_UNPACK_SWAP_16:
                *packed = 4;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_pack_pack(enum v3d_qpu_output_pack pack,
                          uint32_t *packed)
{
        switch (pack) {
        case V3D_QPU_PACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_PACK_L:
                *packed = 1;
                return true;
        case V3D_QPU_PACK_H:
                *packed = 2;
                return true;
        default:
                return false;
        }
}
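
/*
 * Helper naming convention in this file: the first suffix names the field
 * being handled and the second the direction, so
 * v3d_qpu_float32_unpack_pack() packs an input-unpack modifier while
 * v3d_qpu_float32_pack_pack() packs an output-pack modifier.
 */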

static bool
v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        uint32_t map_op = op;
        /* Some big clusters of opcodes are replicated with unpack
         * flags
         */
        if (map_op >= 249 && map_op <= 251)
                map_op = (map_op - 249 + 245);
        if (map_op >= 253 && map_op <= 255)
                map_op = (map_op - 253 + 245);

        const struct opcode_desc *desc =
                lookup_opcode(add_ops, ARRAY_SIZE(add_ops),
                              map_op, mux_a, mux_b);
        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
         * operands.
         */
        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }

        /* Some QPU ops require a bit more than just basic opcode and mux a/b
         * comparisons to distinguish them.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                switch (waddr) {
                case 0:
                        instr->alu.add.op = V3D_QPU_A_STVPMV;
                        break;
                case 1:
                        instr->alu.add.op = V3D_QPU_A_STVPMD;
                        break;
                case 2:
                        instr->alu.add.op = V3D_QPU_A_STVPMP;
                        break;
                default:
                        return false;
                }
                break;
        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
        case V3D_QPU_A_VFPACK:
                if (instr->alu.add.op != V3D_QPU_A_VFPACK)
                        instr->alu.add.output_pack = (op >> 4) & 0x3;
                else
                        instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.add.b_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
                instr->alu.add.output_pack = mux_b & 0x3;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;

        default:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.add.a = mux_a;
        instr->alu.add.b = mux_b;
        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        instr->alu.add.magic_write = false;
        if (packed_inst & V3D_QPU_MA) {
                switch (instr->alu.add.op) {
                case V3D_QPU_A_LDVPMV_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
                        break;
                case V3D_QPU_A_LDVPMD_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
                        break;
                case V3D_QPU_A_LDVPMG_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
                        break;
                default:
                        instr->alu.add.magic_write = true;
                        break;
                }
        }

        return true;
}
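
/*
 * The FADD/FADDNF and FMIN/FMAX fixup above works because both members of
 * each pair occupy the same opcode range: the encoding distinguishes them
 * by whether the 5-bit key unpack * 8 + mux compares greater for the A
 * operand than for the B operand.  v3d_qpu_add_pack() below performs the
 * matching operand swap when encoding.
 */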

static bool
v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B);

        {
                const struct opcode_desc *desc =
                        lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops),
                                      op, mux_a, mux_b);
                if (!desc)
                        return false;

                instr->alu.mul.op = desc->op;
        }

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL:
                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.mul.b_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_FMOV:
                instr->alu.mul.output_pack = (((op & 1) << 1) +
                                              ((mux_b >> 2) & 1));

                if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_VFMUL:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;

                break;

        default:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.mul.a = mux_a;
        instr->alu.mul.b = mux_b;
        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
        instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;

        return true;
}
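
/*
 * For FMOV, the output-pack modifier is split across the encoding: bit 1 of
 * the packed modifier lives in opcode bit 0 and bit 0 lives in mux_b bit 2,
 * while mux_b bits 1:0 carry the input unpack.  The expression above
 * reassembles it; v3d_qpu_mul_pack() below scatters it again.
 */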

static bool
v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t mux_a = instr->alu.add.a;
        uint32_t mux_b = instr->alu.add.b;
        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc;

        int opcode;
        for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)];
             desc++) {
                if (desc->op == instr->alu.add.op)
                        break;
        }
        if (desc == &add_ops[ARRAY_SIZE(add_ops)])
                return false;

        opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its mux values may be used to
         * identify the operation type.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        bool no_magic_write = false;

        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
                waddr = 0;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                no_magic_write = true;
                break;

        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
                assert(!instr->alu.add.magic_write);
                break;

        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMG_OUT:
                assert(!instr->alu.add.magic_write);
                *packed_instr |= V3D_QPU_MA;
                break;

        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &output_pack)) {
                        return false;
                }
                opcode |= output_pack << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by which order their operands come in.
                 */
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;

                break;
        }

        case V3D_QPU_A_VFPACK: {
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS ||
                    instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                opcode = (opcode & ~(1 << 2)) | (a_unpack << 2);
                opcode = (opcode & ~(1 << 0)) | (b_unpack << 0);

                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode = (opcode & ~(1 << 2)) | packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode |= packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= V3D_QPU_MA;

        return true;
}
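
/*
 * Note the STVPM* handling above: the three store variants share opcode 248
 * and are distinguished purely by waddr values 0-2, so the waddr field is
 * overwritten and the magic-write bit suppressed regardless of what the
 * caller set.
 */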

static bool
v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t mux_a = instr->alu.mul.a;
        uint32_t mux_b = instr->alu.mul.b;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
        const struct opcode_desc *desc;

        for (desc = mul_ops; desc != &mul_ops[ARRAY_SIZE(mul_ops)];
             desc++) {
                if (desc->op == instr->alu.mul.op)
                        break;
        }
        if (desc == &mul_ops[ARRAY_SIZE(mul_ops)])
                return false;

        uint32_t opcode = desc->opcode_first;

        /* Some opcodes have a single valid value for their mux a/b, so set
         * that here. If mux a/b determine packing, it will be set below.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                opcode |= (packed >> 1) & 1;
                mux_b = (packed & 1) << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                mux_b |= packed;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        default:
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_MUL_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_MUL_B);

        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= V3D_QPU_MM;

        return true;
}

static bool
v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
                         uint64_t packed_instr,
                         struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_ALU;

        if (!v3d_qpu_sig_unpack(devinfo,
                                QPU_GET_FIELD(packed_instr, V3D_QPU_SIG),
                                &instr->sig))
                return false;

        uint32_t packed_cond = QPU_GET_FIELD(packed_instr, V3D_QPU_COND);
        if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                instr->sig_addr = packed_cond & ~V3D_QPU_COND_SIG_MAGIC_ADDR;
                instr->sig_magic = packed_cond & V3D_QPU_COND_SIG_MAGIC_ADDR;

                instr->flags.ac = V3D_QPU_COND_NONE;
                instr->flags.mc = V3D_QPU_COND_NONE;
                instr->flags.apf = V3D_QPU_PF_NONE;
                instr->flags.mpf = V3D_QPU_PF_NONE;
                instr->flags.auf = V3D_QPU_UF_NONE;
                instr->flags.muf = V3D_QPU_UF_NONE;
        } else {
                if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
                        return false;
        }

        instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A);
        instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B);

        if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
                return false;

        if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
                return false;

        return true;
}

static bool
v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
                            uint64_t packed_instr,
                            struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;

        uint32_t cond = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_COND);
        if (cond == 0)
                instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
        else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
                 V3D_QPU_BRANCH_COND_ALLNA)
                instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
        else
                return false;

        uint32_t msfign = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_MSFIGN);
        if (msfign == 3)
                return false;
        instr->branch.msfign = msfign;

        instr->branch.bdi = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_BDI);

        instr->branch.ub = packed_instr & V3D_QPU_BRANCH_UB;
        if (instr->branch.ub) {
                instr->branch.bdu = QPU_GET_FIELD(packed_instr,
                                                  V3D_QPU_BRANCH_BDU);
        }

        instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
                                              V3D_QPU_RADDR_A);

        instr->branch.offset = 0;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              V3D_QPU_BRANCH_ADDR_LOW) << 3;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              V3D_QPU_BRANCH_ADDR_HIGH) << 24;

        return true;
}
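
/*
 * Branch targets are assembled from two fields: ADDR_LOW supplies offset
 * bits 23:3 and ADDR_HIGH bits 31:24, so offsets are always multiples of
 * the 8-byte instruction size and bits 2:0 are implicitly zero.
 */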

bool
v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
                     uint64_t packed_instr,
                     struct v3d_qpu_instr *instr)
{
        if (QPU_GET_FIELD(packed_instr, V3D_QPU_OP_MUL) != 0) {
                return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
        } else {
                uint32_t sig = QPU_GET_FIELD(packed_instr, V3D_QPU_SIG);

                if ((sig & 24) == 16) {
                        return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
                                                           instr);
                } else {
                        return false;
                }
        }
}
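
/*
 * Dispatch rationale: every valid ALU instruction packs a nonzero mul
 * opcode (mul_ops starts at 1, and even a mul NOP encodes as opcode 15), so
 * a zero V3D_QPU_OP_MUL field combined with a signal value of 16-23
 * ((sig & 24) == 16) reliably identifies a branch encoding.
 */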

static bool
v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
                       const struct v3d_qpu_instr *instr,
                       uint64_t *packed_instr)
{
        uint32_t sig;
        if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
                return false;
        *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);

        if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
                *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A);
                *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B);

                if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
                        return false;
                if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
                        return false;

                uint32_t flags;
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                        if (instr->flags.ac != V3D_QPU_COND_NONE ||
                            instr->flags.mc != V3D_QPU_COND_NONE ||
                            instr->flags.apf != V3D_QPU_PF_NONE ||
                            instr->flags.mpf != V3D_QPU_PF_NONE ||
                            instr->flags.auf != V3D_QPU_UF_NONE ||
                            instr->flags.muf != V3D_QPU_UF_NONE) {
                                return false;
                        }

                        flags = instr->sig_addr;
                        if (instr->sig_magic)
                                flags |= V3D_QPU_COND_SIG_MAGIC_ADDR;
                } else {
                        if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
                                return false;
                }

                *packed_instr |= QPU_SET_FIELD(flags, V3D_QPU_COND);
        } else {
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
                        return false;
        }

        return true;
}

static bool
v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
                          const struct v3d_qpu_instr *instr,
                          uint64_t *packed_instr)
{
        *packed_instr |= QPU_SET_FIELD(16, V3D_QPU_SIG);

        if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
                *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
                                                    V3D_QPU_BRANCH_COND_A0),
                                               V3D_QPU_BRANCH_COND);
        }

        *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                       V3D_QPU_BRANCH_MSFIGN);

        *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
                                       V3D_QPU_BRANCH_BDI);

        if (instr->branch.ub) {
                *packed_instr |= V3D_QPU_BRANCH_UB;
                *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
                                               V3D_QPU_BRANCH_BDU);
        }

        switch (instr->branch.bdi) {
        case V3D_QPU_BRANCH_DEST_ABS:
        case V3D_QPU_BRANCH_DEST_REL:
                *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                               V3D_QPU_BRANCH_MSFIGN);

                *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
                                                ~0xff000000) >> 3,
                                               V3D_QPU_BRANCH_ADDR_LOW);

                *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
                                               V3D_QPU_BRANCH_ADDR_HIGH);
                break;
        default:
                break;
        }

        if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE ||
            instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) {
                *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
                                               V3D_QPU_RADDR_A);
        }

        return true;
}

bool
v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *instr,
                   uint64_t *packed_instr)
{
        *packed_instr = 0;

        switch (instr->type) {
        case V3D_QPU_INSTR_TYPE_ALU:
                return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
        default:
                return false;
        }
}
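
/*
 * Usage sketch (editor's addition, not part of the upstream file): how a
 * caller might round-trip a raw 64-bit instruction word through the
 * unpacker and the packer.  The v4.2 devinfo value is an assumption for
 * illustration; UNUSED comes from util/macros.h.
 */
static UNUSED bool
v3d_qpu_roundtrip_example(uint64_t word)
{
        /* Hypothetical device: any ver >= 41 selects the v4.1+ tables. */
        struct v3d_device_info devinfo = { .ver = 42 };
        struct v3d_qpu_instr instr;
        uint64_t repacked = 0;

        /* Decode the raw word into the structured representation. */
        if (!v3d_qpu_instr_unpack(&devinfo, word, &instr))
                return false;

        /* Re-encode it; a lossless round trip reproduces the bits. */
        if (!v3d_qpu_instr_pack(&devinfo, &instr, &repacked))
                return false;

        return repacked == word;
}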