/*
 * Origin: GitHub repository PojavLauncherTeam/mesa
 * Path: blob/21.2-virgl/src/freedreno/ir3/instr-a3xx.h
 */
/*
 * Copyright (c) 2013 Rob Clark <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef INSTR_A3XX_H_
#define INSTR_A3XX_H_

/* Attribute applied to every instruction-encoding struct/union in this
 * header so the compiler inserts no padding between members.
 */
#define PACKED __attribute__((__packed__))

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Optional hook invoked when an ir3_assert() fails.  It is declared weak,
 * so a program may leave it undefined (its address then tests false), and
 * noreturn, so a provided handler must not return to the caller.
 */
/* clang-format off */
void ir3_assert_handler(const char *expr, const char *file, int line,
                        const char *func) __attribute__((weak)) __attribute__((__noreturn__));
/* clang-format on */

/* A wrapper for assert() that allows overriding handling of a failed
 * assert.  This is needed for tools like crashdec which can want to
 * attempt to disassemble memory that might not actually be valid
 * instructions.
 *
 * Note: on failure without a handler, expr is evaluated a second time by
 * assert(), so it should be free of side effects.
 */
#define ir3_assert(expr)                                                       \
   do {                                                                        \
      if (!(expr)) {                                                           \
         if (ir3_assert_handler) {                                             \
            ir3_assert_handler(#expr, __FILE__, __LINE__, __func__);           \
         }                                                                     \
         assert(expr);                                                         \
      }                                                                        \
   } while (0)

/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6

/* Pack an instruction category and a per-category opcode number into one
 * opc_t value: the category sits above the low NOPC_BITS bits.
 *
 * The category is scaled by multiplication rather than a left shift so
 * that the meta category (-1) does not left-shift a negative value, which
 * is undefined behaviour in C; the resulting values are unchanged.
 */
#define _OPC(cat, opc) (((cat) * (1 << NOPC_BITS)) | (opc))

/* clang-format off */
59
typedef enum {
60
/* category 0: */
61
OPC_NOP = _OPC(0, 0),
62
OPC_B = _OPC(0, 1),
63
OPC_JUMP = _OPC(0, 2),
64
OPC_CALL = _OPC(0, 3),
65
OPC_RET = _OPC(0, 4),
66
OPC_KILL = _OPC(0, 5),
67
OPC_END = _OPC(0, 6),
68
OPC_EMIT = _OPC(0, 7),
69
OPC_CUT = _OPC(0, 8),
70
OPC_CHMASK = _OPC(0, 9),
71
OPC_CHSH = _OPC(0, 10),
72
OPC_FLOW_REV = _OPC(0, 11),
73
74
OPC_BKT = _OPC(0, 16),
75
OPC_STKS = _OPC(0, 17),
76
OPC_STKR = _OPC(0, 18),
77
OPC_XSET = _OPC(0, 19),
78
OPC_XCLR = _OPC(0, 20),
79
OPC_GETONE = _OPC(0, 21),
80
OPC_DBG = _OPC(0, 22),
81
OPC_SHPS = _OPC(0, 23), /* shader prologue start */
82
OPC_SHPE = _OPC(0, 24), /* shader prologue end */
83
84
OPC_PREDT = _OPC(0, 29), /* predicated true */
85
OPC_PREDF = _OPC(0, 30), /* predicated false */
86
OPC_PREDE = _OPC(0, 31), /* predicated end */
87
88
/* Logical opcodes for different branch instruction variations: */
89
OPC_BR = _OPC(0, 40),
90
OPC_BRAO = _OPC(0, 41),
91
OPC_BRAA = _OPC(0, 42),
92
OPC_BRAC = _OPC(0, 43),
93
OPC_BANY = _OPC(0, 44),
94
OPC_BALL = _OPC(0, 45),
95
OPC_BRAX = _OPC(0, 46),
96
97
/* Logical opcode to distinguish kill and demote */
98
OPC_DEMOTE = _OPC(0, 47),
99
100
/* category 1: */
101
OPC_MOV = _OPC(1, 0),
102
OPC_MOVP = _OPC(1, 1),
103
/* swz, gat, sct */
104
OPC_MOVMSK = _OPC(1, 3),
105
106
/* Virtual opcodes for instructions differentiated via a "sub-opcode" that
107
* replaces the repeat field:
108
*/
109
OPC_SWZ = _OPC(1, 4),
110
OPC_GAT = _OPC(1, 5),
111
OPC_SCT = _OPC(1, 6),
112
113
/* Logical opcodes for different variants of mov: */
114
OPC_MOV_IMMED = _OPC(1, 40),
115
OPC_MOV_CONST = _OPC(1, 41),
116
OPC_MOV_GPR = _OPC(1, 42),
117
OPC_MOV_RELGPR = _OPC(1, 43),
118
OPC_MOV_RELCONST = _OPC(1, 44),
119
120
/* Macros that expand to an if statement + move */
121
OPC_BALLOT_MACRO = _OPC(1, 50),
122
OPC_ANY_MACRO = _OPC(1, 51),
123
OPC_ALL_MACRO = _OPC(1, 52),
124
OPC_ELECT_MACRO = _OPC(1, 53),
125
OPC_READ_COND_MACRO = _OPC(1, 54),
126
OPC_READ_FIRST_MACRO = _OPC(1, 55),
127
OPC_SWZ_SHARED_MACRO = _OPC(1, 56),
128
129
/* category 2: */
130
OPC_ADD_F = _OPC(2, 0),
131
OPC_MIN_F = _OPC(2, 1),
132
OPC_MAX_F = _OPC(2, 2),
133
OPC_MUL_F = _OPC(2, 3),
134
OPC_SIGN_F = _OPC(2, 4),
135
OPC_CMPS_F = _OPC(2, 5),
136
OPC_ABSNEG_F = _OPC(2, 6),
137
OPC_CMPV_F = _OPC(2, 7),
138
/* 8 - invalid */
139
OPC_FLOOR_F = _OPC(2, 9),
140
OPC_CEIL_F = _OPC(2, 10),
141
OPC_RNDNE_F = _OPC(2, 11),
142
OPC_RNDAZ_F = _OPC(2, 12),
143
OPC_TRUNC_F = _OPC(2, 13),
144
/* 14-15 - invalid */
145
OPC_ADD_U = _OPC(2, 16),
146
OPC_ADD_S = _OPC(2, 17),
147
OPC_SUB_U = _OPC(2, 18),
148
OPC_SUB_S = _OPC(2, 19),
149
OPC_CMPS_U = _OPC(2, 20),
150
OPC_CMPS_S = _OPC(2, 21),
151
OPC_MIN_U = _OPC(2, 22),
152
OPC_MIN_S = _OPC(2, 23),
153
OPC_MAX_U = _OPC(2, 24),
154
OPC_MAX_S = _OPC(2, 25),
155
OPC_ABSNEG_S = _OPC(2, 26),
156
/* 27 - invalid */
157
OPC_AND_B = _OPC(2, 28),
158
OPC_OR_B = _OPC(2, 29),
159
OPC_NOT_B = _OPC(2, 30),
160
OPC_XOR_B = _OPC(2, 31),
161
/* 32 - invalid */
162
OPC_CMPV_U = _OPC(2, 33),
163
OPC_CMPV_S = _OPC(2, 34),
164
/* 35-47 - invalid */
165
OPC_MUL_U24 = _OPC(2, 48), /* 24b mul into 32b result */
166
OPC_MUL_S24 = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
167
OPC_MULL_U = _OPC(2, 50),
168
OPC_BFREV_B = _OPC(2, 51),
169
OPC_CLZ_S = _OPC(2, 52),
170
OPC_CLZ_B = _OPC(2, 53),
171
OPC_SHL_B = _OPC(2, 54),
172
OPC_SHR_B = _OPC(2, 55),
173
OPC_ASHR_B = _OPC(2, 56),
174
OPC_BARY_F = _OPC(2, 57),
175
OPC_MGEN_B = _OPC(2, 58),
176
OPC_GETBIT_B = _OPC(2, 59),
177
OPC_SETRM = _OPC(2, 60),
178
OPC_CBITS_B = _OPC(2, 61),
179
OPC_SHB = _OPC(2, 62),
180
OPC_MSAD = _OPC(2, 63),
181
182
/* category 3: */
183
OPC_MAD_U16 = _OPC(3, 0),
184
OPC_MADSH_U16 = _OPC(3, 1),
185
OPC_MAD_S16 = _OPC(3, 2),
186
OPC_MADSH_M16 = _OPC(3, 3), /* should this be .s16? */
187
OPC_MAD_U24 = _OPC(3, 4),
188
OPC_MAD_S24 = _OPC(3, 5),
189
OPC_MAD_F16 = _OPC(3, 6),
190
OPC_MAD_F32 = _OPC(3, 7),
191
OPC_SEL_B16 = _OPC(3, 8),
192
OPC_SEL_B32 = _OPC(3, 9),
193
OPC_SEL_S16 = _OPC(3, 10),
194
OPC_SEL_S32 = _OPC(3, 11),
195
OPC_SEL_F16 = _OPC(3, 12),
196
OPC_SEL_F32 = _OPC(3, 13),
197
OPC_SAD_S16 = _OPC(3, 14),
198
OPC_SAD_S32 = _OPC(3, 15),
199
OPC_SHLG_B16 = _OPC(3, 16),
200
201
/* category 4: */
202
OPC_RCP = _OPC(4, 0),
203
OPC_RSQ = _OPC(4, 1),
204
OPC_LOG2 = _OPC(4, 2),
205
OPC_EXP2 = _OPC(4, 3),
206
OPC_SIN = _OPC(4, 4),
207
OPC_COS = _OPC(4, 5),
208
OPC_SQRT = _OPC(4, 6),
209
/* NOTE that these are 8+opc from their highp equivs, so it's possible
210
* that the high order bit in the opc field has been repurposed for
211
* half-precision use? But note that other ops (rcp/lsin/cos/sqrt)
212
* still use the same opc as highp
213
*/
214
OPC_HRSQ = _OPC(4, 9),
215
OPC_HLOG2 = _OPC(4, 10),
216
OPC_HEXP2 = _OPC(4, 11),
217
218
/* category 5: */
219
OPC_ISAM = _OPC(5, 0),
220
OPC_ISAML = _OPC(5, 1),
221
OPC_ISAMM = _OPC(5, 2),
222
OPC_SAM = _OPC(5, 3),
223
OPC_SAMB = _OPC(5, 4),
224
OPC_SAML = _OPC(5, 5),
225
OPC_SAMGQ = _OPC(5, 6),
226
OPC_GETLOD = _OPC(5, 7),
227
OPC_CONV = _OPC(5, 8),
228
OPC_CONVM = _OPC(5, 9),
229
OPC_GETSIZE = _OPC(5, 10),
230
OPC_GETBUF = _OPC(5, 11),
231
OPC_GETPOS = _OPC(5, 12),
232
OPC_GETINFO = _OPC(5, 13),
233
OPC_DSX = _OPC(5, 14),
234
OPC_DSY = _OPC(5, 15),
235
OPC_GATHER4R = _OPC(5, 16),
236
OPC_GATHER4G = _OPC(5, 17),
237
OPC_GATHER4B = _OPC(5, 18),
238
OPC_GATHER4A = _OPC(5, 19),
239
OPC_SAMGP0 = _OPC(5, 20),
240
OPC_SAMGP1 = _OPC(5, 21),
241
OPC_SAMGP2 = _OPC(5, 22),
242
OPC_SAMGP3 = _OPC(5, 23),
243
OPC_DSXPP_1 = _OPC(5, 24),
244
OPC_DSYPP_1 = _OPC(5, 25),
245
OPC_RGETPOS = _OPC(5, 26),
246
OPC_RGETINFO = _OPC(5, 27),
247
/* cat5 meta instructions, placed above the cat5 opc field's size */
248
OPC_DSXPP_MACRO = _OPC(5, 32),
249
OPC_DSYPP_MACRO = _OPC(5, 33),
250
251
/* category 6: */
252
OPC_LDG = _OPC(6, 0), /* load-global */
253
OPC_LDL = _OPC(6, 1),
254
OPC_LDP = _OPC(6, 2),
255
OPC_STG = _OPC(6, 3), /* store-global */
256
OPC_STL = _OPC(6, 4),
257
OPC_STP = _OPC(6, 5),
258
OPC_LDIB = _OPC(6, 6),
259
OPC_G2L = _OPC(6, 7),
260
OPC_L2G = _OPC(6, 8),
261
OPC_PREFETCH = _OPC(6, 9),
262
OPC_LDLW = _OPC(6, 10),
263
OPC_STLW = _OPC(6, 11),
264
OPC_RESFMT = _OPC(6, 14),
265
OPC_RESINFO = _OPC(6, 15),
266
OPC_ATOMIC_ADD = _OPC(6, 16),
267
OPC_ATOMIC_SUB = _OPC(6, 17),
268
OPC_ATOMIC_XCHG = _OPC(6, 18),
269
OPC_ATOMIC_INC = _OPC(6, 19),
270
OPC_ATOMIC_DEC = _OPC(6, 20),
271
OPC_ATOMIC_CMPXCHG = _OPC(6, 21),
272
OPC_ATOMIC_MIN = _OPC(6, 22),
273
OPC_ATOMIC_MAX = _OPC(6, 23),
274
OPC_ATOMIC_AND = _OPC(6, 24),
275
OPC_ATOMIC_OR = _OPC(6, 25),
276
OPC_ATOMIC_XOR = _OPC(6, 26),
277
OPC_LDGB = _OPC(6, 27),
278
OPC_STGB = _OPC(6, 28),
279
OPC_STIB = _OPC(6, 29),
280
OPC_LDC = _OPC(6, 30),
281
OPC_LDLV = _OPC(6, 31),
282
OPC_PIPR = _OPC(6, 32), /* ??? */
283
OPC_PIPC = _OPC(6, 33), /* ??? */
284
OPC_EMIT2 = _OPC(6, 34), /* ??? */
285
OPC_ENDLS = _OPC(6, 35), /* ??? */
286
OPC_GETSPID = _OPC(6, 36), /* SP ID */
287
OPC_GETWID = _OPC(6, 37), /* wavefront ID */
288
289
/* Logical opcodes for things that differ in a6xx+ */
290
OPC_STC = _OPC(6, 40),
291
OPC_RESINFO_B = _OPC(6, 41),
292
OPC_LDIB_B = _OPC(6, 42),
293
OPC_STIB_B = _OPC(6, 43),
294
295
/* Logical opcodes for different atomic instruction variations: */
296
OPC_ATOMIC_B_ADD = _OPC(6, 44),
297
OPC_ATOMIC_B_SUB = _OPC(6, 45),
298
OPC_ATOMIC_B_XCHG = _OPC(6, 46),
299
OPC_ATOMIC_B_INC = _OPC(6, 47),
300
OPC_ATOMIC_B_DEC = _OPC(6, 48),
301
OPC_ATOMIC_B_CMPXCHG = _OPC(6, 49),
302
OPC_ATOMIC_B_MIN = _OPC(6, 50),
303
OPC_ATOMIC_B_MAX = _OPC(6, 51),
304
OPC_ATOMIC_B_AND = _OPC(6, 52),
305
OPC_ATOMIC_B_OR = _OPC(6, 53),
306
OPC_ATOMIC_B_XOR = _OPC(6, 54),
307
308
OPC_LDG_A = _OPC(6, 55),
309
OPC_STG_A = _OPC(6, 56),
310
311
/* category 7: */
312
OPC_BAR = _OPC(7, 0),
313
OPC_FENCE = _OPC(7, 1),
314
315
/* meta instructions (category -1): */
316
/* placeholder instr to mark shader inputs: */
317
OPC_META_INPUT = _OPC(-1, 0),
318
/* The "collect" and "split" instructions are used for keeping
319
* track of instructions that write to multiple dst registers
320
* (split) like texture sample instructions, or read multiple
321
* consecutive scalar registers (collect) (bary.f, texture samp)
322
*
323
* A "split" extracts a scalar component from a vecN, and a
324
* "collect" gathers multiple scalar components into a vecN
325
*/
326
OPC_META_SPLIT = _OPC(-1, 2),
327
OPC_META_COLLECT = _OPC(-1, 3),
328
329
/* placeholder for texture fetches that run before FS invocation
330
* starts:
331
*/
332
OPC_META_TEX_PREFETCH = _OPC(-1, 4),
333
334
/* Parallel copies have multiple destinations, and copy each destination
335
* to its corresponding source. This happens "in parallel," meaning that
336
* it happens as-if every source is read first and then every destination
337
* is stored. These are produced in RA when register shuffling is
338
* required, and then lowered away immediately afterwards.
339
*/
340
OPC_META_PARALLEL_COPY = _OPC(-1, 5),
341
OPC_META_PHI = _OPC(-1, 6),
342
} opc_t;
343
/* clang-format on */
344
345
/* Inverse of _OPC(): opc_cat() recovers the instruction category (the bits
 * above NOPC_BITS, as an int) and opc_op() the per-category opcode number
 * (the low NOPC_BITS bits, as an unsigned).
 */
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))

/* Look up the name string for an opcode.  Only declared here; the
 * definition lives outside this header.
 */
const char *disasm_a3xx_instr_name(opc_t opc);

/* Data type codes; type_size() and the type_float()/type_uint()/type_sint()
 * helpers classify these values.
 */
typedef enum {
   TYPE_F16 = 0,
   TYPE_F32 = 1,
   TYPE_U16 = 2,
   TYPE_U32 = 3,
   TYPE_S16 = 4,
   TYPE_S32 = 5,
   TYPE_U8 = 6,
   TYPE_S8 = 7, // XXX I assume?
} type_t;

static inline uint32_t
362
type_size(type_t type)
363
{
364
switch (type) {
365
case TYPE_F32:
366
case TYPE_U32:
367
case TYPE_S32:
368
return 32;
369
case TYPE_F16:
370
case TYPE_U16:
371
case TYPE_S16:
372
return 16;
373
case TYPE_U8:
374
case TYPE_S8:
375
return 8;
376
default:
377
ir3_assert(0); /* invalid type */
378
return 0;
379
}
380
}
381
382
static inline int
383
type_float(type_t type)
384
{
385
return (type == TYPE_F32) || (type == TYPE_F16);
386
}
387
388
static inline int
389
type_uint(type_t type)
390
{
391
return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
392
}
393
394
static inline int
395
type_sint(type_t type)
396
{
397
return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
398
}
399
400
/* Rounding mode codes. */
typedef enum {
   ROUND_ZERO = 0,
   ROUND_EVEN = 1,
   ROUND_POS_INF = 2,
   ROUND_NEG_INF = 3,
} round_t;

typedef union PACKED {
408
/* normal gpr or const src register: */
409
struct PACKED {
410
uint32_t comp : 2;
411
uint32_t num : 10;
412
};
413
/* for immediate val: */
414
int32_t iim_val : 11;
415
/* to make compiler happy: */
416
uint32_t dummy32;
417
uint32_t dummy10 : 10;
418
int32_t idummy10 : 10;
419
uint32_t dummy11 : 11;
420
uint32_t dummy12 : 12;
421
uint32_t dummy13 : 13;
422
uint32_t dummy8 : 8;
423
int32_t idummy13 : 13;
424
int32_t idummy8 : 8;
425
} reg_t;
426
427
/* Build a register id from a register number and a component:
 * the number goes in the bits above the low two, the component in the
 * low two bits (anything above bit 1 of comp is masked off).
 *
 * comp:
 *   0 - x
 *   1 - y
 *   2 - z
 *   3 - w
 *
 * The arithmetic is done on unsigned values so that a negative num does
 * not left-shift a negative int (undefined behaviour); results for
 * non-negative inputs are unchanged.
 */
static inline uint32_t
regid(int num, int comp)
{
   return ((uint32_t)num << 2) | ((uint32_t)comp & 0x3u);
}

/* Sentinel register id (register number 63, component x) and helpers that
 * test against it.  CONDREG relies on a COND() macro that is not defined
 * in this header.
 */
#define INVALID_REG     regid(63, 0)
#define VALIDREG(r)     ((r) != INVALID_REG)
#define CONDREG(r, val) COND(VALIDREG(r), (val))

/* special registers: */
#define REG_A0 61 /* address register */
#define REG_P0 62 /* predicate register */

static inline int
448
reg_special(reg_t reg)
449
{
450
return (reg.num == REG_A0) || (reg.num == REG_P0);
451
}
452
453
/* Branch type codes, stored in instr_cat0_t::brtype. */
typedef enum {
   BRANCH_PLAIN = 0, /* br */
   BRANCH_OR = 1,    /* brao */
   BRANCH_AND = 2,   /* braa */
   BRANCH_CONST = 3, /* brac */
   BRANCH_ANY = 4,   /* bany */
   BRANCH_ALL = 5,   /* ball */
   BRANCH_X = 6,     /* brax ??? */
} brtype_t;

typedef struct PACKED {
464
/* dword0: */
465
union PACKED {
466
struct PACKED {
467
int16_t immed : 16;
468
uint32_t dummy1 : 16;
469
} a3xx;
470
struct PACKED {
471
int32_t immed : 20;
472
uint32_t dummy1 : 12;
473
} a4xx;
474
struct PACKED {
475
int32_t immed : 32;
476
} a5xx;
477
};
478
479
/* dword1: */
480
uint32_t idx : 5; /* brac.N index */
481
uint32_t brtype : 3; /* branch type, see brtype_t */
482
uint32_t repeat : 3;
483
uint32_t dummy3 : 1;
484
uint32_t ss : 1;
485
uint32_t inv2 : 1;
486
uint32_t comp2 : 2;
487
uint32_t eq : 1;
488
uint32_t opc_hi : 1; /* at least one bit */
489
uint32_t dummy4 : 2;
490
uint32_t inv1 : 1;
491
uint32_t comp1 : 2; /* component for first src */
492
uint32_t opc : 4;
493
uint32_t jmp_tgt : 1;
494
uint32_t sync : 1;
495
uint32_t opc_cat : 3;
496
} instr_cat0_t;
497
498
typedef struct PACKED {
499
/* dword0: */
500
union PACKED {
501
/* for normal src register: */
502
struct PACKED {
503
uint32_t src : 11;
504
/* at least low bit of pad must be zero or it will
505
* look like a address relative src
506
*/
507
uint32_t pad : 21;
508
};
509
/* for address relative: */
510
struct PACKED {
511
int32_t off : 10;
512
uint32_t src_rel_c : 1;
513
uint32_t src_rel : 1;
514
uint32_t unknown : 20;
515
};
516
/* for immediate: */
517
int32_t iim_val;
518
uint32_t uim_val;
519
float fim_val;
520
};
521
522
/* dword1: */
523
uint32_t dst : 8;
524
uint32_t repeat : 3;
525
uint32_t src_r : 1;
526
uint32_t ss : 1;
527
uint32_t ul : 1;
528
uint32_t dst_type : 3;
529
uint32_t dst_rel : 1;
530
uint32_t src_type : 3;
531
uint32_t src_c : 1;
532
uint32_t src_im : 1;
533
uint32_t even : 1;
534
uint32_t pos_inf : 1;
535
uint32_t opc : 2;
536
uint32_t jmp_tgt : 1;
537
uint32_t sync : 1;
538
uint32_t opc_cat : 3;
539
} instr_cat1_t;
540
541
typedef struct PACKED {
542
/* dword0: */
543
union PACKED {
544
struct PACKED {
545
uint32_t src1 : 11;
546
uint32_t must_be_zero1 : 2;
547
uint32_t src1_im : 1; /* immediate */
548
uint32_t src1_neg : 1; /* negate */
549
uint32_t src1_abs : 1; /* absolute value */
550
};
551
struct PACKED {
552
uint32_t src1 : 10;
553
uint32_t src1_c : 1; /* relative-const */
554
uint32_t src1_rel : 1; /* relative address */
555
uint32_t must_be_zero : 1;
556
uint32_t dummy : 3;
557
} rel1;
558
struct PACKED {
559
uint32_t src1 : 12;
560
uint32_t src1_c : 1; /* const */
561
int32_t dummy : 3;
562
} c1;
563
};
564
565
union PACKED {
566
struct PACKED {
567
uint32_t src2 : 11;
568
uint32_t must_be_zero2 : 2;
569
uint32_t src2_im : 1; /* immediate */
570
uint32_t src2_neg : 1; /* negate */
571
uint32_t src2_abs : 1; /* absolute value */
572
};
573
struct PACKED {
574
uint32_t src2 : 10;
575
uint32_t src2_c : 1; /* relative-const */
576
uint32_t src2_rel : 1; /* relative address */
577
uint32_t must_be_zero : 1;
578
uint32_t dummy : 3;
579
} rel2;
580
struct PACKED {
581
uint32_t src2 : 12;
582
uint32_t src2_c : 1; /* const */
583
uint32_t dummy : 3;
584
} c2;
585
};
586
587
/* dword1: */
588
uint32_t dst : 8;
589
uint32_t repeat : 2;
590
uint32_t sat : 1;
591
uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */
592
uint32_t ss : 1;
593
uint32_t ul : 1; /* dunno */
594
uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
595
uint32_t ei : 1;
596
uint32_t cond : 3;
597
uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */
598
uint32_t full : 1; /* not half */
599
uint32_t opc : 6;
600
uint32_t jmp_tgt : 1;
601
uint32_t sync : 1;
602
uint32_t opc_cat : 3;
603
} instr_cat2_t;
604
605
typedef struct PACKED {
606
/* dword0: */
607
union PACKED {
608
struct PACKED {
609
uint32_t src1 : 11;
610
uint32_t must_be_zero1 : 2;
611
uint32_t src2_c : 1;
612
uint32_t src1_neg : 1;
613
uint32_t src2_r : 1; /* doubles as nop1 if repeat==0 */
614
};
615
struct PACKED {
616
uint32_t src1 : 10;
617
uint32_t src1_c : 1;
618
uint32_t src1_rel : 1;
619
uint32_t must_be_zero : 1;
620
uint32_t dummy : 3;
621
} rel1;
622
struct PACKED {
623
uint32_t src1 : 12;
624
uint32_t src1_c : 1;
625
uint32_t dummy : 3;
626
} c1;
627
};
628
629
union PACKED {
630
struct PACKED {
631
uint32_t src3 : 11;
632
uint32_t must_be_zero2 : 2;
633
uint32_t src3_r : 1;
634
uint32_t src2_neg : 1;
635
uint32_t src3_neg : 1;
636
};
637
struct PACKED {
638
uint32_t src3 : 10;
639
uint32_t src3_c : 1;
640
uint32_t src3_rel : 1;
641
uint32_t must_be_zero : 1;
642
uint32_t dummy : 3;
643
} rel2;
644
struct PACKED {
645
uint32_t src3 : 12;
646
uint32_t src3_c : 1;
647
uint32_t dummy : 3;
648
} c2;
649
};
650
651
/* dword1: */
652
uint32_t dst : 8;
653
uint32_t repeat : 2;
654
uint32_t sat : 1;
655
uint32_t src1_r : 1; /* doubles as nop0 if repeat==0 */
656
uint32_t ss : 1;
657
uint32_t ul : 1;
658
uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
659
uint32_t src2 : 8;
660
uint32_t opc : 4;
661
uint32_t jmp_tgt : 1;
662
uint32_t sync : 1;
663
uint32_t opc_cat : 3;
664
} instr_cat3_t;
665
666
static inline bool
667
instr_cat3_full(instr_cat3_t *cat3)
668
{
669
switch (_OPC(3, cat3->opc)) {
670
case OPC_MAD_F16:
671
case OPC_MAD_U16:
672
case OPC_MAD_S16:
673
case OPC_SEL_B16:
674
case OPC_SEL_S16:
675
case OPC_SEL_F16:
676
case OPC_SAD_S16:
677
case OPC_SAD_S32: // really??
678
return false;
679
default:
680
return true;
681
}
682
}
683
684
typedef struct PACKED {
685
/* dword0: */
686
union PACKED {
687
struct PACKED {
688
uint32_t src : 11;
689
uint32_t must_be_zero1 : 2;
690
uint32_t src_im : 1; /* immediate */
691
uint32_t src_neg : 1; /* negate */
692
uint32_t src_abs : 1; /* absolute value */
693
};
694
struct PACKED {
695
uint32_t src : 10;
696
uint32_t src_c : 1; /* relative-const */
697
uint32_t src_rel : 1; /* relative address */
698
uint32_t must_be_zero : 1;
699
uint32_t dummy : 3;
700
} rel;
701
struct PACKED {
702
uint32_t src : 12;
703
uint32_t src_c : 1; /* const */
704
uint32_t dummy : 3;
705
} c;
706
};
707
uint32_t dummy1 : 16; /* seem to be ignored */
708
709
/* dword1: */
710
uint32_t dst : 8;
711
uint32_t repeat : 2;
712
uint32_t sat : 1;
713
uint32_t src_r : 1;
714
uint32_t ss : 1;
715
uint32_t ul : 1;
716
uint32_t dst_half : 1; /* or widen/narrow.. ie. dst hrN <-> rN */
717
uint32_t dummy2 : 5; /* seem to be ignored */
718
uint32_t full : 1; /* not half */
719
uint32_t opc : 6;
720
uint32_t jmp_tgt : 1;
721
uint32_t sync : 1;
722
uint32_t opc_cat : 3;
723
} instr_cat4_t;
724
725
/* With is_s2en_bindless = 1, this determines whether bindless is enabled and
 * if so, how to get the (base, index) pair for both sampler and texture.
 * There is a single base embedded in the instruction, which is always used
 * for the texture.
 */
typedef enum {
   /* Use traditional GL binding model, get texture and sampler index
    * from src3 which is not presumed to be uniform. This is
    * backwards-compatible with earlier generations, where this field was
    * always 0 and nonuniform-indexed sampling always worked.
    */
   CAT5_NONUNIFORM = 0,

   /* The sampler base comes from the low 3 bits of a1.x, and the sampler
    * and texture index come from src3 which is presumed to be uniform.
    */
   CAT5_BINDLESS_A1_UNIFORM = 1,

   /* The texture and sampler share the same base, and the sampler and
    * texture index come from src3 which is *not* presumed to be uniform.
    */
   CAT5_BINDLESS_NONUNIFORM = 2,

   /* The sampler base comes from the low 3 bits of a1.x, and the sampler
    * and texture index come from src3 which is *not* presumed to be
    * uniform.
    */
   CAT5_BINDLESS_A1_NONUNIFORM = 3,

   /* Use traditional GL binding model, get texture and sampler index
    * from src3 which is presumed to be uniform.
    */
   CAT5_UNIFORM = 4,

   /* The texture and sampler share the same base, and the sampler and
    * texture index come from src3 which is presumed to be uniform.
    */
   CAT5_BINDLESS_UNIFORM = 5,

   /* The texture and sampler share the same base, get sampler index from low
    * 4 bits of src3 and texture index from high 4 bits.
    */
   CAT5_BINDLESS_IMM = 6,

   /* The sampler base comes from the low 3 bits of a1.x, and the texture
    * index comes from the next 8 bits of a1.x. The sampler index is an
    * immediate in src3.
    */
   CAT5_BINDLESS_A1_IMM = 7,
} cat5_desc_mode_t;

typedef struct PACKED {
777
/* dword0: */
778
union PACKED {
779
/* normal case: */
780
struct PACKED {
781
uint32_t full : 1; /* not half */
782
uint32_t src1 : 8;
783
uint32_t src2 : 8;
784
uint32_t dummy1 : 4; /* seem to be ignored */
785
uint32_t samp : 4;
786
uint32_t tex : 7;
787
} norm;
788
/* s2en case: */
789
struct PACKED {
790
uint32_t full : 1; /* not half */
791
uint32_t src1 : 8;
792
uint32_t src2 : 8;
793
uint32_t dummy1 : 2;
794
uint32_t base_hi : 2;
795
uint32_t src3 : 8;
796
uint32_t desc_mode : 3;
797
} s2en_bindless;
798
/* same in either case: */
799
// XXX I think, confirm this
800
struct PACKED {
801
uint32_t full : 1; /* not half */
802
uint32_t src1 : 8;
803
uint32_t src2 : 8;
804
uint32_t pad : 15;
805
};
806
};
807
808
/* dword1: */
809
uint32_t dst : 8;
810
uint32_t wrmask : 4; /* write-mask */
811
uint32_t type : 3;
812
uint32_t base_lo : 1; /* used with bindless */
813
uint32_t is_3d : 1;
814
815
uint32_t is_a : 1;
816
uint32_t is_s : 1;
817
uint32_t is_s2en_bindless : 1;
818
uint32_t is_o : 1;
819
uint32_t is_p : 1;
820
821
uint32_t opc : 5;
822
uint32_t jmp_tgt : 1;
823
uint32_t sync : 1;
824
uint32_t opc_cat : 3;
825
} instr_cat5_t;
826
827
/* dword0 encoding for src_off: [src1 + off], src3: */
typedef struct PACKED {
   /* dword0: */
   uint32_t mustbe1 : 1;
   int32_t  off     : 13; /* src2 */
   uint32_t src1    : 8;
   uint32_t src1_im : 1;
   uint32_t src3_im : 1;
   uint32_t src3    : 8;

   /* dword1: */
   uint32_t dword1;
} instr_cat6a_t;

/* dword0 encoding for !src_off: [src1], src2 */
typedef struct PACKED {
   /* dword0: */
   uint32_t mustbe0 : 1;
   uint32_t src1    : 8;
   uint32_t pad     : 5;
   uint32_t ignore0 : 8;
   uint32_t src1_im : 1;
   uint32_t src2_im : 1;
   uint32_t src2    : 8;

   /* dword1: */
   uint32_t dword1;
} instr_cat6b_t;

/* dword1 encoding for dst_off: */
typedef struct PACKED {
   /* dword0: */
   uint32_t dw0_pad1 : 9;
   int32_t  off_high : 5;
   uint32_t dw0_pad2 : 18;

   /* dword1: */
   uint32_t off     : 8;
   uint32_t mustbe1 : 1;
   uint32_t dst     : 8;
   uint32_t pad1    : 15;
} instr_cat6c_t;

/* dword1 encoding for !dst_off: */
typedef struct PACKED {
   /* dword0: */
   uint32_t dword0;

   /* dword1: */
   uint32_t dst     : 8;
   uint32_t mustbe0 : 1;
   uint32_t idx     : 8;
   uint32_t pad0    : 15;
} instr_cat6d_t;

/* ldgb and atomics..
 *
 * ldgb:      pad0=0, pad3=1
 * atomic .g: pad0=1, pad3=1
 *        .l: pad0=1, pad3=0
 *
 * NOTE(review): this struct has no member named pad3; the values above
 * presumably refer to the dword1 bits now named g/src_ssbo_im -- confirm.
 */
typedef struct PACKED {
   /* dword0: */
   uint32_t pad0      : 1;
   uint32_t src3      : 8;
   uint32_t d         : 2;
   uint32_t typed     : 1;
   uint32_t type_size : 2;
   uint32_t src1      : 8;
   uint32_t src1_im   : 1;
   uint32_t src2_im   : 1;
   uint32_t src2      : 8;

   /* dword1: */
   uint32_t dst         : 8;
   uint32_t mustbe0     : 1;
   uint32_t src_ssbo    : 8;
   uint32_t pad2        : 3; // type
   uint32_t g           : 1;
   uint32_t src_ssbo_im : 1;
   uint32_t pad4        : 10; // opc/jmp_tgt/sync/opc_cat
} instr_cat6ldgb_t;

/* stgb, pad0=0, pad3=2
 */
typedef struct PACKED {
   /* dword0: */
   uint32_t mustbe1   : 1; // ???
   uint32_t src1      : 8;
   uint32_t d         : 2;
   uint32_t typed     : 1;
   uint32_t type_size : 2;
   uint32_t pad0      : 9;
   uint32_t src2_im   : 1;
   uint32_t src2      : 8;

   /* dword1: */
   uint32_t src3     : 8;
   uint32_t src3_im  : 1;
   uint32_t dst_ssbo : 8;
   uint32_t pad2     : 3; // type
   uint32_t pad3     : 2;
   uint32_t pad4     : 10; // opc/jmp_tgt/sync/opc_cat
} instr_cat6stgb_t;

typedef union PACKED {
931
instr_cat6a_t a;
932
instr_cat6b_t b;
933
instr_cat6c_t c;
934
instr_cat6d_t d;
935
instr_cat6ldgb_t ldgb;
936
instr_cat6stgb_t stgb;
937
struct PACKED {
938
/* dword0: */
939
uint32_t src_off : 1;
940
uint32_t pad1 : 31;
941
942
/* dword1: */
943
uint32_t pad2 : 8;
944
uint32_t dst_off : 1;
945
uint32_t pad3 : 8;
946
uint32_t type : 3;
947
uint32_t g : 1; /* or in some cases it means dst immed */
948
uint32_t pad4 : 1;
949
uint32_t opc : 5;
950
uint32_t jmp_tgt : 1;
951
uint32_t sync : 1;
952
uint32_t opc_cat : 3;
953
};
954
} instr_cat6_t;
955
956
/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
 */
typedef enum {
   /* Use old GL binding model with an immediate index. */
   CAT6_IMM = 0,

   CAT6_UNIFORM = 1,

   CAT6_NONUNIFORM = 2,

   /* Use the bindless model, with an immediate index.
    */
   CAT6_BINDLESS_IMM = 4,

   /* Use the bindless model, with a uniform register index.
    */
   CAT6_BINDLESS_UNIFORM = 5,

   /* Use the bindless model, with a register index that isn't guaranteed
    * to be uniform. This presumably checks if the indices are equal and
    * splits up the load/store, because it works the way you would
    * expect.
    */
   CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;

/**
 * a6xx+ category 6 instruction encoding (two dwords).
 *
 * For atomic ops (which return a value):
 *
 *    pad1=1, pad3=6, pad5=3
 *    src1    - vecN offset/coords
 *    src2.x  - is actually dest register
 *    src2.y  - is 'data' except for cmpxchg where src2.y is 'compare'
 *              and src2.z is 'data'
 *
 * For stib (which does not return a value):
 *    pad1=0, pad3=6, pad5=2
 *    src1    - vecN offset/coords
 *    src2    - value to store
 *
 * For ldib:
 *    pad1=1, pad3=6, pad5=2
 *    src1    - vecN offset/coords
 *
 * for ldc (load from UBO using descriptor):
 *    pad1=0, pad3=4, pad5=2
 *
 * pad2 and pad5 are only observed to be 0.
 *
 * NOTE(review): the sentence above contradicts the non-zero pad5 values
 * listed for every variant; it presumably means pad2 and pad4 -- confirm.
 */
typedef struct PACKED {
   /* dword0: */
   uint32_t pad1      : 1;
   uint32_t base      : 3;
   uint32_t pad2      : 2;
   uint32_t desc_mode : 3;
   uint32_t d         : 2;
   uint32_t typed     : 1;
   uint32_t type_size : 2;
   uint32_t opc       : 6;
   uint32_t pad3      : 4;
   uint32_t src1      : 8; /* coordinate/offset */

   /* dword1: */
   uint32_t src2    : 8; /* or the dst for load instructions */
   uint32_t pad4    : 1; // mustbe0 ??
   uint32_t ssbo    : 8; /* ssbo/image binding point */
   uint32_t type    : 3;
   uint32_t pad5    : 7;
   uint32_t jmp_tgt : 1;
   uint32_t sync    : 1;
   uint32_t opc_cat : 3;
} instr_cat6_a6xx_t;

/* Category 7 instruction encoding (two dwords); dword0 is all padding. */
typedef struct PACKED {
   /* dword0: */
   uint32_t pad1 : 32;

   /* dword1: */
   uint32_t pad2    : 12;
   uint32_t ss      : 1; /* maybe in the encoding, but blob only uses (sy) */
   uint32_t pad3    : 6;
   uint32_t w       : 1; /* write */
   uint32_t r       : 1; /* read */
   uint32_t l       : 1; /* local */
   uint32_t g       : 1; /* global */
   uint32_t opc     : 4; /* presumed, but only a couple known OPCs */
   uint32_t jmp_tgt : 1; /* (jp) */
   uint32_t sync    : 1; /* (sy) */
   uint32_t opc_cat : 3;
} instr_cat7_t;

typedef union PACKED {
1048
instr_cat0_t cat0;
1049
instr_cat1_t cat1;
1050
instr_cat2_t cat2;
1051
instr_cat3_t cat3;
1052
instr_cat4_t cat4;
1053
instr_cat5_t cat5;
1054
instr_cat6_t cat6;
1055
instr_cat6_a6xx_t cat6_a6xx;
1056
instr_cat7_t cat7;
1057
struct PACKED {
1058
/* dword0: */
1059
uint32_t pad1 : 32;
1060
1061
/* dword1: */
1062
uint32_t pad2 : 12;
1063
uint32_t ss : 1; /* cat1-cat4 (cat0??) and cat7 (?) */
1064
uint32_t ul : 1; /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
1065
uint32_t pad3 : 13;
1066
uint32_t jmp_tgt : 1;
1067
uint32_t sync : 1;
1068
uint32_t opc_cat : 3;
1069
};
1070
} instr_t;
1071
1072
static inline uint32_t
1073
instr_repeat(instr_t *instr)
1074
{
1075
switch (instr->opc_cat) {
1076
case 0:
1077
return instr->cat0.repeat;
1078
case 1:
1079
return instr->cat1.repeat;
1080
case 2:
1081
return instr->cat2.repeat;
1082
case 3:
1083
return instr->cat3.repeat;
1084
case 4:
1085
return instr->cat4.repeat;
1086
default:
1087
return 0;
1088
}
1089
}
1090
1091
static inline bool
1092
instr_sat(instr_t *instr)
1093
{
1094
switch (instr->opc_cat) {
1095
case 2:
1096
return instr->cat2.sat;
1097
case 3:
1098
return instr->cat3.sat;
1099
case 4:
1100
return instr->cat4.sat;
1101
default:
1102
return false;
1103
}
1104
}
1105
1106
static inline bool
1107
is_sat_compatible(opc_t opc)
1108
{
1109
/* On a6xx saturation doesn't work on cat4 */
1110
if (opc_cat(opc) != 2 && opc_cat(opc) != 3)
1111
return false;
1112
1113
switch (opc) {
1114
/* On a3xx and a6xx saturation doesn't work on bary.f */
1115
case OPC_BARY_F:
1116
/* On a6xx saturation doesn't work on sel.* */
1117
case OPC_SEL_B16:
1118
case OPC_SEL_B32:
1119
case OPC_SEL_S16:
1120
case OPC_SEL_S32:
1121
case OPC_SEL_F16:
1122
case OPC_SEL_F32:
1123
return false;
1124
default:
1125
return true;
1126
}
1127
}
1128
1129
/* We can probably drop the gpu_id arg, but keeping it for now so we can
1130
* assert if we see something we think should be new encoding on an older
1131
* gpu.
1132
*/
1133
static inline bool
1134
is_cat6_legacy(instr_t *instr, unsigned gpu_id)
1135
{
1136
instr_cat6_a6xx_t *cat6 = &instr->cat6_a6xx;
1137
1138
if (gpu_id < 600)
1139
return true;
1140
1141
/* At least one of these two bits is pad in all the possible
1142
* "legacy" cat6 encodings, and a analysis of all the pre-a6xx
1143
* cmdstream traces I have indicates that the pad bit is zero
1144
* in all cases. So we can use this to detect new encoding:
1145
*/
1146
if ((cat6->pad3 & 0x4) && (cat6->pad5 & 0x2)) {
1147
ir3_assert(instr->cat6.opc == 0);
1148
return false;
1149
}
1150
1151
return true;
1152
}
1153
1154
static inline uint32_t
1155
instr_opc(instr_t *instr, unsigned gpu_id)
1156
{
1157
switch (instr->opc_cat) {
1158
case 0:
1159
return instr->cat0.opc | instr->cat0.opc_hi << 4;
1160
case 1:
1161
return instr->cat1.opc;
1162
case 2:
1163
return instr->cat2.opc;
1164
case 3:
1165
return instr->cat3.opc;
1166
case 4:
1167
return instr->cat4.opc;
1168
case 5:
1169
return instr->cat5.opc;
1170
case 6:
1171
if (!is_cat6_legacy(instr, gpu_id))
1172
return instr->cat6_a6xx.opc;
1173
return instr->cat6.opc;
1174
case 7:
1175
return instr->cat7.opc;
1176
default:
1177
return 0;
1178
}
1179
}
1180
1181
static inline bool
1182
is_mad(opc_t opc)
1183
{
1184
switch (opc) {
1185
case OPC_MAD_U16:
1186
case OPC_MAD_S16:
1187
case OPC_MAD_U24:
1188
case OPC_MAD_S24:
1189
case OPC_MAD_F16:
1190
case OPC_MAD_F32:
1191
return true;
1192
default:
1193
return false;
1194
}
1195
}
1196
1197
static inline bool
1198
is_madsh(opc_t opc)
1199
{
1200
switch (opc) {
1201
case OPC_MADSH_U16:
1202
case OPC_MADSH_M16:
1203
return true;
1204
default:
1205
return false;
1206
}
1207
}
1208
1209
static inline bool
1210
is_atomic(opc_t opc)
1211
{
1212
switch (opc) {
1213
case OPC_ATOMIC_ADD:
1214
case OPC_ATOMIC_SUB:
1215
case OPC_ATOMIC_XCHG:
1216
case OPC_ATOMIC_INC:
1217
case OPC_ATOMIC_DEC:
1218
case OPC_ATOMIC_CMPXCHG:
1219
case OPC_ATOMIC_MIN:
1220
case OPC_ATOMIC_MAX:
1221
case OPC_ATOMIC_AND:
1222
case OPC_ATOMIC_OR:
1223
case OPC_ATOMIC_XOR:
1224
return true;
1225
default:
1226
return false;
1227
}
1228
}
1229
1230
static inline bool
1231
is_ssbo(opc_t opc)
1232
{
1233
switch (opc) {
1234
case OPC_RESFMT:
1235
case OPC_RESINFO:
1236
case OPC_LDGB:
1237
case OPC_STGB:
1238
case OPC_STIB:
1239
return true;
1240
default:
1241
return false;
1242
}
1243
}
1244
1245
static inline bool
1246
is_isam(opc_t opc)
1247
{
1248
switch (opc) {
1249
case OPC_ISAM:
1250
case OPC_ISAML:
1251
case OPC_ISAMM:
1252
return true;
1253
default:
1254
return false;
1255
}
1256
}
1257
1258
static inline bool
1259
is_cat2_float(opc_t opc)
1260
{
1261
switch (opc) {
1262
case OPC_ADD_F:
1263
case OPC_MIN_F:
1264
case OPC_MAX_F:
1265
case OPC_MUL_F:
1266
case OPC_SIGN_F:
1267
case OPC_CMPS_F:
1268
case OPC_ABSNEG_F:
1269
case OPC_CMPV_F:
1270
case OPC_FLOOR_F:
1271
case OPC_CEIL_F:
1272
case OPC_RNDNE_F:
1273
case OPC_RNDAZ_F:
1274
case OPC_TRUNC_F:
1275
return true;
1276
1277
default:
1278
return false;
1279
}
1280
}
1281
1282
static inline bool
1283
is_cat3_float(opc_t opc)
1284
{
1285
switch (opc) {
1286
case OPC_MAD_F16:
1287
case OPC_MAD_F32:
1288
case OPC_SEL_F16:
1289
case OPC_SEL_F32:
1290
return true;
1291
default:
1292
return false;
1293
}
1294
}
1295
1296
#endif /* INSTR_A3XX_H_ */