Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/amd/compiler/aco_opcodes.py
4550 views
1
#
2
# Copyright (c) 2018 Valve Corporation
3
#
4
# Permission is hereby granted, free of charge, to any person obtaining a
5
# copy of this software and associated documentation files (the "Software"),
6
# to deal in the Software without restriction, including without limitation
7
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
# and/or sell copies of the Software, and to permit persons to whom the
9
# Software is furnished to do so, subject to the following conditions:
10
#
11
# The above copyright notice and this permission notice (including the next
12
# paragraph) shall be included in all copies or substantial portions of the
13
# Software.
14
#
15
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
# IN THE SOFTWARE.
22
#
23
24
# Class that represents all the information we have about the opcode
25
# NOTE: this must be kept in sync with aco_op_info
26
27
import sys
28
from enum import Enum
29
30
class InstrClass(Enum):
    """Instruction class assigned to every opcode registered in this file.

    The value is passed as ``cls`` to ``opcode()`` and stored unchanged on
    the resulting ``Opcode`` object. Members are numbered consecutively
    from 0 to 19.
    """

    # Vector ALU classes.
    Valu32 = 0
    ValuConvert32 = 1
    Valu64 = 2
    ValuQuarterRate32 = 3
    ValuFma = 4
    ValuTranscendental32 = 5
    ValuDouble = 6
    ValuDoubleAdd = 7
    ValuDoubleConvert = 8
    ValuDoubleTranscendental = 9

    # Scalar ALU / scalar memory.
    Salu = 10
    SMem = 11

    # Control flow, synchronisation and messaging.
    Barrier = 12
    Branch = 13
    Sendmsg = 14

    # Data share, export and vector memory.
    DS = 15
    Export = 16
    VMem = 17

    Waitcnt = 18

    # Default class used by opcode() when none is given.
    Other = 19
51
52
class Format(Enum):
    """Instruction formats used when registering opcodes.

    Members PSEUDO..VOP3P use small consecutive values (7 is unused), while
    VOP1..SDWA each use a distinct single bit starting at ``1 << 8``.
    NOTE(review): presumably the single-bit values exist so they can be
    combined with other formats on the C++ side -- confirm against the
    matching C++ enum.

    The ``get_builder_*`` helpers describe the extra, format-specific
    parameters of a generated builder function.
    """

    PSEUDO = 0
    SOP1 = 1
    SOP2 = 2
    SOPK = 3
    SOPP = 4
    SOPC = 5
    SMEM = 6
    DS = 8
    MTBUF = 9
    MUBUF = 10
    MIMG = 11
    EXP = 12
    FLAT = 13
    GLOBAL = 14
    SCRATCH = 15
    PSEUDO_BRANCH = 16
    PSEUDO_BARRIER = 17
    PSEUDO_REDUCTION = 18
    VOP3P = 19
    VOP1 = 1 << 8
    VOP2 = 1 << 9
    VOPC = 1 << 10
    VOP3 = 1 << 11
    VINTRP = 1 << 12
    DPP = 1 << 13
    SDWA = 1 << 14

    def get_builder_fields(self):
        """Return the format-specific builder parameters.

        Each entry is a tuple ``(type, name, default)`` or
        ``(type, name, default, dest)``:

        - ``type``: type string used in the parameter declaration
        - ``name``: parameter name
        - ``default``: default value, or ``None`` when the parameter has no
          default
        - ``dest``: optional name of the destination the parameter is
          written to; when absent, ``name`` is used

        Formats without extra parameters return an empty list.
        """
        if self == Format.SOPK:
            return [('uint16_t', 'imm', None)]
        elif self == Format.SOPP:
            return [('uint32_t', 'block', '-1'),
                    ('uint32_t', 'imm', '0')]
        elif self == Format.SMEM:
            return [('memory_sync_info', 'sync', 'memory_sync_info()'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'nv', 'false')]
        elif self == Format.DS:
            return [('int16_t', 'offset0', '0'),
                    ('int8_t', 'offset1', '0'),
                    ('bool', 'gds', 'false')]
        elif self == Format.MTBUF:
            return [('unsigned', 'dfmt', None),
                    ('unsigned', 'nfmt', None),
                    ('unsigned', 'offset', None),
                    ('bool', 'offen', None),
                    ('bool', 'idxen', 'false'),
                    ('bool', 'disable_wqm', 'false'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'tfe', 'false')]
        elif self == Format.MUBUF:
            return [('unsigned', 'offset', None),
                    ('bool', 'offen', None),
                    ('bool', 'swizzled', 'false'),
                    ('bool', 'idxen', 'false'),
                    ('bool', 'addr64', 'false'),
                    ('bool', 'disable_wqm', 'false'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'tfe', 'false'),
                    ('bool', 'lds', 'false')]
        elif self == Format.MIMG:
            # The original had a second, unreachable return here that
            # duplicated the VINTRP fields; VINTRP has its own branch below.
            return [('unsigned', 'dmask', '0xF'),
                    ('bool', 'da', 'false'),
                    ('bool', 'unrm', 'true'),
                    ('bool', 'disable_wqm', 'false'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'tfe', 'false'),
                    ('bool', 'lwe', 'false'),
                    ('bool', 'r128_a16', 'false', 'r128'),
                    ('bool', 'd16', 'false')]
        elif self == Format.EXP:
            return [('unsigned', 'enabled_mask', None),
                    ('unsigned', 'dest', None),
                    ('bool', 'compr', 'false', 'compressed'),
                    ('bool', 'done', 'false'),
                    ('bool', 'vm', 'false', 'valid_mask')]
        elif self == Format.PSEUDO_BRANCH:
            return [('uint32_t', 'target0', '0', 'target[0]'),
                    ('uint32_t', 'target1', '0', 'target[1]')]
        elif self == Format.PSEUDO_REDUCTION:
            return [('ReduceOp', 'op', None, 'reduce_op'),
                    ('unsigned', 'cluster_size', '0')]
        elif self == Format.PSEUDO_BARRIER:
            return [('memory_sync_info', 'sync', None),
                    ('sync_scope', 'exec_scope', 'scope_invocation')]
        elif self == Format.VINTRP:
            return [('unsigned', 'attribute', None),
                    ('unsigned', 'component', None)]
        elif self == Format.DPP:
            return [('uint16_t', 'dpp_ctrl', None),
                    ('uint8_t', 'row_mask', '0xF'),
                    ('uint8_t', 'bank_mask', '0xF'),
                    ('bool', 'bound_ctrl', 'true')]
        elif self == Format.VOP3P:
            return [('uint8_t', 'opsel_lo', None),
                    ('uint8_t', 'opsel_hi', None)]
        elif self in [Format.FLAT, Format.GLOBAL, Format.SCRATCH]:
            # The 'offset' default is the integer 0 rather than the string
            # '0'; it is kept as-is and formats to the same declaration.
            return [('uint16_t', 'offset', 0),
                    ('memory_sync_info', 'sync', 'memory_sync_info()'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'lds', 'false'),
                    ('bool', 'nv', 'false')]
        else:
            return []

    def get_builder_field_names(self):
        """Return the parameter names of all builder fields, in order."""
        return [f[1] for f in self.get_builder_fields()]

    def get_builder_field_dests(self):
        """Return the destination name of every builder field, in order.

        The explicit fourth tuple element is used when present, otherwise
        the parameter name.
        """
        return [(f[3] if len(f) >= 4 else f[1]) for f in self.get_builder_fields()]

    def get_builder_field_decls(self):
        """Return one declaration string per builder field.

        Fields with a default produce ``'<type> <name>=<default>'``; fields
        whose default is ``None`` produce ``'<type> <name>'``.
        """
        decls = []
        for f in self.get_builder_fields():
            # Compare against None by identity: falsy defaults such as the
            # integer 0 are real defaults and must still be emitted.
            if f[2] is not None:
                decls.append('%s %s=%s' % (f[0], f[1], f[2]))
            else:
                decls.append('%s %s' % (f[0], f[1]))
        return decls

    def get_builder_initialization(self, num_operands):
        """Return extra initialization code emitted for this format.

        Only SDWA produces any text: one ``instr->sel[i]`` assignment for
        each of the first ``min(num_operands, 2)`` operands, followed by the
        ``dst_sel`` and ``dst_preserve`` statements. Every other format
        yields the empty string.
        """
        if self != Format.SDWA:
            return ''

        pieces = []
        for i in range(min(num_operands, 2)):
            pieces.append('instr->sel[{0}] = op{0}.op.bytes() == 2 ? sdwa_uword : (op{0}.op.bytes() == 1 ? sdwa_ubyte : sdwa_udword);\n'.format(i))
        pieces.append('instr->dst_sel = def0.bytes() == 2 ? sdwa_uword : (def0.bytes() == 1 ? sdwa_ubyte : sdwa_udword);\n')
        pieces.append('if (def0.bytes() < 4) instr->dst_preserve = true;')
        return ''.join(pieces)
185
186
187
class Opcode(object):
    """Class that represents all the information we have about the opcode
    NOTE: this must be kept in sync with aco_op_info
    """

    # Bit size for every recognised data-type suffix: one of the prefixes
    # b/i/u/f followed by 64, 32, 24 or 16. Built once for the class instead
    # of on every instantiation.
    _DEF_DTYPE_SIZES = {'{}{}'.format(prefix, size): size
                        for prefix in 'biuf' for size in (64, 32, 24, 16)}

    # Same table for operands, except that
    # inline constants are 32-bit for 16-bit integer/typeless instructions: https://reviews.llvm.org/D81841
    _OP_DTYPE_SIZES = dict(_DEF_DTYPE_SIZES, b16=32, i16=32, u16=32)

    def __init__(self, name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic, cls):
        """Parameters:

        - name is the opcode name (str); it is also parsed for data-type
          suffixes to derive operand_size and definition_size
        - opcode_gfx7, opcode_gfx9 and opcode_gfx10 are the integer opcode
          numbers for the respective hardware generations (the tables in
          this file use -1 where an opcode has no encoding)
        - format is the Format member of the instruction
        - input_mod and output_mod are bools; they are stored as the
          strings "1" / "0"
        - is_atomic is stored as "1" if truthy, otherwise "0"
        - cls is stored unchanged (the callers in this file pass an
          InstrClass member)
        """
        assert isinstance(name, str)
        assert isinstance(opcode_gfx7, int)
        assert isinstance(opcode_gfx9, int)
        assert isinstance(opcode_gfx10, int)
        assert isinstance(format, Format)
        assert isinstance(input_mod, bool)
        assert isinstance(output_mod, bool)

        self.name = name
        self.opcode_gfx7 = opcode_gfx7
        self.opcode_gfx9 = opcode_gfx9
        self.opcode_gfx10 = opcode_gfx10
        self.input_mod = "1" if input_mod else "0"
        self.output_mod = "1" if output_mod else "0"
        self.is_atomic = "1" if is_atomic else "0"
        self.format = format
        self.cls = cls

        # Split off up to two trailing '_'-separated components after
        # dropping any '_e64' marker: the last one is taken as the operand
        # data type, the one before it as the definition data type
        # (e.g. 'v_cvt_f32_u32' -> definition 'f32', operand 'u32').
        parts = name.replace('_e64', '').rsplit('_', 2)
        op_dtype = parts[-1]
        def_dtype = parts[-2] if len(parts) > 1 else parts[-1]

        # If we can't tell the definition size and the operand size, default to
        # 32. Some opcodes can have a larger definition size, but
        # get_subdword_definition_info() handles that.
        self.operand_size = self._OP_DTYPE_SIZES.get(op_dtype, 32)
        self.definition_size = self._DEF_DTYPE_SIZES.get(def_dtype, self.operand_size)

        # exceptions for operands:
        # ('qsad_' must be tested before 'sad_', which it contains)
        if 'qsad_' in name:
            self.operand_size = 0
        elif 'sad_' in name:
            self.operand_size = 32
        elif name in ['v_mad_u64_u32', 'v_mad_i64_i32']:
            self.operand_size = 0
        elif self.operand_size == 24:
            self.operand_size = 32
        elif op_dtype == 'u8' or op_dtype == 'i8':
            self.operand_size = 32
        elif name in ['v_cvt_f32_ubyte0', 'v_cvt_f32_ubyte1',
                      'v_cvt_f32_ubyte2', 'v_cvt_f32_ubyte3']:
            self.operand_size = 32

        # exceptions for definitions:
        if 'qsad_' in name:
            self.definition_size = 0
        elif 'sad_' in name:
            self.definition_size = 32
        elif '_pk' in name:
            self.definition_size = 32
259
260
261
# global dictionary of opcodes
opcodes = {}


def opcode(name, opcode_gfx7=-1, opcode_gfx9=-1, opcode_gfx10=-1,
           format=Format.PSEUDO, cls=InstrClass.Other,
           input_mod=False, output_mod=False, is_atomic=False):
    """Create an Opcode and register it in ``opcodes`` under ``name``.

    All arguments are forwarded to the ``Opcode`` constructor. The per
    generation opcode numbers default to -1, the format to
    ``Format.PSEUDO`` and the class to ``InstrClass.Other``.

    Registering the same name twice trips the assertion.
    """
    assert name not in opcodes
    entry = Opcode(name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format,
                   input_mod, output_mod, is_atomic, cls)
    opcodes[name] = entry
267
268
def default_class(opcodes, cls):
    """Yield every table row with an instruction class as its last element.

    Rows of ``opcodes`` that already end in an ``InstrClass`` member are
    yielded unchanged; all other rows get ``cls`` appended as an extra
    trailing tuple element.
    """
    for row in opcodes:
        has_explicit_class = isinstance(row[-1], InstrClass)
        yield row if has_explicit_class else row + (cls,)
274
275
opcode("exp", 0, 0, 0, format=Format.EXP, cls=InstrClass.Export)

# Pseudo opcodes: registered with the opcode() defaults (no hardware opcode
# number, Format.PSEUDO, InstrClass.Other) unless a format is given.
opcode("p_parallelcopy")
opcode("p_startpgm")
opcode("p_phi")
opcode("p_linear_phi")
opcode("p_as_uniform")
opcode("p_unit_test")

opcode("p_create_vector")
opcode("p_extract_vector")
opcode("p_split_vector")

# start/end the parts where we can use exec based instructions
# implicitly
opcode("p_logical_start")
opcode("p_logical_end")

# e.g. subgroupMin() in SPIR-V
opcode("p_reduce", format=Format.PSEUDO_REDUCTION)
# e.g. subgroupInclusiveMin()
opcode("p_inclusive_scan", format=Format.PSEUDO_REDUCTION)
# e.g. subgroupExclusiveMin()
opcode("p_exclusive_scan", format=Format.PSEUDO_REDUCTION)

opcode("p_branch", format=Format.PSEUDO_BRANCH)
opcode("p_cbranch", format=Format.PSEUDO_BRANCH)
opcode("p_cbranch_z", format=Format.PSEUDO_BRANCH)
opcode("p_cbranch_nz", format=Format.PSEUDO_BRANCH)

opcode("p_barrier", format=Format.PSEUDO_BARRIER)

opcode("p_spill")
opcode("p_reload")

# start/end linear vgprs
opcode("p_start_linear_vgpr")
opcode("p_end_linear_vgpr")

opcode("p_wqm")
opcode("p_discard_if")
opcode("p_demote_to_helper")
opcode("p_is_helper")
opcode("p_exit_early_if")

# simulates proper bpermute behavior when it's unsupported, e.g. GFX10 wave64
opcode("p_bpermute")

opcode("p_constaddr")

# These don't have to be pseudo-ops, but it makes optimization easier to only
# have to consider two instructions.
# (src0 >> (index * bits)) & ((1 << bits) - 1) with optional sign extension
opcode("p_extract")  # src1=index, src2=bits, src3=signext
# (src0 & ((1 << bits) - 1)) << (index * bits)
opcode("p_insert")  # src1=index, src2=bits
330
331
332
# SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc)
# Each row lists the opcode number per generation (-1 where the table has no
# encoding), the name and, optionally, an explicit InstrClass.
SOP2 = {
    # GFX6, GFX7, GFX8, GFX9, GFX10, name
    (0x00, 0x00, 0x00, 0x00, 0x00, "s_add_u32"),
    (0x01, 0x01, 0x01, 0x01, 0x01, "s_sub_u32"),
    (0x02, 0x02, 0x02, 0x02, 0x02, "s_add_i32"),
    (0x03, 0x03, 0x03, 0x03, 0x03, "s_sub_i32"),
    (0x04, 0x04, 0x04, 0x04, 0x04, "s_addc_u32"),
    (0x05, 0x05, 0x05, 0x05, 0x05, "s_subb_u32"),
    (0x06, 0x06, 0x06, 0x06, 0x06, "s_min_i32"),
    (0x07, 0x07, 0x07, 0x07, 0x07, "s_min_u32"),
    (0x08, 0x08, 0x08, 0x08, 0x08, "s_max_i32"),
    (0x09, 0x09, 0x09, 0x09, 0x09, "s_max_u32"),
    (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_cselect_b32"),
    (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_cselect_b64"),
    (0x0e, 0x0e, 0x0c, 0x0c, 0x0e, "s_and_b32"),
    (0x0f, 0x0f, 0x0d, 0x0d, 0x0f, "s_and_b64"),
    (0x10, 0x10, 0x0e, 0x0e, 0x10, "s_or_b32"),
    (0x11, 0x11, 0x0f, 0x0f, 0x11, "s_or_b64"),
    (0x12, 0x12, 0x10, 0x10, 0x12, "s_xor_b32"),
    (0x13, 0x13, 0x11, 0x11, 0x13, "s_xor_b64"),
    (0x14, 0x14, 0x12, 0x12, 0x14, "s_andn2_b32"),
    (0x15, 0x15, 0x13, 0x13, 0x15, "s_andn2_b64"),
    (0x16, 0x16, 0x14, 0x14, 0x16, "s_orn2_b32"),
    (0x17, 0x17, 0x15, 0x15, 0x17, "s_orn2_b64"),
    (0x18, 0x18, 0x16, 0x16, 0x18, "s_nand_b32"),
    (0x19, 0x19, 0x17, 0x17, 0x19, "s_nand_b64"),
    (0x1a, 0x1a, 0x18, 0x18, 0x1a, "s_nor_b32"),
    (0x1b, 0x1b, 0x19, 0x19, 0x1b, "s_nor_b64"),
    (0x1c, 0x1c, 0x1a, 0x1a, 0x1c, "s_xnor_b32"),
    (0x1d, 0x1d, 0x1b, 0x1b, 0x1d, "s_xnor_b64"),
    (0x1e, 0x1e, 0x1c, 0x1c, 0x1e, "s_lshl_b32"),
    (0x1f, 0x1f, 0x1d, 0x1d, 0x1f, "s_lshl_b64"),
    (0x20, 0x20, 0x1e, 0x1e, 0x20, "s_lshr_b32"),
    (0x21, 0x21, 0x1f, 0x1f, 0x21, "s_lshr_b64"),
    (0x22, 0x22, 0x20, 0x20, 0x22, "s_ashr_i32"),
    (0x23, 0x23, 0x21, 0x21, 0x23, "s_ashr_i64"),
    (0x24, 0x24, 0x22, 0x22, 0x24, "s_bfm_b32"),
    (0x25, 0x25, 0x23, 0x23, 0x25, "s_bfm_b64"),
    (0x26, 0x26, 0x24, 0x24, 0x26, "s_mul_i32"),
    (0x27, 0x27, 0x25, 0x25, 0x27, "s_bfe_u32"),
    (0x28, 0x28, 0x26, 0x26, 0x28, "s_bfe_i32"),
    (0x29, 0x29, 0x27, 0x27, 0x29, "s_bfe_u64"),
    (0x2a, 0x2a, 0x28, 0x28, 0x2a, "s_bfe_i64"),
    (0x2b, 0x2b, 0x29, 0x29,   -1, "s_cbranch_g_fork", InstrClass.Branch),
    (0x2c, 0x2c, 0x2a, 0x2a, 0x2c, "s_absdiff_i32"),
    (  -1,   -1, 0x2b, 0x2b,   -1, "s_rfe_restore_b64", InstrClass.Branch),
    (  -1,   -1,   -1, 0x2e, 0x2e, "s_lshl1_add_u32"),
    (  -1,   -1,   -1, 0x2f, 0x2f, "s_lshl2_add_u32"),
    (  -1,   -1,   -1, 0x30, 0x30, "s_lshl3_add_u32"),
    (  -1,   -1,   -1, 0x31, 0x31, "s_lshl4_add_u32"),
    (  -1,   -1,   -1, 0x32, 0x32, "s_pack_ll_b32_b16"),
    (  -1,   -1,   -1, 0x33, 0x33, "s_pack_lh_b32_b16"),
    (  -1,   -1,   -1, 0x34, 0x34, "s_pack_hh_b32_b16"),
    (  -1,   -1,   -1, 0x2c, 0x35, "s_mul_hi_u32"),
    (  -1,   -1,   -1, 0x2d, 0x36, "s_mul_hi_i32"),
    # actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP2.
    (  -1,   -1,   -1,   -1,   -1, "p_constaddr_addlo"),
}
# Rows without an explicit class default to InstrClass.Salu. Only the GFX7,
# GFX9 and GFX10 columns are forwarded to opcode().
for gfx6, gfx7, gfx8, gfx9, gfx10, name, cls in default_class(SOP2, InstrClass.Salu):
    opcode(name, gfx7, gfx9, gfx10, format=Format.SOP2, cls=cls)
393
394
395
# SOPK instructions: 0 input (+ imm), 1 output + optional scc
# Each row lists the opcode number per generation (-1 where the table has no
# encoding), the name and, optionally, an explicit InstrClass.
SOPK = {
    # GFX6, GFX7, GFX8, GFX9, GFX10, name
    (0x00, 0x00, 0x00, 0x00, 0x00, "s_movk_i32"),
    (  -1,   -1,   -1,   -1, 0x01, "s_version"),  # GFX10+
    (0x02, 0x02, 0x01, 0x01, 0x02, "s_cmovk_i32"),  # GFX8_GFX9
    (0x03, 0x03, 0x02, 0x02, 0x03, "s_cmpk_eq_i32"),
    (0x04, 0x04, 0x03, 0x03, 0x04, "s_cmpk_lg_i32"),
    (0x05, 0x05, 0x04, 0x04, 0x05, "s_cmpk_gt_i32"),
    (0x06, 0x06, 0x05, 0x05, 0x06, "s_cmpk_ge_i32"),
    (0x07, 0x07, 0x06, 0x06, 0x07, "s_cmpk_lt_i32"),
    (0x08, 0x08, 0x07, 0x07, 0x08, "s_cmpk_le_i32"),
    (0x09, 0x09, 0x08, 0x08, 0x09, "s_cmpk_eq_u32"),
    (0x0a, 0x0a, 0x09, 0x09, 0x0a, "s_cmpk_lg_u32"),
    (0x0b, 0x0b, 0x0a, 0x0a, 0x0b, "s_cmpk_gt_u32"),
    (0x0c, 0x0c, 0x0b, 0x0b, 0x0c, "s_cmpk_ge_u32"),
    (0x0d, 0x0d, 0x0c, 0x0c, 0x0d, "s_cmpk_lt_u32"),
    (0x0e, 0x0e, 0x0d, 0x0d, 0x0e, "s_cmpk_le_u32"),
    (0x0f, 0x0f, 0x0e, 0x0e, 0x0f, "s_addk_i32"),
    (0x10, 0x10, 0x0f, 0x0f, 0x10, "s_mulk_i32"),
    (0x11, 0x11, 0x10, 0x10,   -1, "s_cbranch_i_fork", InstrClass.Branch),
    (0x12, 0x12, 0x11, 0x11, 0x12, "s_getreg_b32"),
    (0x13, 0x13, 0x12, 0x12, 0x13, "s_setreg_b32"),
    (0x15, 0x15, 0x14, 0x14, 0x15, "s_setreg_imm32_b32"),  # requires 32bit literal
    (  -1,   -1, 0x15, 0x15, 0x16, "s_call_b64", InstrClass.Branch),
    (  -1,   -1,   -1,   -1, 0x17, "s_waitcnt_vscnt", InstrClass.Waitcnt),
    (  -1,   -1,   -1,   -1, 0x18, "s_waitcnt_vmcnt", InstrClass.Waitcnt),
    (  -1,   -1,   -1,   -1, 0x19, "s_waitcnt_expcnt", InstrClass.Waitcnt),
    (  -1,   -1,   -1,   -1, 0x1a, "s_waitcnt_lgkmcnt", InstrClass.Waitcnt),
    (  -1,   -1,   -1,   -1, 0x1b, "s_subvector_loop_begin", InstrClass.Branch),
    (  -1,   -1,   -1,   -1, 0x1c, "s_subvector_loop_end", InstrClass.Branch),
}
# Rows without an explicit class default to InstrClass.Salu. Only the GFX7,
# GFX9 and GFX10 columns are forwarded to opcode().
for gfx6, gfx7, gfx8, gfx9, gfx10, name, cls in default_class(SOPK, InstrClass.Salu):
    opcode(name, gfx7, gfx9, gfx10, format=Format.SOPK, cls=cls)
429
430
431
# SOP1 instructions: 1 input, 1 output (+optional SCC)
# Each row lists the opcode number per generation (-1 where the table has no
# encoding), the name and, optionally, an explicit InstrClass.
SOP1 = {
    # GFX6, GFX7, GFX8, GFX9, GFX10, name
    (0x03, 0x03, 0x00, 0x00, 0x03, "s_mov_b32"),
    (0x04, 0x04, 0x01, 0x01, 0x04, "s_mov_b64"),
    (0x05, 0x05, 0x02, 0x02, 0x05, "s_cmov_b32"),
    (0x06, 0x06, 0x03, 0x03, 0x06, "s_cmov_b64"),
    (0x07, 0x07, 0x04, 0x04, 0x07, "s_not_b32"),
    (0x08, 0x08, 0x05, 0x05, 0x08, "s_not_b64"),
    (0x09, 0x09, 0x06, 0x06, 0x09, "s_wqm_b32"),
    (0x0a, 0x0a, 0x07, 0x07, 0x0a, "s_wqm_b64"),
    (0x0b, 0x0b, 0x08, 0x08, 0x0b, "s_brev_b32"),
    (0x0c, 0x0c, 0x09, 0x09, 0x0c, "s_brev_b64"),
    (0x0d, 0x0d, 0x0a, 0x0a, 0x0d, "s_bcnt0_i32_b32"),
    (0x0e, 0x0e, 0x0b, 0x0b, 0x0e, "s_bcnt0_i32_b64"),
    (0x0f, 0x0f, 0x0c, 0x0c, 0x0f, "s_bcnt1_i32_b32"),
    (0x10, 0x10, 0x0d, 0x0d, 0x10, "s_bcnt1_i32_b64"),
    (0x11, 0x11, 0x0e, 0x0e, 0x11, "s_ff0_i32_b32"),
    (0x12, 0x12, 0x0f, 0x0f, 0x12, "s_ff0_i32_b64"),
    (0x13, 0x13, 0x10, 0x10, 0x13, "s_ff1_i32_b32"),
    (0x14, 0x14, 0x11, 0x11, 0x14, "s_ff1_i32_b64"),
    (0x15, 0x15, 0x12, 0x12, 0x15, "s_flbit_i32_b32"),
    (0x16, 0x16, 0x13, 0x13, 0x16, "s_flbit_i32_b64"),
    (0x17, 0x17, 0x14, 0x14, 0x17, "s_flbit_i32"),
    (0x18, 0x18, 0x15, 0x15, 0x18, "s_flbit_i32_i64"),
    (0x19, 0x19, 0x16, 0x16, 0x19, "s_sext_i32_i8"),
    (0x1a, 0x1a, 0x17, 0x17, 0x1a, "s_sext_i32_i16"),
    (0x1b, 0x1b, 0x18, 0x18, 0x1b, "s_bitset0_b32"),
    (0x1c, 0x1c, 0x19, 0x19, 0x1c, "s_bitset0_b64"),
    (0x1d, 0x1d, 0x1a, 0x1a, 0x1d, "s_bitset1_b32"),
    (0x1e, 0x1e, 0x1b, 0x1b, 0x1e, "s_bitset1_b64"),
    (0x1f, 0x1f, 0x1c, 0x1c, 0x1f, "s_getpc_b64"),
    (0x20, 0x20, 0x1d, 0x1d, 0x20, "s_setpc_b64", InstrClass.Branch),
    (0x21, 0x21, 0x1e, 0x1e, 0x21, "s_swappc_b64", InstrClass.Branch),
    (0x22, 0x22, 0x1f, 0x1f, 0x22, "s_rfe_b64", InstrClass.Branch),
    (0x24, 0x24, 0x20, 0x20, 0x24, "s_and_saveexec_b64"),
    (0x25, 0x25, 0x21, 0x21, 0x25, "s_or_saveexec_b64"),
    (0x26, 0x26, 0x22, 0x22, 0x26, "s_xor_saveexec_b64"),
    (0x27, 0x27, 0x23, 0x23, 0x27, "s_andn2_saveexec_b64"),
    (0x28, 0x28, 0x24, 0x24, 0x28, "s_orn2_saveexec_b64"),
    (0x29, 0x29, 0x25, 0x25, 0x29, "s_nand_saveexec_b64"),
    (0x2a, 0x2a, 0x26, 0x26, 0x2a, "s_nor_saveexec_b64"),
    (0x2b, 0x2b, 0x27, 0x27, 0x2b, "s_xnor_saveexec_b64"),
    (0x2c, 0x2c, 0x28, 0x28, 0x2c, "s_quadmask_b32"),
    (0x2d, 0x2d, 0x29, 0x29, 0x2d, "s_quadmask_b64"),
    (0x2e, 0x2e, 0x2a, 0x2a, 0x2e, "s_movrels_b32"),
    (0x2f, 0x2f, 0x2b, 0x2b, 0x2f, "s_movrels_b64"),
    (0x30, 0x30, 0x2c, 0x2c, 0x30, "s_movreld_b32"),
    (0x31, 0x31, 0x2d, 0x2d, 0x31, "s_movreld_b64"),
    (0x32, 0x32, 0x2e, 0x2e,   -1, "s_cbranch_join", InstrClass.Branch),
    (0x34, 0x34, 0x30, 0x30, 0x34, "s_abs_i32"),
    (0x35, 0x35,   -1,   -1, 0x35, "s_mov_fed_b32"),
    (  -1,   -1, 0x32, 0x32,   -1, "s_set_gpr_idx_idx"),
    (  -1,   -1,   -1, 0x33, 0x37, "s_andn1_saveexec_b64"),
    (  -1,   -1,   -1, 0x34, 0x38, "s_orn1_saveexec_b64"),
    (  -1,   -1,   -1, 0x35, 0x39, "s_andn1_wrexec_b64"),
    (  -1,   -1,   -1, 0x36, 0x3a, "s_andn2_wrexec_b64"),
    (  -1,   -1,   -1, 0x37, 0x3b, "s_bitreplicate_b64_b32"),
    (  -1,   -1,   -1,   -1, 0x3c, "s_and_saveexec_b32"),
    (  -1,   -1,   -1,   -1, 0x3d, "s_or_saveexec_b32"),
    (  -1,   -1,   -1,   -1, 0x3e, "s_xor_saveexec_b32"),
    (  -1,   -1,   -1,   -1, 0x3f, "s_andn2_saveexec_b32"),
    (  -1,   -1,   -1,   -1, 0x40, "s_orn2_saveexec_b32"),
    (  -1,   -1,   -1,   -1, 0x41, "s_nand_saveexec_b32"),
    (  -1,   -1,   -1,   -1, 0x42, "s_nor_saveexec_b32"),
    (  -1,   -1,   -1,   -1, 0x43, "s_xnor_saveexec_b32"),
    (  -1,   -1,   -1,   -1, 0x44, "s_andn1_saveexec_b32"),
    (  -1,   -1,   -1,   -1, 0x45, "s_orn1_saveexec_b32"),
    (  -1,   -1,   -1,   -1, 0x46, "s_andn1_wrexec_b32"),
    (  -1,   -1,   -1,   -1, 0x47, "s_andn2_wrexec_b32"),
    (  -1,   -1,   -1,   -1, 0x49, "s_movrelsd_2_b32"),
    # actually a pseudo-instruction. it's lowered to SALU during assembly though, so it's useful to identify it as a SOP1.
    (  -1,   -1,   -1,   -1,   -1, "p_constaddr_getpc"),
}
# Rows without an explicit class default to InstrClass.Salu. Only the GFX7,
# GFX9 and GFX10 columns are forwarded to opcode().
for gfx6, gfx7, gfx8, gfx9, gfx10, name, cls in default_class(SOP1, InstrClass.Salu):
    opcode(name, gfx7, gfx9, gfx10, format=Format.SOP1, cls=cls)
507
508
509
# SOPC instructions: 2 inputs and 0 outputs (+SCC)
# Each row lists the opcode number per generation (-1 where the table has no
# encoding) followed by the name; no row carries an explicit class.
SOPC = {
    # GFX6, GFX7, GFX8, GFX9, GFX10, name
    (0x00, 0x00, 0x00, 0x00, 0x00, "s_cmp_eq_i32"),
    (0x01, 0x01, 0x01, 0x01, 0x01, "s_cmp_lg_i32"),
    (0x02, 0x02, 0x02, 0x02, 0x02, "s_cmp_gt_i32"),
    (0x03, 0x03, 0x03, 0x03, 0x03, "s_cmp_ge_i32"),
    (0x04, 0x04, 0x04, 0x04, 0x04, "s_cmp_lt_i32"),
    (0x05, 0x05, 0x05, 0x05, 0x05, "s_cmp_le_i32"),
    (0x06, 0x06, 0x06, 0x06, 0x06, "s_cmp_eq_u32"),
    (0x07, 0x07, 0x07, 0x07, 0x07, "s_cmp_lg_u32"),
    (0x08, 0x08, 0x08, 0x08, 0x08, "s_cmp_gt_u32"),
    (0x09, 0x09, 0x09, 0x09, 0x09, "s_cmp_ge_u32"),
    (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_cmp_lt_u32"),
    (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_cmp_le_u32"),
    (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_bitcmp0_b32"),
    (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "s_bitcmp1_b32"),
    (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "s_bitcmp0_b64"),
    (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "s_bitcmp1_b64"),
    (0x10, 0x10, 0x10, 0x10,   -1, "s_setvskip"),
    (  -1,   -1, 0x11, 0x11,   -1, "s_set_gpr_idx_on"),
    (  -1,   -1, 0x12, 0x12, 0x12, "s_cmp_eq_u64"),
    (  -1,   -1, 0x13, 0x13, 0x13, "s_cmp_lg_u64"),
}
# Every SOPC opcode is registered as InstrClass.Salu. Only the GFX7, GFX9 and
# GFX10 columns are forwarded to opcode().
for gfx6, gfx7, gfx8, gfx9, gfx10, name in SOPC:
    opcode(name, gfx7, gfx9, gfx10, format=Format.SOPC, cls=InstrClass.Salu)
535
536
537
# SOPP instructions: 0 inputs (+optional scc/vcc), 0 outputs
# Each row lists the opcode number per generation (-1 where the table has no
# encoding), the name and, optionally, an explicit InstrClass.
SOPP = {
    # GFX6, GFX7, GFX8, GFX9, GFX10, name
    (0x00, 0x00, 0x00, 0x00, 0x00, "s_nop"),
    (0x01, 0x01, 0x01, 0x01, 0x01, "s_endpgm"),
    (0x02, 0x02, 0x02, 0x02, 0x02, "s_branch", InstrClass.Branch),
    (  -1,   -1, 0x03, 0x03, 0x03, "s_wakeup"),
    (0x04, 0x04, 0x04, 0x04, 0x04, "s_cbranch_scc0", InstrClass.Branch),
    (0x05, 0x05, 0x05, 0x05, 0x05, "s_cbranch_scc1", InstrClass.Branch),
    (0x06, 0x06, 0x06, 0x06, 0x06, "s_cbranch_vccz", InstrClass.Branch),
    (0x07, 0x07, 0x07, 0x07, 0x07, "s_cbranch_vccnz", InstrClass.Branch),
    (0x08, 0x08, 0x08, 0x08, 0x08, "s_cbranch_execz", InstrClass.Branch),
    (0x09, 0x09, 0x09, 0x09, 0x09, "s_cbranch_execnz", InstrClass.Branch),
    (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_barrier", InstrClass.Barrier),
    (  -1, 0x0b, 0x0b, 0x0b, 0x0b, "s_setkill"),
    (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_waitcnt", InstrClass.Waitcnt),
    (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "s_sethalt"),
    (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "s_sleep"),
    (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "s_setprio"),
    (0x10, 0x10, 0x10, 0x10, 0x10, "s_sendmsg", InstrClass.Sendmsg),
    (0x11, 0x11, 0x11, 0x11, 0x11, "s_sendmsghalt", InstrClass.Sendmsg),
    (0x12, 0x12, 0x12, 0x12, 0x12, "s_trap", InstrClass.Branch),
    (0x13, 0x13, 0x13, 0x13, 0x13, "s_icache_inv"),
    (0x14, 0x14, 0x14, 0x14, 0x14, "s_incperflevel"),
    (0x15, 0x15, 0x15, 0x15, 0x15, "s_decperflevel"),
    (0x16, 0x16, 0x16, 0x16, 0x16, "s_ttracedata"),
    (  -1, 0x17, 0x17, 0x17, 0x17, "s_cbranch_cdbgsys", InstrClass.Branch),
    (  -1, 0x18, 0x18, 0x18, 0x18, "s_cbranch_cdbguser", InstrClass.Branch),
    (  -1, 0x19, 0x19, 0x19, 0x19, "s_cbranch_cdbgsys_or_user", InstrClass.Branch),
    (  -1, 0x1a, 0x1a, 0x1a, 0x1a, "s_cbranch_cdbgsys_and_user", InstrClass.Branch),
    (  -1,   -1, 0x1b, 0x1b, 0x1b, "s_endpgm_saved"),
    (  -1,   -1, 0x1c, 0x1c,   -1, "s_set_gpr_idx_off"),
    (  -1,   -1, 0x1d, 0x1d,   -1, "s_set_gpr_idx_mode"),
    (  -1,   -1,   -1, 0x1e, 0x1e, "s_endpgm_ordered_ps_done"),
    (  -1,   -1,   -1,   -1, 0x1f, "s_code_end"),
    (  -1,   -1,   -1,   -1, 0x20, "s_inst_prefetch"),
    (  -1,   -1,   -1,   -1, 0x21, "s_clause"),
    (  -1,   -1,   -1,   -1, 0x22, "s_wait_idle"),
    (  -1,   -1,   -1,   -1, 0x23, "s_waitcnt_depctr"),
    (  -1,   -1,   -1,   -1, 0x24, "s_round_mode"),
    (  -1,   -1,   -1,   -1, 0x25, "s_denorm_mode"),
    (  -1,   -1,   -1,   -1, 0x26, "s_ttracedata_imm"),
}
# Rows without an explicit class default to InstrClass.Salu. Only the GFX7,
# GFX9 and GFX10 columns are forwarded to opcode().
for gfx6, gfx7, gfx8, gfx9, gfx10, name, cls in default_class(SOPP, InstrClass.Salu):
    opcode(name, gfx7, gfx9, gfx10, format=Format.SOPP, cls=cls)
582
583
584
# SMEM instructions: sbase input (2 sgpr), potentially 2 offset inputs, 1 sdata input/output
# Unlike GFX10, GFX10.3 does not have SMEM store, atomic or scratch instructions
# Each row lists the opcode number per generation (-1 where the table has no
# encoding) followed by the name; no row carries an explicit class.
SMEM = {
    # GFX6, GFX7, GFX8, GFX9, GFX10, name
    (0x00, 0x00, 0x00, 0x00, 0x00, "s_load_dword"),
    (0x01, 0x01, 0x01, 0x01, 0x01, "s_load_dwordx2"),
    (0x02, 0x02, 0x02, 0x02, 0x02, "s_load_dwordx4"),
    (0x03, 0x03, 0x03, 0x03, 0x03, "s_load_dwordx8"),
    (0x04, 0x04, 0x04, 0x04, 0x04, "s_load_dwordx16"),
    (  -1,   -1,   -1, 0x05, 0x05, "s_scratch_load_dword"),
    (  -1,   -1,   -1, 0x06, 0x06, "s_scratch_load_dwordx2"),
    (  -1,   -1,   -1, 0x07, 0x07, "s_scratch_load_dwordx4"),
    (0x08, 0x08, 0x08, 0x08, 0x08, "s_buffer_load_dword"),
    (0x09, 0x09, 0x09, 0x09, 0x09, "s_buffer_load_dwordx2"),
    (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_buffer_load_dwordx4"),
    (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_buffer_load_dwordx8"),
    (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_buffer_load_dwordx16"),
    (  -1,   -1, 0x10, 0x10, 0x10, "s_store_dword"),
    (  -1,   -1, 0x11, 0x11, 0x11, "s_store_dwordx2"),
    (  -1,   -1, 0x12, 0x12, 0x12, "s_store_dwordx4"),
    (  -1,   -1,   -1, 0x15, 0x15, "s_scratch_store_dword"),
    (  -1,   -1,   -1, 0x16, 0x16, "s_scratch_store_dwordx2"),
    (  -1,   -1,   -1, 0x17, 0x17, "s_scratch_store_dwordx4"),
    (  -1,   -1, 0x18, 0x18, 0x18, "s_buffer_store_dword"),
    (  -1,   -1, 0x19, 0x19, 0x19, "s_buffer_store_dwordx2"),
    (  -1,   -1, 0x1a, 0x1a, 0x1a, "s_buffer_store_dwordx4"),
    (  -1,   -1, 0x1f, 0x1f, 0x1f, "s_gl1_inv"),
    (0x1f, 0x1f, 0x20, 0x20, 0x20, "s_dcache_inv"),
    (  -1,   -1, 0x21, 0x21, 0x21, "s_dcache_wb"),
    (  -1, 0x1d, 0x22, 0x22,   -1, "s_dcache_inv_vol"),
    (  -1,   -1, 0x23, 0x23,   -1, "s_dcache_wb_vol"),
    (0x1e, 0x1e, 0x24, 0x24, 0x24, "s_memtime"),  # GFX6-GFX10
    (  -1,   -1, 0x25, 0x25, 0x25, "s_memrealtime"),
    (  -1,   -1, 0x26, 0x26, 0x26, "s_atc_probe"),
    (  -1,   -1, 0x27, 0x27, 0x27, "s_atc_probe_buffer"),
    (  -1,   -1,   -1, 0x28, 0x28, "s_dcache_discard"),
    (  -1,   -1,   -1, 0x29, 0x29, "s_dcache_discard_x2"),
    (  -1,   -1,   -1,   -1, 0x2a, "s_get_waveid_in_workgroup"),
    (  -1,   -1,   -1, 0x40, 0x40, "s_buffer_atomic_swap"),
    (  -1,   -1,   -1, 0x41, 0x41, "s_buffer_atomic_cmpswap"),
    (  -1,   -1,   -1, 0x42, 0x42, "s_buffer_atomic_add"),
    (  -1,   -1,   -1, 0x43, 0x43, "s_buffer_atomic_sub"),
    (  -1,   -1,   -1, 0x44, 0x44, "s_buffer_atomic_smin"),
    (  -1,   -1,   -1, 0x45, 0x45, "s_buffer_atomic_umin"),
    (  -1,   -1,   -1, 0x46, 0x46, "s_buffer_atomic_smax"),
    (  -1,   -1,   -1, 0x47, 0x47, "s_buffer_atomic_umax"),
    (  -1,   -1,   -1, 0x48, 0x48, "s_buffer_atomic_and"),
    (  -1,   -1,   -1, 0x49, 0x49, "s_buffer_atomic_or"),
    (  -1,   -1,   -1, 0x4a, 0x4a, "s_buffer_atomic_xor"),
    (  -1,   -1,   -1, 0x4b, 0x4b, "s_buffer_atomic_inc"),
    (  -1,   -1,   -1, 0x4c, 0x4c, "s_buffer_atomic_dec"),
    (  -1,   -1,   -1, 0x60, 0x60, "s_buffer_atomic_swap_x2"),
    (  -1,   -1,   -1, 0x61, 0x61, "s_buffer_atomic_cmpswap_x2"),
    (  -1,   -1,   -1, 0x62, 0x62, "s_buffer_atomic_add_x2"),
    (  -1,   -1,   -1, 0x63, 0x63, "s_buffer_atomic_sub_x2"),
    (  -1,   -1,   -1, 0x64, 0x64, "s_buffer_atomic_smin_x2"),
    (  -1,   -1,   -1, 0x65, 0x65, "s_buffer_atomic_umin_x2"),
    (  -1,   -1,   -1, 0x66, 0x66, "s_buffer_atomic_smax_x2"),
    (  -1,   -1,   -1, 0x67, 0x67, "s_buffer_atomic_umax_x2"),
    (  -1,   -1,   -1, 0x68, 0x68, "s_buffer_atomic_and_x2"),
    (  -1,   -1,   -1, 0x69, 0x69, "s_buffer_atomic_or_x2"),
    (  -1,   -1,   -1, 0x6a, 0x6a, "s_buffer_atomic_xor_x2"),
    (  -1,   -1,   -1, 0x6b, 0x6b, "s_buffer_atomic_inc_x2"),
    (  -1,   -1,   -1, 0x6c, 0x6c, "s_buffer_atomic_dec_x2"),
    (  -1,   -1,   -1, 0x80, 0x80, "s_atomic_swap"),
    (  -1,   -1,   -1, 0x81, 0x81, "s_atomic_cmpswap"),
    (  -1,   -1,   -1, 0x82, 0x82, "s_atomic_add"),
    (  -1,   -1,   -1, 0x83, 0x83, "s_atomic_sub"),
    (  -1,   -1,   -1, 0x84, 0x84, "s_atomic_smin"),
    (  -1,   -1,   -1, 0x85, 0x85, "s_atomic_umin"),
    (  -1,   -1,   -1, 0x86, 0x86, "s_atomic_smax"),
    (  -1,   -1,   -1, 0x87, 0x87, "s_atomic_umax"),
    (  -1,   -1,   -1, 0x88, 0x88, "s_atomic_and"),
    (  -1,   -1,   -1, 0x89, 0x89, "s_atomic_or"),
    (  -1,   -1,   -1, 0x8a, 0x8a, "s_atomic_xor"),
    (  -1,   -1,   -1, 0x8b, 0x8b, "s_atomic_inc"),
    (  -1,   -1,   -1, 0x8c, 0x8c, "s_atomic_dec"),
    (  -1,   -1,   -1, 0xa0, 0xa0, "s_atomic_swap_x2"),
    (  -1,   -1,   -1, 0xa1, 0xa1, "s_atomic_cmpswap_x2"),
    (  -1,   -1,   -1, 0xa2, 0xa2, "s_atomic_add_x2"),
    (  -1,   -1,   -1, 0xa3, 0xa3, "s_atomic_sub_x2"),
    (  -1,   -1,   -1, 0xa4, 0xa4, "s_atomic_smin_x2"),
    (  -1,   -1,   -1, 0xa5, 0xa5, "s_atomic_umin_x2"),
    (  -1,   -1,   -1, 0xa6, 0xa6, "s_atomic_smax_x2"),
    (  -1,   -1,   -1, 0xa7, 0xa7, "s_atomic_umax_x2"),
    (  -1,   -1,   -1, 0xa8, 0xa8, "s_atomic_and_x2"),
    (  -1,   -1,   -1, 0xa9, 0xa9, "s_atomic_or_x2"),
    (  -1,   -1,   -1, 0xaa, 0xaa, "s_atomic_xor_x2"),
    (  -1,   -1,   -1, 0xab, 0xab, "s_atomic_inc_x2"),
    (  -1,   -1,   -1, 0xac, 0xac, "s_atomic_dec_x2"),
}
# Every SMEM opcode is registered as InstrClass.SMem and flagged atomic when
# its name contains "atomic". Only the GFX7, GFX9 and GFX10 columns are
# forwarded to opcode().
for gfx6, gfx7, gfx8, gfx9, gfx10, name in SMEM:
    opcode(name, gfx7, gfx9, gfx10, format=Format.SMEM, cls=InstrClass.SMem,
           is_atomic="atomic" in name)
677
678
679
# VOP2 instructions: 2 inputs, 1 output (+ optional vcc)
680
# TODO: misses some GFX6_7 opcodes which were shifted to VOP3 in GFX8
681
VOP2 = {
682
# GFX6, GFX7, GFX8, GFX9, GFX10, name, input/output modifiers
683
(0x01, 0x01, -1, -1, -1, "v_readlane_b32", False),
684
(0x02, 0x02, -1, -1, -1, "v_writelane_b32", False),
685
(0x03, 0x03, 0x01, 0x01, 0x03, "v_add_f32", True),
686
(0x04, 0x04, 0x02, 0x02, 0x04, "v_sub_f32", True),
687
(0x05, 0x05, 0x03, 0x03, 0x05, "v_subrev_f32", True),
688
(0x06, 0x06, -1, -1, 0x06, "v_mac_legacy_f32", True),
689
(0x07, 0x07, 0x04, 0x04, 0x07, "v_mul_legacy_f32", True),
690
(0x08, 0x08, 0x05, 0x05, 0x08, "v_mul_f32", True),
691
(0x09, 0x09, 0x06, 0x06, 0x09, "v_mul_i32_i24", False),
692
(0x0a, 0x0a, 0x07, 0x07, 0x0a, "v_mul_hi_i32_i24", False),
693
(0x0b, 0x0b, 0x08, 0x08, 0x0b, "v_mul_u32_u24", False),
694
(0x0c, 0x0c, 0x09, 0x09, 0x0c, "v_mul_hi_u32_u24", False),
695
(0x0d, 0x0d, -1, -1, -1, "v_min_legacy_f32", True),
696
(0x0e, 0x0e, -1, -1, -1, "v_max_legacy_f32", True),
697
(0x0f, 0x0f, 0x0a, 0x0a, 0x0f, "v_min_f32", True),
698
(0x10, 0x10, 0x0b, 0x0b, 0x10, "v_max_f32", True),
699
(0x11, 0x11, 0x0c, 0x0c, 0x11, "v_min_i32", False),
700
(0x12, 0x12, 0x0d, 0x0d, 0x12, "v_max_i32", False),
701
(0x13, 0x13, 0x0e, 0x0e, 0x13, "v_min_u32", False),
702
(0x14, 0x14, 0x0f, 0x0f, 0x14, "v_max_u32", False),
703
(0x15, 0x15, -1, -1, -1, "v_lshr_b32", False),
704
(0x16, 0x16, 0x10, 0x10, 0x16, "v_lshrrev_b32", False),
705
(0x17, 0x17, -1, -1, -1, "v_ashr_i32", False),
706
(0x18, 0x18, 0x11, 0x11, 0x18, "v_ashrrev_i32", False),
707
(0x19, 0x19, -1, -1, -1, "v_lshl_b32", False),
708
(0x1a, 0x1a, 0x12, 0x12, 0x1a, "v_lshlrev_b32", False),
709
(0x1b, 0x1b, 0x13, 0x13, 0x1b, "v_and_b32", False),
710
(0x1c, 0x1c, 0x14, 0x14, 0x1c, "v_or_b32", False),
711
(0x1d, 0x1d, 0x15, 0x15, 0x1d, "v_xor_b32", False),
712
( -1, -1, -1, -1, 0x1e, "v_xnor_b32", False),
713
(0x1f, 0x1f, 0x16, 0x16, 0x1f, "v_mac_f32", True),
714
(0x20, 0x20, 0x17, 0x17, 0x20, "v_madmk_f32", False),
715
(0x21, 0x21, 0x18, 0x18, 0x21, "v_madak_f32", False),
716
(0x24, 0x24, -1, -1, -1, "v_mbcnt_hi_u32_b32", False),
717
(0x25, 0x25, 0x19, 0x19, -1, "v_add_co_u32", False), # VOP3B only in RDNA
718
(0x26, 0x26, 0x1a, 0x1a, -1, "v_sub_co_u32", False), # VOP3B only in RDNA
719
(0x27, 0x27, 0x1b, 0x1b, -1, "v_subrev_co_u32", False), # VOP3B only in RDNA
720
(0x28, 0x28, 0x1c, 0x1c, 0x28, "v_addc_co_u32", False), # v_add_co_ci_u32 in RDNA
721
(0x29, 0x29, 0x1d, 0x1d, 0x29, "v_subb_co_u32", False), # v_sub_co_ci_u32 in RDNA
722
(0x2a, 0x2a, 0x1e, 0x1e, 0x2a, "v_subbrev_co_u32", False), # v_subrev_co_ci_u32 in RDNA
723
( -1, -1, -1, -1, 0x2b, "v_fmac_f32", True),
724
( -1, -1, -1, -1, 0x2c, "v_fmamk_f32", True),
725
( -1, -1, -1, -1, 0x2d, "v_fmaak_f32", True),
726
(0x2f, 0x2f, -1, -1, 0x2f, "v_cvt_pkrtz_f16_f32", True),
727
( -1, -1, 0x1f, 0x1f, 0x32, "v_add_f16", True),
728
( -1, -1, 0x20, 0x20, 0x33, "v_sub_f16", True),
729
( -1, -1, 0x21, 0x21, 0x34, "v_subrev_f16", True),
730
( -1, -1, 0x22, 0x22, 0x35, "v_mul_f16", True),
731
( -1, -1, 0x23, 0x23, -1, "v_mac_f16", True),
732
( -1, -1, 0x24, 0x24, -1, "v_madmk_f16", False),
733
( -1, -1, 0x25, 0x25, -1, "v_madak_f16", False),
734
( -1, -1, 0x26, 0x26, -1, "v_add_u16", False),
735
( -1, -1, 0x27, 0x27, -1, "v_sub_u16", False),
736
( -1, -1, 0x28, 0x28, -1, "v_subrev_u16", False),
737
( -1, -1, 0x29, 0x29, -1, "v_mul_lo_u16", False),
738
( -1, -1, 0x2a, 0x2a, -1, "v_lshlrev_b16", False),
739
( -1, -1, 0x2b, 0x2b, -1, "v_lshrrev_b16", False),
740
( -1, -1, 0x2c, 0x2c, -1, "v_ashrrev_i16", False),
741
( -1, -1, 0x2d, 0x2d, 0x39, "v_max_f16", True),
742
( -1, -1, 0x2e, 0x2e, 0x3a, "v_min_f16", True),
743
( -1, -1, 0x2f, 0x2f, -1, "v_max_u16", False),
744
( -1, -1, 0x30, 0x30, -1, "v_max_i16", False),
745
( -1, -1, 0x31, 0x31, -1, "v_min_u16", False),
746
( -1, -1, 0x32, 0x32, -1, "v_min_i16", False),
747
( -1, -1, 0x33, 0x33, 0x3b, "v_ldexp_f16", False),
748
( -1, -1, -1, 0x34, 0x25, "v_add_u32", False), # use v_add_co_u32 on GFX8, called v_add_nc_u32 in RDNA
749
( -1, -1, -1, 0x35, 0x26, "v_sub_u32", False), # use v_sub_co_u32 on GFX8, called v_sub_nc_u32 in RDNA
750
( -1, -1, -1, 0x36, 0x27, "v_subrev_u32", False), # use v_subrev_co_u32 on GFX8, called v_subrev_nc_u32 in RDNA
751
( -1, -1, -1, -1, 0x36, "v_fmac_f16", False),
752
( -1, -1, -1, -1, 0x37, "v_fmamk_f16", False),
753
( -1, -1, -1, -1, 0x38, "v_fmaak_f16", False),
754
( -1, -1, -1, -1, 0x3c, "v_pk_fmac_f16", False),
755
}
756
# Register every VOP2 table row. Only the GFX7, GFX9 and GFX10 encodings are
# forwarded to opcode(); the GFX6 and GFX8 columns are unpacked but unused.
# The row's single modifiers flag is passed for both trailing flag arguments.
for (_gfx6, gfx7, _gfx8, gfx9, gfx10, name, modifiers) in VOP2:
   opcode(name, gfx7, gfx9, gfx10, Format.VOP2, InstrClass.Valu32, modifiers, modifiers)
758
759
# v_cndmask_b32 can use input modifiers but not output modifiers, so it is
# registered on its own with (True, False) instead of through the VOP2 table,
# whose rows carry a single flag for both.
# Encodings forwarded: GFX7 = 0x00, GFX9 = 0x00, GFX10 = 0x01.
opcode("v_cndmask_b32", 0x00, 0x00, 0x01, Format.VOP2, InstrClass.Valu32, True, False)
763
764
765
# VOP1 instructions: instructions with 1 input and 1 output
#
# Row layout:
#   (GFX6, GFX7, GFX8, GFX9, GFX10, name, input_modifiers, output_modifiers[, InstrClass])
# Rows without an explicit InstrClass get InstrClass.Valu32 through
# default_class() in the registration loop below.
# NOTE(review): -1 presumably marks "no encoding on this generation" - confirm
# against opcode().
VOP1 = {
   (0x00, 0x00, 0x00, 0x00, 0x00, "v_nop", False, False),
   (0x01, 0x01, 0x01, 0x01, 0x01, "v_mov_b32", False, False),
   (0x02, 0x02, 0x02, 0x02, 0x02, "v_readfirstlane_b32", False, False),
   (0x03, 0x03, 0x03, 0x03, 0x03, "v_cvt_i32_f64", True, False, InstrClass.ValuDoubleConvert),
   (0x04, 0x04, 0x04, 0x04, 0x04, "v_cvt_f64_i32", False, True, InstrClass.ValuDoubleConvert),
   (0x05, 0x05, 0x05, 0x05, 0x05, "v_cvt_f32_i32", False, True),
   (0x06, 0x06, 0x06, 0x06, 0x06, "v_cvt_f32_u32", False, True),
   (0x07, 0x07, 0x07, 0x07, 0x07, "v_cvt_u32_f32", True, False),
   (0x08, 0x08, 0x08, 0x08, 0x08, "v_cvt_i32_f32", True, False),
   (0x09, 0x09, -1, -1, 0x09, "v_mov_fed_b32", True, False),  # LLVM mentions it for GFX8_9
   (0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "v_cvt_f16_f32", True, True),
   (-1, -1, -1, -1, -1, "p_cvt_f16_f32_rtne", True, True),
   (0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "v_cvt_f32_f16", True, True),
   (0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "v_cvt_rpi_i32_f32", True, False),
   (0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "v_cvt_flr_i32_f32", True, False),
   (0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "v_cvt_off_f32_i4", False, True),
   (0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "v_cvt_f32_f64", True, True, InstrClass.ValuDoubleConvert),
   (0x10, 0x10, 0x10, 0x10, 0x10, "v_cvt_f64_f32", True, True, InstrClass.ValuDoubleConvert),
   (0x11, 0x11, 0x11, 0x11, 0x11, "v_cvt_f32_ubyte0", False, True),
   (0x12, 0x12, 0x12, 0x12, 0x12, "v_cvt_f32_ubyte1", False, True),
   (0x13, 0x13, 0x13, 0x13, 0x13, "v_cvt_f32_ubyte2", False, True),
   (0x14, 0x14, 0x14, 0x14, 0x14, "v_cvt_f32_ubyte3", False, True),
   (0x15, 0x15, 0x15, 0x15, 0x15, "v_cvt_u32_f64", True, False, InstrClass.ValuDoubleConvert),
   (0x16, 0x16, 0x16, 0x16, 0x16, "v_cvt_f64_u32", False, True, InstrClass.ValuDoubleConvert),
   (-1, 0x17, 0x17, 0x17, 0x17, "v_trunc_f64", True, True, InstrClass.ValuDouble),
   (-1, 0x18, 0x18, 0x18, 0x18, "v_ceil_f64", True, True, InstrClass.ValuDouble),
   (-1, 0x19, 0x19, 0x19, 0x19, "v_rndne_f64", True, True, InstrClass.ValuDouble),
   (-1, 0x1a, 0x1a, 0x1a, 0x1a, "v_floor_f64", True, True, InstrClass.ValuDouble),
   (-1, -1, -1, -1, 0x1b, "v_pipeflush", False, False),
   (0x20, 0x20, 0x1b, 0x1b, 0x20, "v_fract_f32", True, True),
   (0x21, 0x21, 0x1c, 0x1c, 0x21, "v_trunc_f32", True, True),
   (0x22, 0x22, 0x1d, 0x1d, 0x22, "v_ceil_f32", True, True),
   (0x23, 0x23, 0x1e, 0x1e, 0x23, "v_rndne_f32", True, True),
   (0x24, 0x24, 0x1f, 0x1f, 0x24, "v_floor_f32", True, True),
   (0x25, 0x25, 0x20, 0x20, 0x25, "v_exp_f32", True, True, InstrClass.ValuTranscendental32),
   (0x26, 0x26, -1, -1, -1, "v_log_clamp_f32", True, True, InstrClass.ValuTranscendental32),
   (0x27, 0x27, 0x21, 0x21, 0x27, "v_log_f32", True, True, InstrClass.ValuTranscendental32),
   (0x28, 0x28, -1, -1, -1, "v_rcp_clamp_f32", True, True, InstrClass.ValuTranscendental32),
   (0x29, 0x29, -1, -1, -1, "v_rcp_legacy_f32", True, True, InstrClass.ValuTranscendental32),
   (0x2a, 0x2a, 0x22, 0x22, 0x2a, "v_rcp_f32", True, True, InstrClass.ValuTranscendental32),
   (0x2b, 0x2b, 0x23, 0x23, 0x2b, "v_rcp_iflag_f32", True, True, InstrClass.ValuTranscendental32),
   (0x2c, 0x2c, -1, -1, -1, "v_rsq_clamp_f32", True, True, InstrClass.ValuTranscendental32),
   (0x2d, 0x2d, -1, -1, -1, "v_rsq_legacy_f32", True, True, InstrClass.ValuTranscendental32),
   (0x2e, 0x2e, 0x24, 0x24, 0x2e, "v_rsq_f32", True, True, InstrClass.ValuTranscendental32),
   (0x2f, 0x2f, 0x25, 0x25, 0x2f, "v_rcp_f64", True, True, InstrClass.ValuDoubleTranscendental),
   (0x30, 0x30, -1, -1, -1, "v_rcp_clamp_f64", True, True, InstrClass.ValuDoubleTranscendental),
   (0x31, 0x31, 0x26, 0x26, 0x31, "v_rsq_f64", True, True, InstrClass.ValuDoubleTranscendental),
   (0x32, 0x32, -1, -1, -1, "v_rsq_clamp_f64", True, True, InstrClass.ValuDoubleTranscendental),
   (0x33, 0x33, 0x27, 0x27, 0x33, "v_sqrt_f32", True, True, InstrClass.ValuTranscendental32),
   (0x34, 0x34, 0x28, 0x28, 0x34, "v_sqrt_f64", True, True, InstrClass.ValuDoubleTranscendental),
   (0x35, 0x35, 0x29, 0x29, 0x35, "v_sin_f32", True, True, InstrClass.ValuTranscendental32),
   (0x36, 0x36, 0x2a, 0x2a, 0x36, "v_cos_f32", True, True, InstrClass.ValuTranscendental32),
   (0x37, 0x37, 0x2b, 0x2b, 0x37, "v_not_b32", False, False),
   (0x38, 0x38, 0x2c, 0x2c, 0x38, "v_bfrev_b32", False, False),
   (0x39, 0x39, 0x2d, 0x2d, 0x39, "v_ffbh_u32", False, False),
   (0x3a, 0x3a, 0x2e, 0x2e, 0x3a, "v_ffbl_b32", False, False),
   (0x3b, 0x3b, 0x2f, 0x2f, 0x3b, "v_ffbh_i32", False, False),
   (0x3c, 0x3c, 0x30, 0x30, 0x3c, "v_frexp_exp_i32_f64", True, False, InstrClass.ValuDouble),
   (0x3d, 0x3d, 0x31, 0x31, 0x3d, "v_frexp_mant_f64", True, False, InstrClass.ValuDouble),
   (0x3e, 0x3e, 0x32, 0x32, 0x3e, "v_fract_f64", True, True, InstrClass.ValuDouble),
   (0x3f, 0x3f, 0x33, 0x33, 0x3f, "v_frexp_exp_i32_f32", True, False),
   (0x40, 0x40, 0x34, 0x34, 0x40, "v_frexp_mant_f32", True, False),
   (0x41, 0x41, 0x35, 0x35, 0x41, "v_clrexcp", False, False),
   (0x42, 0x42, 0x36, -1, 0x42, "v_movreld_b32", False, False),
   (0x43, 0x43, 0x37, -1, 0x43, "v_movrels_b32", False, False),
   (0x44, 0x44, 0x38, -1, 0x44, "v_movrelsd_b32", False, False),
   (-1, -1, -1, -1, 0x48, "v_movrelsd_2_b32", False, False),
   (-1, -1, -1, 0x37, -1, "v_screen_partition_4se_b32", False, False),
   (-1, -1, 0x39, 0x39, 0x50, "v_cvt_f16_u16", False, True),
   (-1, -1, 0x3a, 0x3a, 0x51, "v_cvt_f16_i16", False, True),
   (-1, -1, 0x3b, 0x3b, 0x52, "v_cvt_u16_f16", True, False),
   (-1, -1, 0x3c, 0x3c, 0x53, "v_cvt_i16_f16", True, False),
   (-1, -1, 0x3d, 0x3d, 0x54, "v_rcp_f16", True, True, InstrClass.ValuTranscendental32),
   (-1, -1, 0x3e, 0x3e, 0x55, "v_sqrt_f16", True, True, InstrClass.ValuTranscendental32),
   (-1, -1, 0x3f, 0x3f, 0x56, "v_rsq_f16", True, True, InstrClass.ValuTranscendental32),
   (-1, -1, 0x40, 0x40, 0x57, "v_log_f16", True, True, InstrClass.ValuTranscendental32),
   (-1, -1, 0x41, 0x41, 0x58, "v_exp_f16", True, True, InstrClass.ValuTranscendental32),
   (-1, -1, 0x42, 0x42, 0x59, "v_frexp_mant_f16", True, False),
   (-1, -1, 0x43, 0x43, 0x5a, "v_frexp_exp_i16_f16", True, False),
   (-1, -1, 0x44, 0x44, 0x5b, "v_floor_f16", True, True),
   (-1, -1, 0x45, 0x45, 0x5c, "v_ceil_f16", True, True),
   (-1, -1, 0x46, 0x46, 0x5d, "v_trunc_f16", True, True),
   (-1, -1, 0x47, 0x47, 0x5e, "v_rndne_f16", True, True),
   (-1, -1, 0x48, 0x48, 0x5f, "v_fract_f16", True, True),
   (-1, -1, 0x49, 0x49, 0x60, "v_sin_f16", True, True, InstrClass.ValuTranscendental32),
   (-1, -1, 0x4a, 0x4a, 0x61, "v_cos_f16", True, True, InstrClass.ValuTranscendental32),
   (-1, 0x46, 0x4b, 0x4b, -1, "v_exp_legacy_f32", True, True, InstrClass.ValuTranscendental32),
   (-1, 0x45, 0x4c, 0x4c, -1, "v_log_legacy_f32", True, True, InstrClass.ValuTranscendental32),
   (-1, -1, -1, 0x4f, 0x62, "v_sat_pk_u8_i16", False, False),
   (-1, -1, -1, 0x4d, 0x63, "v_cvt_norm_i16_f16", True, False),
   (-1, -1, -1, 0x4e, 0x64, "v_cvt_norm_u16_f16", True, False),
   (-1, -1, -1, 0x51, 0x65, "v_swap_b32", False, False),
   (-1, -1, -1, -1, 0x68, "v_swaprel_b32", False, False),
}

# Register every VOP1 row; only the GFX7, GFX9 and GFX10 encodings are passed on.
for (_gfx6, gfx7, _gfx8, gfx9, gfx10, name, in_mod, out_mod, cls) in default_class(VOP1, InstrClass.Valu32):
   opcode(name, gfx7, gfx9, gfx10, Format.VOP1, cls, in_mod, out_mod)
864
865
866
# VOPC instructions:
#
# Class-test compares.
# Row layout: (GFX6, GFX7, GFX8, GFX9, GFX10, name[, InstrClass]).
# Rows without an InstrClass get InstrClass.Valu32 through default_class().
VOPC_CLASS = {
   (0x88, 0x88, 0x10, 0x10, 0x88, "v_cmp_class_f32"),
   (-1, -1, 0x14, 0x14, 0x8f, "v_cmp_class_f16"),
   (0x98, 0x98, 0x11, 0x11, 0x98, "v_cmpx_class_f32"),
   (-1, -1, 0x15, 0x15, 0x9f, "v_cmpx_class_f16"),
   (0xa8, 0xa8, 0x12, 0x12, 0xa8, "v_cmp_class_f64", InstrClass.ValuDouble),
   (0xb8, 0xb8, 0x13, 0x13, 0xb8, "v_cmpx_class_f64", InstrClass.ValuDouble),
}

# All class compares are registered with input modifiers enabled and output
# modifiers disabled (the trailing True, False).
for (_gfx6, gfx7, _gfx8, gfx9, gfx10, name, cls) in default_class(VOPC_CLASS, InstrClass.Valu32):
   opcode(name, gfx7, gfx9, gfx10, Format.VOPC, cls, True, False)
878
879
# Condition suffixes used to build the float compare mnemonics; the position
# in this list is the offset added to each opcode base.
COMPF = [
   "f", "lt", "eq", "le", "gt", "lg", "ge", "o",
   "u", "nge", "nlg", "ngt", "nle", "neq", "nlt", "tru",
]

# 16-bit float compares. They have no GFX7 encoding (-1), and the two halves
# of COMPF use different opcode bases, so each iteration registers four
# opcodes: v_cmp/v_cmpx for COMPF[i] and v_cmp/v_cmpx for COMPF[i + 8].
for i in range(8):
   first_half, second_half = COMPF[i], COMPF[i + 8]
   f16_compares = (
      # (name, GFX9 opcode, GFX10 opcode)
      ("v_cmp_" + first_half + "_f16", 0x20 + i, 0xc8 + i),
      ("v_cmpx_" + first_half + "_f16", 0x30 + i, 0xd8 + i),
      ("v_cmp_" + second_half + "_f16", 0x28 + i, 0xe8 + i),
      ("v_cmpx_" + second_half + "_f16", 0x38 + i, 0xf8 + i),
   )
   for (mnemonic, gfx9_op, gfx10_op) in f16_compares:
      opcode(mnemonic, -1, gfx9_op, gfx10_op, Format.VOPC, InstrClass.Valu32, True, False)
890
891
# 32-bit and 64-bit float compares: for each of the 16 COMPF conditions,
# register v_cmp/v_cmpx for f32 and then for f64. The condition index i is
# added to a per-generation opcode base. All are registered with input
# modifiers enabled and output modifiers disabled.
#
# NOTE: this loop previously also built tuples for the GFX6/GFX7-only
# v_cmps_*/v_cmpsx_* f32/f64 opcodes (bases 0x40, 0x50, 0x60, 0x70), but they
# were never passed to opcode(), so those opcodes were never registered. The
# dead assignments have been removed; the set of registered opcodes is
# unchanged.
for i in range(16):
   cond = COMPF[i]
   float_compares = (
      # (GFX7 base, GFX9 base, GFX10 base, name, instruction class)
      (0x00, 0x40, 0x00, "v_cmp_" + cond + "_f32", InstrClass.Valu32),
      (0x10, 0x50, 0x10, "v_cmpx_" + cond + "_f32", InstrClass.Valu32),
      (0x20, 0x60, 0x20, "v_cmp_" + cond + "_f64", InstrClass.ValuDouble),
      (0x30, 0x70, 0x30, "v_cmpx_" + cond + "_f64", InstrClass.ValuDouble),
   )
   for (gfx7_base, gfx9_base, gfx10_base, mnemonic, cls) in float_compares:
      opcode(mnemonic, gfx7_base + i, gfx9_base + i, gfx10_base + i, Format.VOPC, cls, True, False)
905
906
# Condition suffixes used to build the integer compare mnemonics; the position
# in this list is the offset added to each opcode base.
COMPI = ["f", "lt", "eq", "le", "gt", "lg", "ge", "tru"]

# 16-bit integer compares, conditions "f" and "tru" (indices 0 and 7):
# GFX_8_9 only - both the GFX7 and the GFX10 opcode are -1.
for i in (0, 7):
   cond = COMPI[i]
   for (mnemonic, gfx9_base) in (
         ("v_cmp_" + cond + "_i16", 0xa0),
         ("v_cmpx_" + cond + "_i16", 0xb0),
         ("v_cmp_" + cond + "_u16", 0xa8),
         ("v_cmpx_" + cond + "_u16", 0xb8)):
      opcode(mnemonic, -1, gfx9_base + i, -1, Format.VOPC, InstrClass.Valu32)

# 16-bit integer compares, conditions "lt" .. "ge" (indices 1..6): these also
# have a GFX10 opcode. Kept as a separate loop so the registration order stays
# the same as before (indices 0 and 7 first, then 1..6).
for i in range(1, 7):
   cond = COMPI[i]
   for (mnemonic, gfx9_base, gfx10_base) in (
         ("v_cmp_" + cond + "_i16", 0xa0, 0x88),
         ("v_cmpx_" + cond + "_i16", 0xb0, 0x98),
         ("v_cmp_" + cond + "_u16", 0xa8, 0xa8),
         ("v_cmpx_" + cond + "_u16", 0xb8, 0xb8)):
      opcode(mnemonic, -1, gfx9_base + i, gfx10_base + i, Format.VOPC, InstrClass.Valu32)
928
929
# 32-bit and 64-bit integer compares: for each of the 8 COMPI conditions,
# register v_cmp/v_cmpx for i32, i64, u32 and u64 (in that order). The
# condition index i is added to a per-generation opcode base.
for i in range(8):
   cond = COMPI[i]
   int_compares = (
      # (GFX7 base, GFX9 base, GFX10 base, name, instruction class)
      (0x80, 0xc0, 0x80, "v_cmp_" + cond + "_i32", InstrClass.Valu32),
      (0x90, 0xd0, 0x90, "v_cmpx_" + cond + "_i32", InstrClass.Valu32),
      (0xa0, 0xe0, 0xa0, "v_cmp_" + cond + "_i64", InstrClass.Valu64),
      (0xb0, 0xf0, 0xb0, "v_cmpx_" + cond + "_i64", InstrClass.Valu64),
      (0xc0, 0xc8, 0xc0, "v_cmp_" + cond + "_u32", InstrClass.Valu32),
      (0xd0, 0xd8, 0xd0, "v_cmpx_" + cond + "_u32", InstrClass.Valu32),
      (0xe0, 0xe8, 0xe0, "v_cmp_" + cond + "_u64", InstrClass.Valu64),
      (0xf0, 0xf8, 0xf0, "v_cmpx_" + cond + "_u64", InstrClass.Valu64),
   )
   for (gfx7_base, gfx9_base, gfx10_base, mnemonic, cls) in int_compares:
      opcode(mnemonic, gfx7_base + i, gfx9_base + i, gfx10_base + i, Format.VOPC, cls)
946
947
948
# VOPP instructions: packed 16bit instructions - 1 or 2 inputs and 1 output
#
# Row layout: (opcode, name, modifiers). The single modifiers flag is used for
# both the input- and the output-modifier argument of opcode().
VOPP = {
   (0x00, "v_pk_mad_i16", False),
   (0x01, "v_pk_mul_lo_u16", False),
   (0x02, "v_pk_add_i16", False),
   (0x03, "v_pk_sub_i16", False),
   (0x04, "v_pk_lshlrev_b16", False),
   (0x05, "v_pk_lshrrev_b16", False),
   (0x06, "v_pk_ashrrev_i16", False),
   (0x07, "v_pk_max_i16", False),
   (0x08, "v_pk_min_i16", False),
   (0x09, "v_pk_mad_u16", False),
   (0x0a, "v_pk_add_u16", False),
   (0x0b, "v_pk_sub_u16", False),
   (0x0c, "v_pk_max_u16", False),
   (0x0d, "v_pk_min_u16", False),
   (0x0e, "v_pk_fma_f16", True),
   (0x0f, "v_pk_add_f16", True),
   (0x10, "v_pk_mul_f16", True),
   (0x11, "v_pk_min_f16", True),
   (0x12, "v_pk_max_f16", True),
   (0x20, "v_fma_mix_f32", True),  # v_mad_mix_f32 in VEGA ISA, v_fma_mix_f32 in RDNA ISA
   (0x21, "v_fma_mixlo_f16", True),  # v_mad_mixlo_f16 in VEGA ISA, v_fma_mixlo_f16 in RDNA ISA
   (0x22, "v_fma_mixhi_f16", True),  # v_mad_mixhi_f16 in VEGA ISA, v_fma_mixhi_f16 in RDNA ISA
}

# These are only supported on gfx9+, so the GFX7 slot is -1 and the same code
# is used for GFX9 and GFX10. Note that the GFX9 column is shared with GFX8
# elsewhere in this file, so gfx8 and gfx9 still need to be told apart here.
for (code, mnemonic, has_modifiers) in VOPP:
   opcode(mnemonic, -1, code, code, Format.VOP3P, InstrClass.Valu32, has_modifiers, has_modifiers)
978
979
980
# VINTERP instructions:
#
# Row layout: (opcode, name).
VINTRP = {
   (0x00, "v_interp_p1_f32"),
   (0x01, "v_interp_p2_f32"),
   (0x02, "v_interp_mov_f32"),
}

# The same code is passed for the GFX7, GFX9 and GFX10 encodings.
for (code, mnemonic) in VINTRP:
   opcode(mnemonic, code, code, code, Format.VINTRP, InstrClass.Valu32)
989
990
# VOP3 instructions: 3 inputs, 1 output
# VOP3b instructions: have a unique scalar output, e.g. VOP2 with vcc out
#
# Row layout:
#   (GFX6, GFX7, GFX8, GFX9, GFX10, name, input_modifiers, output_modifiers[, InstrClass])
# Rows without an explicit InstrClass get InstrClass.Valu32 through
# default_class() in the registration loop below.
VOP3 = {
   (0x140, 0x140, 0x1c0, 0x1c0, 0x140, "v_mad_legacy_f32", True, True),  # GFX6-GFX10
   (0x141, 0x141, 0x1c1, 0x1c1, 0x141, "v_mad_f32", True, True),
   (0x142, 0x142, 0x1c2, 0x1c2, 0x142, "v_mad_i32_i24", False, False),
   (0x143, 0x143, 0x1c3, 0x1c3, 0x143, "v_mad_u32_u24", False, False),
   (0x144, 0x144, 0x1c4, 0x1c4, 0x144, "v_cubeid_f32", True, True),
   (0x145, 0x145, 0x1c5, 0x1c5, 0x145, "v_cubesc_f32", True, True),
   (0x146, 0x146, 0x1c6, 0x1c6, 0x146, "v_cubetc_f32", True, True),
   (0x147, 0x147, 0x1c7, 0x1c7, 0x147, "v_cubema_f32", True, True),
   (0x148, 0x148, 0x1c8, 0x1c8, 0x148, "v_bfe_u32", False, False),
   (0x149, 0x149, 0x1c9, 0x1c9, 0x149, "v_bfe_i32", False, False),
   (0x14a, 0x14a, 0x1ca, 0x1ca, 0x14a, "v_bfi_b32", False, False),
   (0x14b, 0x14b, 0x1cb, 0x1cb, 0x14b, "v_fma_f32", True, True, InstrClass.ValuFma),
   (0x14c, 0x14c, 0x1cc, 0x1cc, 0x14c, "v_fma_f64", True, True, InstrClass.ValuDouble),
   (0x14d, 0x14d, 0x1cd, 0x1cd, 0x14d, "v_lerp_u8", False, False),
   (0x14e, 0x14e, 0x1ce, 0x1ce, 0x14e, "v_alignbit_b32", False, False),
   (0x14f, 0x14f, 0x1cf, 0x1cf, 0x14f, "v_alignbyte_b32", False, False),
   (0x150, 0x150, -1, -1, 0x150, "v_mullit_f32", True, True),
   (0x151, 0x151, 0x1d0, 0x1d0, 0x151, "v_min3_f32", True, True),
   (0x152, 0x152, 0x1d1, 0x1d1, 0x152, "v_min3_i32", False, False),
   (0x153, 0x153, 0x1d2, 0x1d2, 0x153, "v_min3_u32", False, False),
   (0x154, 0x154, 0x1d3, 0x1d3, 0x154, "v_max3_f32", True, True),
   (0x155, 0x155, 0x1d4, 0x1d4, 0x155, "v_max3_i32", False, False),
   (0x156, 0x156, 0x1d5, 0x1d5, 0x156, "v_max3_u32", False, False),
   (0x157, 0x157, 0x1d6, 0x1d6, 0x157, "v_med3_f32", True, True),
   (0x158, 0x158, 0x1d7, 0x1d7, 0x158, "v_med3_i32", False, False),
   (0x159, 0x159, 0x1d8, 0x1d8, 0x159, "v_med3_u32", False, False),
   (0x15a, 0x15a, 0x1d9, 0x1d9, 0x15a, "v_sad_u8", False, False),
   (0x15b, 0x15b, 0x1da, 0x1da, 0x15b, "v_sad_hi_u8", False, False),
   (0x15c, 0x15c, 0x1db, 0x1db, 0x15c, "v_sad_u16", False, False),
   (0x15d, 0x15d, 0x1dc, 0x1dc, 0x15d, "v_sad_u32", False, False),
   (0x15e, 0x15e, 0x1dd, 0x1dd, 0x15e, "v_cvt_pk_u8_f32", True, False),
   (0x15f, 0x15f, 0x1de, 0x1de, 0x15f, "v_div_fixup_f32", True, True),
   (0x160, 0x160, 0x1df, 0x1df, 0x160, "v_div_fixup_f64", True, True),
   (0x161, 0x161, -1, -1, -1, "v_lshl_b64", False, False, InstrClass.Valu64),
   (0x162, 0x162, -1, -1, -1, "v_lshr_b64", False, False, InstrClass.Valu64),
   (0x163, 0x163, -1, -1, -1, "v_ashr_i64", False, False, InstrClass.Valu64),
   (0x164, 0x164, 0x280, 0x280, 0x164, "v_add_f64", True, True, InstrClass.ValuDoubleAdd),
   (0x165, 0x165, 0x281, 0x281, 0x165, "v_mul_f64", True, True, InstrClass.ValuDouble),
   (0x166, 0x166, 0x282, 0x282, 0x166, "v_min_f64", True, True, InstrClass.ValuDouble),
   (0x167, 0x167, 0x283, 0x283, 0x167, "v_max_f64", True, True, InstrClass.ValuDouble),
   (0x168, 0x168, 0x284, 0x284, 0x168, "v_ldexp_f64", False, True, InstrClass.ValuDouble),  # src1 can take input modifiers
   (0x169, 0x169, 0x285, 0x285, 0x169, "v_mul_lo_u32", False, False, InstrClass.ValuQuarterRate32),
   (0x16a, 0x16a, 0x286, 0x286, 0x16a, "v_mul_hi_u32", False, False, InstrClass.ValuQuarterRate32),
   (0x16b, 0x16b, 0x285, 0x285, 0x16b, "v_mul_lo_i32", False, False, InstrClass.ValuQuarterRate32),  # identical to v_mul_lo_u32
   (0x16c, 0x16c, 0x287, 0x287, 0x16c, "v_mul_hi_i32", False, False, InstrClass.ValuQuarterRate32),
   (0x16d, 0x16d, 0x1e0, 0x1e0, 0x16d, "v_div_scale_f32", True, True),  # writes to VCC
   (0x16e, 0x16e, 0x1e1, 0x1e1, 0x16e, "v_div_scale_f64", True, True, InstrClass.ValuDouble),  # writes to VCC
   (0x16f, 0x16f, 0x1e2, 0x1e2, 0x16f, "v_div_fmas_f32", True, True),  # takes VCC input
   (0x170, 0x170, 0x1e3, 0x1e3, 0x170, "v_div_fmas_f64", True, True, InstrClass.ValuDouble),  # takes VCC input
   (0x171, 0x171, 0x1e4, 0x1e4, 0x171, "v_msad_u8", False, False),
   (0x172, 0x172, 0x1e5, 0x1e5, 0x172, "v_qsad_pk_u16_u8", False, False),
   (0x172, -1, -1, -1, -1, "v_qsad_u8", False, False),  # what's the difference?
   (0x173, 0x173, 0x1e6, 0x1e6, 0x173, "v_mqsad_pk_u16_u8", False, False),
   (0x173, -1, -1, -1, -1, "v_mqsad_u8", False, False),  # what's the difference?
   (0x174, 0x174, 0x292, 0x292, 0x174, "v_trig_preop_f64", False, False, InstrClass.ValuDouble),
   (-1, 0x175, 0x1e7, 0x1e7, 0x175, "v_mqsad_u32_u8", False, False),
   (-1, 0x176, 0x1e8, 0x1e8, 0x176, "v_mad_u64_u32", False, False, InstrClass.Valu64),
   (-1, 0x177, 0x1e9, 0x1e9, 0x177, "v_mad_i64_i32", False, False, InstrClass.Valu64),
   (-1, -1, 0x1ea, 0x1ea, -1, "v_mad_legacy_f16", True, True),
   (-1, -1, 0x1eb, 0x1eb, -1, "v_mad_legacy_u16", False, False),
   (-1, -1, 0x1ec, 0x1ec, -1, "v_mad_legacy_i16", False, False),
   (-1, -1, 0x1ed, 0x1ed, 0x344, "v_perm_b32", False, False),
   (-1, -1, 0x1ee, 0x1ee, -1, "v_fma_legacy_f16", True, True, InstrClass.ValuFma),
   (-1, -1, 0x1ef, 0x1ef, -1, "v_div_fixup_legacy_f16", True, True),
   (0x12c, 0x12c, 0x1f0, 0x1f0, -1, "v_cvt_pkaccum_u8_f32", True, False),
   (-1, -1, -1, 0x1f1, 0x373, "v_mad_u32_u16", False, False),
   (-1, -1, -1, 0x1f2, 0x375, "v_mad_i32_i16", False, False),
   (-1, -1, -1, 0x1f3, 0x345, "v_xad_u32", False, False),
   (-1, -1, -1, 0x1f4, 0x351, "v_min3_f16", True, True),
   (-1, -1, -1, 0x1f5, 0x352, "v_min3_i16", False, False),
   (-1, -1, -1, 0x1f6, 0x353, "v_min3_u16", False, False),
   (-1, -1, -1, 0x1f7, 0x354, "v_max3_f16", True, True),
   (-1, -1, -1, 0x1f8, 0x355, "v_max3_i16", False, False),
   (-1, -1, -1, 0x1f9, 0x356, "v_max3_u16", False, False),
   (-1, -1, -1, 0x1fa, 0x357, "v_med3_f16", True, True),
   (-1, -1, -1, 0x1fb, 0x358, "v_med3_i16", False, False),
   (-1, -1, -1, 0x1fc, 0x359, "v_med3_u16", False, False),
   (-1, -1, -1, 0x1fd, 0x346, "v_lshl_add_u32", False, False),
   (-1, -1, -1, 0x1fe, 0x347, "v_add_lshl_u32", False, False),
   (-1, -1, -1, 0x1ff, 0x36d, "v_add3_u32", False, False),
   (-1, -1, -1, 0x200, 0x36f, "v_lshl_or_b32", False, False),
   (-1, -1, -1, 0x201, 0x371, "v_and_or_b32", False, False),
   (-1, -1, -1, 0x202, 0x372, "v_or3_b32", False, False),
   (-1, -1, -1, 0x203, -1, "v_mad_f16", True, True),
   (-1, -1, -1, 0x204, 0x340, "v_mad_u16", False, False),
   (-1, -1, -1, 0x205, 0x35e, "v_mad_i16", False, False),
   (-1, -1, -1, 0x206, 0x34b, "v_fma_f16", True, True),
   (-1, -1, -1, 0x207, 0x35f, "v_div_fixup_f16", True, True),
   (-1, -1, 0x274, 0x274, 0x342, "v_interp_p1ll_f16", True, True),
   (-1, -1, 0x275, 0x275, 0x343, "v_interp_p1lv_f16", True, True),
   (-1, -1, 0x276, 0x276, -1, "v_interp_p2_legacy_f16", True, True),
   (-1, -1, -1, 0x277, 0x35a, "v_interp_p2_f16", True, True),
   (0x12b, 0x12b, 0x288, 0x288, 0x362, "v_ldexp_f32", False, True),
   (-1, -1, 0x289, 0x289, 0x360, "v_readlane_b32_e64", False, False),
   (-1, -1, 0x28a, 0x28a, 0x361, "v_writelane_b32_e64", False, False),
   (0x122, 0x122, 0x28b, 0x28b, 0x364, "v_bcnt_u32_b32", False, False),
   (0x123, 0x123, 0x28c, 0x28c, 0x365, "v_mbcnt_lo_u32_b32", False, False),
   (-1, -1, 0x28d, 0x28d, 0x366, "v_mbcnt_hi_u32_b32_e64", False, False),
   (-1, -1, 0x28f, 0x28f, 0x2ff, "v_lshlrev_b64", False, False, InstrClass.Valu64),
   (-1, -1, 0x290, 0x290, 0x300, "v_lshrrev_b64", False, False, InstrClass.Valu64),
   (-1, -1, 0x291, 0x291, 0x301, "v_ashrrev_i64", False, False, InstrClass.Valu64),
   (0x11e, 0x11e, 0x293, 0x293, 0x363, "v_bfm_b32", False, False),
   (0x12d, 0x12d, 0x294, 0x294, 0x368, "v_cvt_pknorm_i16_f32", True, False),
   (0x12e, 0x12e, 0x295, 0x295, 0x369, "v_cvt_pknorm_u16_f32", True, False),
   (0x12f, 0x12f, 0x296, 0x296, 0x12f, "v_cvt_pkrtz_f16_f32_e64", True, False),  # GFX6_7_10 is VOP2 with opcode 0x02f
   (0x130, 0x130, 0x297, 0x297, 0x36a, "v_cvt_pk_u16_u32", False, False),
   (0x131, 0x131, 0x298, 0x298, 0x36b, "v_cvt_pk_i16_i32", False, False),
   (-1, -1, -1, 0x299, 0x312, "v_cvt_pknorm_i16_f16", True, False),
   (-1, -1, -1, 0x29a, 0x313, "v_cvt_pknorm_u16_f16", True, False),
   (-1, -1, -1, 0x29c, 0x37f, "v_add_i32", False, False),
   (-1, -1, -1, 0x29d, 0x376, "v_sub_i32", False, False),
   (-1, -1, -1, 0x29e, 0x30d, "v_add_i16", False, False),
   (-1, -1, -1, 0x29f, 0x30e, "v_sub_i16", False, False),
   (-1, -1, -1, 0x2a0, 0x311, "v_pack_b32_f16", True, False),
   (-1, -1, -1, -1, 0x178, "v_xor3_b32", False, False),
   (-1, -1, -1, -1, 0x377, "v_permlane16_b32", False, False),
   (-1, -1, -1, -1, 0x378, "v_permlanex16_b32", False, False),
   (-1, -1, -1, -1, 0x30f, "v_add_co_u32_e64", False, False),
   (-1, -1, -1, -1, 0x310, "v_sub_co_u32_e64", False, False),
   (-1, -1, -1, -1, 0x319, "v_subrev_co_u32_e64", False, False),
   (-1, -1, -1, -1, 0x303, "v_add_u16_e64", False, False),
   (-1, -1, -1, -1, 0x304, "v_sub_u16_e64", False, False),
   (-1, -1, -1, -1, 0x305, "v_mul_lo_u16_e64", False, False),
   (-1, -1, -1, -1, 0x309, "v_max_u16_e64", False, False),
   (-1, -1, -1, -1, 0x30a, "v_max_i16_e64", False, False),
   (-1, -1, -1, -1, 0x30b, "v_min_u16_e64", False, False),
   (-1, -1, -1, -1, 0x30c, "v_min_i16_e64", False, False),
   (-1, -1, -1, -1, 0x307, "v_lshrrev_b16_e64", False, False),
   (-1, -1, -1, -1, 0x308, "v_ashrrev_i16_e64", False, False),
   (-1, -1, -1, -1, 0x314, "v_lshlrev_b16_e64", False, False),
   (-1, -1, -1, -1, 0x140, "v_fma_legacy_f32", True, True, InstrClass.ValuFma),  # GFX10.3+
}

# Register every VOP3 row; only the GFX7, GFX9 and GFX10 encodings are passed on.
for (_gfx6, gfx7, _gfx8, gfx9, gfx10, name, in_mod, out_mod, cls) in default_class(VOP3, InstrClass.Valu32):
   opcode(name, gfx7, gfx9, gfx10, Format.VOP3, cls, in_mod, out_mod)
1127
1128
1129
# DS instructions: 3 inputs (1 addr, 2 data), 1 output
1130
DS = {
1131
(0x00, 0x00, 0x00, 0x00, 0x00, "ds_add_u32"),
1132
(0x01, 0x01, 0x01, 0x01, 0x01, "ds_sub_u32"),
1133
(0x02, 0x02, 0x02, 0x02, 0x02, "ds_rsub_u32"),
1134
(0x03, 0x03, 0x03, 0x03, 0x03, "ds_inc_u32"),
1135
(0x04, 0x04, 0x04, 0x04, 0x04, "ds_dec_u32"),
1136
(0x05, 0x05, 0x05, 0x05, 0x05, "ds_min_i32"),
1137
(0x06, 0x06, 0x06, 0x06, 0x06, "ds_max_i32"),
1138
(0x07, 0x07, 0x07, 0x07, 0x07, "ds_min_u32"),
1139
(0x08, 0x08, 0x08, 0x08, 0x08, "ds_max_u32"),
1140
(0x09, 0x09, 0x09, 0x09, 0x09, "ds_and_b32"),
1141
(0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "ds_or_b32"),
1142
(0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "ds_xor_b32"),
1143
(0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "ds_mskor_b32"),
1144
(0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "ds_write_b32"),
1145
(0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "ds_write2_b32"),
1146
(0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "ds_write2st64_b32"),
1147
(0x10, 0x10, 0x10, 0x10, 0x10, "ds_cmpst_b32"),
1148
(0x11, 0x11, 0x11, 0x11, 0x11, "ds_cmpst_f32"),
1149
(0x12, 0x12, 0x12, 0x12, 0x12, "ds_min_f32"),
1150
(0x13, 0x13, 0x13, 0x13, 0x13, "ds_max_f32"),
1151
( -1, 0x14, 0x14, 0x14, 0x14, "ds_nop"),
1152
( -1, -1, 0x15, 0x15, 0x15, "ds_add_f32"),
1153
( -1, -1, 0x1d, 0x1d, 0xb0, "ds_write_addtid_b32"),
1154
(0x1e, 0x1e, 0x1e, 0x1e, 0x1e, "ds_write_b8"),
1155
(0x1f, 0x1f, 0x1f, 0x1f, 0x1f, "ds_write_b16"),
1156
(0x20, 0x20, 0x20, 0x20, 0x20, "ds_add_rtn_u32"),
1157
(0x21, 0x21, 0x21, 0x21, 0x21, "ds_sub_rtn_u32"),
1158
(0x22, 0x22, 0x22, 0x22, 0x22, "ds_rsub_rtn_u32"),
1159
(0x23, 0x23, 0x23, 0x23, 0x23, "ds_inc_rtn_u32"),
1160
(0x24, 0x24, 0x24, 0x24, 0x24, "ds_dec_rtn_u32"),
1161
(0x25, 0x25, 0x25, 0x25, 0x25, "ds_min_rtn_i32"),
1162
(0x26, 0x26, 0x26, 0x26, 0x26, "ds_max_rtn_i32"),
1163
(0x27, 0x27, 0x27, 0x27, 0x27, "ds_min_rtn_u32"),
1164
(0x28, 0x28, 0x28, 0x28, 0x28, "ds_max_rtn_u32"),
1165
(0x29, 0x29, 0x29, 0x29, 0x29, "ds_and_rtn_b32"),
1166
(0x2a, 0x2a, 0x2a, 0x2a, 0x2a, "ds_or_rtn_b32"),
1167
(0x2b, 0x2b, 0x2b, 0x2b, 0x2b, "ds_xor_rtn_b32"),
1168
(0x2c, 0x2c, 0x2c, 0x2c, 0x2c, "ds_mskor_rtn_b32"),
1169
(0x2d, 0x2d, 0x2d, 0x2d, 0x2d, "ds_wrxchg_rtn_b32"),
1170
(0x2e, 0x2e, 0x2e, 0x2e, 0x2e, "ds_wrxchg2_rtn_b32"),
1171
(0x2f, 0x2f, 0x2f, 0x2f, 0x2f, "ds_wrxchg2st64_rtn_b32"),
1172
(0x30, 0x30, 0x30, 0x30, 0x30, "ds_cmpst_rtn_b32"),
1173
(0x31, 0x31, 0x31, 0x31, 0x31, "ds_cmpst_rtn_f32"),
1174
(0x32, 0x32, 0x32, 0x32, 0x32, "ds_min_rtn_f32"),
1175
(0x33, 0x33, 0x33, 0x33, 0x33, "ds_max_rtn_f32"),
1176
( -1, 0x34, 0x34, 0x34, 0x34, "ds_wrap_rtn_b32"),
1177
( -1, -1, 0x35, 0x35, 0x55, "ds_add_rtn_f32"),
1178
(0x36, 0x36, 0x36, 0x36, 0x36, "ds_read_b32"),
1179
(0x37, 0x37, 0x37, 0x37, 0x37, "ds_read2_b32"),
1180
(0x38, 0x38, 0x38, 0x38, 0x38, "ds_read2st64_b32"),
1181
(0x39, 0x39, 0x39, 0x39, 0x39, "ds_read_i8"),
1182
(0x3a, 0x3a, 0x3a, 0x3a, 0x3a, "ds_read_u8"),
1183
(0x3b, 0x3b, 0x3b, 0x3b, 0x3b, "ds_read_i16"),
1184
(0x3c, 0x3c, 0x3c, 0x3c, 0x3c, "ds_read_u16"),
1185
(0x35, 0x35, 0x3d, 0x3d, 0x35, "ds_swizzle_b32"), #data1 & offset, no addr/data2
1186
( -1, -1, 0x3e, 0x3e, 0xb2, "ds_permute_b32"),
1187
( -1, -1, 0x3f, 0x3f, 0xb3, "ds_bpermute_b32"),
1188
(0x40, 0x40, 0x40, 0x40, 0x40, "ds_add_u64"),
1189
(0x41, 0x41, 0x41, 0x41, 0x41, "ds_sub_u64"),
1190
(0x42, 0x42, 0x42, 0x42, 0x42, "ds_rsub_u64"),
1191
(0x43, 0x43, 0x43, 0x43, 0x43, "ds_inc_u64"),
1192
(0x44, 0x44, 0x44, 0x44, 0x44, "ds_dec_u64"),
1193
(0x45, 0x45, 0x45, 0x45, 0x45, "ds_min_i64"),
1194
(0x46, 0x46, 0x46, 0x46, 0x46, "ds_max_i64"),
1195
(0x47, 0x47, 0x47, 0x47, 0x47, "ds_min_u64"),
1196
(0x48, 0x48, 0x48, 0x48, 0x48, "ds_max_u64"),
1197
(0x49, 0x49, 0x49, 0x49, 0x49, "ds_and_b64"),
1198
(0x4a, 0x4a, 0x4a, 0x4a, 0x4a, "ds_or_b64"),
1199
(0x4b, 0x4b, 0x4b, 0x4b, 0x4b, "ds_xor_b64"),
1200
(0x4c, 0x4c, 0x4c, 0x4c, 0x4c, "ds_mskor_b64"),
1201
(0x4d, 0x4d, 0x4d, 0x4d, 0x4d, "ds_write_b64"),
1202
(0x4e, 0x4e, 0x4e, 0x4e, 0x4e, "ds_write2_b64"),
1203
(0x4f, 0x4f, 0x4f, 0x4f, 0x4f, "ds_write2st64_b64"),
1204
(0x50, 0x50, 0x50, 0x50, 0x50, "ds_cmpst_b64"),
1205
(0x51, 0x51, 0x51, 0x51, 0x51, "ds_cmpst_f64"),
1206
(0x52, 0x52, 0x52, 0x52, 0x52, "ds_min_f64"),
1207
(0x53, 0x53, 0x53, 0x53, 0x53, "ds_max_f64"),
1208
( -1, -1, -1, 0x54, 0xa0, "ds_write_b8_d16_hi"),
1209
( -1, -1, -1, 0x55, 0xa1, "ds_write_b16_d16_hi"),
1210
( -1, -1, -1, 0x56, 0xa2, "ds_read_u8_d16"),
1211
( -1, -1, -1, 0x57, 0xa3, "ds_read_u8_d16_hi"),
1212
( -1, -1, -1, 0x58, 0xa4, "ds_read_i8_d16"),
1213
( -1, -1, -1, 0x59, 0xa5, "ds_read_i8_d16_hi"),
1214
( -1, -1, -1, 0x5a, 0xa6, "ds_read_u16_d16"),
1215
( -1, -1, -1, 0x5b, 0xa7, "ds_read_u16_d16_hi"),
1216
(0x60, 0x60, 0x60, 0x60, 0x60, "ds_add_rtn_u64"),
1217
(0x61, 0x61, 0x61, 0x61, 0x61, "ds_sub_rtn_u64"),
1218
(0x62, 0x62, 0x62, 0x62, 0x62, "ds_rsub_rtn_u64"),
1219
(0x63, 0x63, 0x63, 0x63, 0x63, "ds_inc_rtn_u64"),
1220
(0x64, 0x64, 0x64, 0x64, 0x64, "ds_dec_rtn_u64"),
1221
(0x65, 0x65, 0x65, 0x65, 0x65, "ds_min_rtn_i64"),
1222
(0x66, 0x66, 0x66, 0x66, 0x66, "ds_max_rtn_i64"),
1223
(0x67, 0x67, 0x67, 0x67, 0x67, "ds_min_rtn_u64"),
1224
(0x68, 0x68, 0x68, 0x68, 0x68, "ds_max_rtn_u64"),
1225
(0x69, 0x69, 0x69, 0x69, 0x69, "ds_and_rtn_b64"),
1226
(0x6a, 0x6a, 0x6a, 0x6a, 0x6a, "ds_or_rtn_b64"),
1227
(0x6b, 0x6b, 0x6b, 0x6b, 0x6b, "ds_xor_rtn_b64"),
1228
(0x6c, 0x6c, 0x6c, 0x6c, 0x6c, "ds_mskor_rtn_b64"),
1229
(0x6d, 0x6d, 0x6d, 0x6d, 0x6d, "ds_wrxchg_rtn_b64"),
1230
(0x6e, 0x6e, 0x6e, 0x6e, 0x6e, "ds_wrxchg2_rtn_b64"),
1231
(0x6f, 0x6f, 0x6f, 0x6f, 0x6f, "ds_wrxchg2st64_rtn_b64"),
1232
(0x70, 0x70, 0x70, 0x70, 0x70, "ds_cmpst_rtn_b64"),
1233
(0x71, 0x71, 0x71, 0x71, 0x71, "ds_cmpst_rtn_f64"),
1234
(0x72, 0x72, 0x72, 0x72, 0x72, "ds_min_rtn_f64"),
1235
(0x73, 0x73, 0x73, 0x73, 0x73, "ds_max_rtn_f64"),
1236
(0x76, 0x76, 0x76, 0x76, 0x76, "ds_read_b64"),
1237
(0x77, 0x77, 0x77, 0x77, 0x77, "ds_read2_b64"),
1238
(0x78, 0x78, 0x78, 0x78, 0x78, "ds_read2st64_b64"),
1239
( -1, 0x7e, 0x7e, 0x7e, 0x7e, "ds_condxchg32_rtn_b64"),
1240
(0x80, 0x80, 0x80, 0x80, 0x80, "ds_add_src2_u32"),
1241
(0x81, 0x81, 0x81, 0x81, 0x81, "ds_sub_src2_u32"),
1242
(0x82, 0x82, 0x82, 0x82, 0x82, "ds_rsub_src2_u32"),
1243
(0x83, 0x83, 0x83, 0x83, 0x83, "ds_inc_src2_u32"),
1244
(0x84, 0x84, 0x84, 0x84, 0x84, "ds_dec_src2_u32"),
1245
(0x85, 0x85, 0x85, 0x85, 0x85, "ds_min_src2_i32"),
1246
(0x86, 0x86, 0x86, 0x86, 0x86, "ds_max_src2_i32"),
1247
(0x87, 0x87, 0x87, 0x87, 0x87, "ds_min_src2_u32"),
1248
(0x88, 0x88, 0x88, 0x88, 0x88, "ds_max_src2_u32"),
1249
(0x89, 0x89, 0x89, 0x89, 0x89, "ds_and_src2_b32"),
1250
(0x8a, 0x8a, 0x8a, 0x8a, 0x8a, "ds_or_src2_b32"),
1251
(0x8b, 0x8b, 0x8b, 0x8b, 0x8b, "ds_xor_src2_b32"),
1252
(0x8d, 0x8d, 0x8d, 0x8d, 0x8d, "ds_write_src2_b32"),
1253
(0x92, 0x92, 0x92, 0x92, 0x92, "ds_min_src2_f32"),
1254
(0x93, 0x93, 0x93, 0x93, 0x93, "ds_max_src2_f32"),
1255
( -1, -1, 0x95, 0x95, 0x95, "ds_add_src2_f32"),
1256
( -1, 0x18, 0x98, 0x98, 0x18, "ds_gws_sema_release_all"),
1257
(0x19, 0x19, 0x99, 0x99, 0x19, "ds_gws_init"),
1258
(0x1a, 0x1a, 0x9a, 0x9a, 0x1a, "ds_gws_sema_v"),
1259
(0x1b, 0x1b, 0x9b, 0x9b, 0x1b, "ds_gws_sema_br"),
1260
(0x1c, 0x1c, 0x9c, 0x9c, 0x1c, "ds_gws_sema_p"),
1261
(0x1d, 0x1d, 0x9d, 0x9d, 0x1d, "ds_gws_barrier"),
1262
( -1, -1, 0xb6, 0xb6, 0xb1, "ds_read_addtid_b32"),
1263
(0x3d, 0x3d, 0xbd, 0xbd, 0x3d, "ds_consume"),
1264
(0x3e, 0x3e, 0xbe, 0xbe, 0x3e, "ds_append"),
1265
(0x3f, 0x3f, 0xbf, 0xbf, 0x3f, "ds_ordered_count"),
1266
(0xc0, 0xc0, 0xc0, 0xc0, 0xc0, "ds_add_src2_u64"),
1267
(0xc1, 0xc1, 0xc1, 0xc1, 0xc1, "ds_sub_src2_u64"),
1268
(0xc2, 0xc2, 0xc2, 0xc2, 0xc2, "ds_rsub_src2_u64"),
1269
(0xc3, 0xc3, 0xc3, 0xc3, 0xc3, "ds_inc_src2_u64"),
1270
(0xc4, 0xc4, 0xc4, 0xc4, 0xc4, "ds_dec_src2_u64"),
1271
(0xc5, 0xc5, 0xc5, 0xc5, 0xc5, "ds_min_src2_i64"),
1272
(0xc6, 0xc6, 0xc6, 0xc6, 0xc6, "ds_max_src2_i64"),
1273
(0xc7, 0xc7, 0xc7, 0xc7, 0xc7, "ds_min_src2_u64"),
1274
(0xc8, 0xc8, 0xc8, 0xc8, 0xc8, "ds_max_src2_u64"),
1275
(0xc9, 0xc9, 0xc9, 0xc9, 0xc9, "ds_and_src2_b64"),
1276
(0xca, 0xca, 0xca, 0xca, 0xca, "ds_or_src2_b64"),
1277
(0xcb, 0xcb, 0xcb, 0xcb, 0xcb, "ds_xor_src2_b64"),
1278
(0xcd, 0xcd, 0xcd, 0xcd, 0xcd, "ds_write_src2_b64"),
1279
(0xd2, 0xd2, 0xd2, 0xd2, 0xd2, "ds_min_src2_f64"),
1280
(0xd3, 0xd3, 0xd3, 0xd3, 0xd3, "ds_max_src2_f64"),
1281
( -1, 0xde, 0xde, 0xde, 0xde, "ds_write_b96"),
1282
( -1, 0xdf, 0xdf, 0xdf, 0xdf, "ds_write_b128"),
1283
( -1, 0xfd, 0xfd, -1, -1, "ds_condxchg32_rtn_b128"),
1284
( -1, 0xfe, 0xfe, 0xfe, 0xfe, "ds_read_b96"),
1285
( -1, 0xff, 0xff, 0xff, 0xff, "ds_read_b128"),
1286
}
1287
# Register every DS table row. Only the gfx7, gfx9 and gfx10 columns are
# forwarded to opcode(); the gfx6 and gfx8 columns are unpacked but unused.
for gfx6, gfx7, gfx8, gfx9, gfx10, name in DS:
    opcode(name, gfx7, gfx9, gfx10, Format.DS, InstrClass.DS)
1289
1290
# MUBUF instructions:
# Row layout: (gfx6, gfx7, gfx8, gfx9, gfx10, name).
# NOTE(review): -1 presumably marks "no encoding on that generation"; the
# duplicate-opcode check at the end of this file skips such entries.
MUBUF = {
    (0x00, 0x00, 0x00, 0x00, 0x00, "buffer_load_format_x"),
    (0x01, 0x01, 0x01, 0x01, 0x01, "buffer_load_format_xy"),
    (0x02, 0x02, 0x02, 0x02, 0x02, "buffer_load_format_xyz"),
    (0x03, 0x03, 0x03, 0x03, 0x03, "buffer_load_format_xyzw"),
    (0x04, 0x04, 0x04, 0x04, 0x04, "buffer_store_format_x"),
    (0x05, 0x05, 0x05, 0x05, 0x05, "buffer_store_format_xy"),
    (0x06, 0x06, 0x06, 0x06, 0x06, "buffer_store_format_xyz"),
    (0x07, 0x07, 0x07, 0x07, 0x07, "buffer_store_format_xyzw"),
    (  -1,   -1, 0x08, 0x08, 0x80, "buffer_load_format_d16_x"),
    (  -1,   -1, 0x09, 0x09, 0x81, "buffer_load_format_d16_xy"),
    (  -1,   -1, 0x0a, 0x0a, 0x82, "buffer_load_format_d16_xyz"),
    (  -1,   -1, 0x0b, 0x0b, 0x83, "buffer_load_format_d16_xyzw"),
    (  -1,   -1, 0x0c, 0x0c, 0x84, "buffer_store_format_d16_x"),
    (  -1,   -1, 0x0d, 0x0d, 0x85, "buffer_store_format_d16_xy"),
    (  -1,   -1, 0x0e, 0x0e, 0x86, "buffer_store_format_d16_xyz"),
    (  -1,   -1, 0x0f, 0x0f, 0x87, "buffer_store_format_d16_xyzw"),
    (0x08, 0x08, 0x10, 0x10, 0x08, "buffer_load_ubyte"),
    (0x09, 0x09, 0x11, 0x11, 0x09, "buffer_load_sbyte"),
    (0x0a, 0x0a, 0x12, 0x12, 0x0a, "buffer_load_ushort"),
    (0x0b, 0x0b, 0x13, 0x13, 0x0b, "buffer_load_sshort"),
    (0x0c, 0x0c, 0x14, 0x14, 0x0c, "buffer_load_dword"),
    (0x0d, 0x0d, 0x15, 0x15, 0x0d, "buffer_load_dwordx2"),
    (  -1, 0x0f, 0x16, 0x16, 0x0f, "buffer_load_dwordx3"),
    (0x0f, 0x0e, 0x17, 0x17, 0x0e, "buffer_load_dwordx4"),
    (0x18, 0x18, 0x18, 0x18, 0x18, "buffer_store_byte"),
    (  -1,   -1,   -1, 0x19, 0x19, "buffer_store_byte_d16_hi"),
    (0x1a, 0x1a, 0x1a, 0x1a, 0x1a, "buffer_store_short"),
    (  -1,   -1,   -1, 0x1b, 0x1b, "buffer_store_short_d16_hi"),
    (0x1c, 0x1c, 0x1c, 0x1c, 0x1c, "buffer_store_dword"),
    (0x1d, 0x1d, 0x1d, 0x1d, 0x1d, "buffer_store_dwordx2"),
    (  -1, 0x1f, 0x1e, 0x1e, 0x1f, "buffer_store_dwordx3"),
    (0x1e, 0x1e, 0x1f, 0x1f, 0x1e, "buffer_store_dwordx4"),
    (  -1,   -1,   -1, 0x20, 0x20, "buffer_load_ubyte_d16"),
    (  -1,   -1,   -1, 0x21, 0x21, "buffer_load_ubyte_d16_hi"),
    (  -1,   -1,   -1, 0x22, 0x22, "buffer_load_sbyte_d16"),
    (  -1,   -1,   -1, 0x23, 0x23, "buffer_load_sbyte_d16_hi"),
    (  -1,   -1,   -1, 0x24, 0x24, "buffer_load_short_d16"),
    (  -1,   -1,   -1, 0x25, 0x25, "buffer_load_short_d16_hi"),
    (  -1,   -1,   -1, 0x26, 0x26, "buffer_load_format_d16_hi_x"),
    (  -1,   -1,   -1, 0x27, 0x27, "buffer_store_format_d16_hi_x"),
    (  -1,   -1, 0x3d, 0x3d,   -1, "buffer_store_lds_dword"),
    (0x71, 0x71, 0x3e, 0x3e,   -1, "buffer_wbinvl1"),
    (0x70, 0x70, 0x3f, 0x3f,   -1, "buffer_wbinvl1_vol"),
    (0x30, 0x30, 0x40, 0x40, 0x30, "buffer_atomic_swap"),
    (0x31, 0x31, 0x41, 0x41, 0x31, "buffer_atomic_cmpswap"),
    (0x32, 0x32, 0x42, 0x42, 0x32, "buffer_atomic_add"),
    (0x33, 0x33, 0x43, 0x43, 0x33, "buffer_atomic_sub"),
    (0x34,   -1,   -1,   -1,   -1, "buffer_atomic_rsub"),
    (0x35, 0x35, 0x44, 0x44, 0x35, "buffer_atomic_smin"),
    (0x36, 0x36, 0x45, 0x45, 0x36, "buffer_atomic_umin"),
    (0x37, 0x37, 0x46, 0x46, 0x37, "buffer_atomic_smax"),
    (0x38, 0x38, 0x47, 0x47, 0x38, "buffer_atomic_umax"),
    (0x39, 0x39, 0x48, 0x48, 0x39, "buffer_atomic_and"),
    (0x3a, 0x3a, 0x49, 0x49, 0x3a, "buffer_atomic_or"),
    (0x3b, 0x3b, 0x4a, 0x4a, 0x3b, "buffer_atomic_xor"),
    (0x3c, 0x3c, 0x4b, 0x4b, 0x3c, "buffer_atomic_inc"),
    (0x3d, 0x3d, 0x4c, 0x4c, 0x3d, "buffer_atomic_dec"),
    (0x3e, 0x3e,   -1,   -1, 0x3e, "buffer_atomic_fcmpswap"),
    (0x3f, 0x3f,   -1,   -1, 0x3f, "buffer_atomic_fmin"),
    (0x40, 0x40,   -1,   -1, 0x40, "buffer_atomic_fmax"),
    (0x50, 0x50, 0x60, 0x60, 0x50, "buffer_atomic_swap_x2"),
    (0x51, 0x51, 0x61, 0x61, 0x51, "buffer_atomic_cmpswap_x2"),
    (0x52, 0x52, 0x62, 0x62, 0x52, "buffer_atomic_add_x2"),
    (0x53, 0x53, 0x63, 0x63, 0x53, "buffer_atomic_sub_x2"),
    (0x54,   -1,   -1,   -1,   -1, "buffer_atomic_rsub_x2"),
    (0x55, 0x55, 0x64, 0x64, 0x55, "buffer_atomic_smin_x2"),
    (0x56, 0x56, 0x65, 0x65, 0x56, "buffer_atomic_umin_x2"),
    (0x57, 0x57, 0x66, 0x66, 0x57, "buffer_atomic_smax_x2"),
    (0x58, 0x58, 0x67, 0x67, 0x58, "buffer_atomic_umax_x2"),
    (0x59, 0x59, 0x68, 0x68, 0x59, "buffer_atomic_and_x2"),
    (0x5a, 0x5a, 0x69, 0x69, 0x5a, "buffer_atomic_or_x2"),
    (0x5b, 0x5b, 0x6a, 0x6a, 0x5b, "buffer_atomic_xor_x2"),
    (0x5c, 0x5c, 0x6b, 0x6b, 0x5c, "buffer_atomic_inc_x2"),
    (0x5d, 0x5d, 0x6c, 0x6c, 0x5d, "buffer_atomic_dec_x2"),
    (0x5e, 0x5e,   -1,   -1, 0x5e, "buffer_atomic_fcmpswap_x2"),
    (0x5f, 0x5f,   -1,   -1, 0x5f, "buffer_atomic_fmin_x2"),
    (0x60, 0x60,   -1,   -1, 0x60, "buffer_atomic_fmax_x2"),
    (  -1,   -1,   -1,   -1, 0x71, "buffer_gl0_inv"),
    (  -1,   -1,   -1,   -1, 0x72, "buffer_gl1_inv"),
    (  -1,   -1,   -1,   -1, 0x34, "buffer_atomic_csub"),  # GFX10.3+. seems glc must be set
}
# Only the gfx7, gfx9 and gfx10 columns are registered. An instruction is
# flagged atomic when its name contains the substring "atomic".
for gfx6, gfx7, gfx8, gfx9, gfx10, name in MUBUF:
    opcode(name, gfx7, gfx9, gfx10, Format.MUBUF, InstrClass.VMem,
           is_atomic = "atomic" in name)
1375
1376
# MTBUF (typed buffer) instructions.
# Row layout: (gfx6, gfx7, gfx8, gfx9, gfx10, name).
MTBUF = {
    (0x00, 0x00, 0x00, 0x00, 0x00, "tbuffer_load_format_x"),
    (0x01, 0x01, 0x01, 0x01, 0x01, "tbuffer_load_format_xy"),
    (0x02, 0x02, 0x02, 0x02, 0x02, "tbuffer_load_format_xyz"),
    (0x03, 0x03, 0x03, 0x03, 0x03, "tbuffer_load_format_xyzw"),
    (0x04, 0x04, 0x04, 0x04, 0x04, "tbuffer_store_format_x"),
    (0x05, 0x05, 0x05, 0x05, 0x05, "tbuffer_store_format_xy"),
    (0x06, 0x06, 0x06, 0x06, 0x06, "tbuffer_store_format_xyz"),
    (0x07, 0x07, 0x07, 0x07, 0x07, "tbuffer_store_format_xyzw"),
    (  -1,   -1, 0x08, 0x08, 0x08, "tbuffer_load_format_d16_x"),
    (  -1,   -1, 0x09, 0x09, 0x09, "tbuffer_load_format_d16_xy"),
    (  -1,   -1, 0x0a, 0x0a, 0x0a, "tbuffer_load_format_d16_xyz"),
    (  -1,   -1, 0x0b, 0x0b, 0x0b, "tbuffer_load_format_d16_xyzw"),
    (  -1,   -1, 0x0c, 0x0c, 0x0c, "tbuffer_store_format_d16_x"),
    (  -1,   -1, 0x0d, 0x0d, 0x0d, "tbuffer_store_format_d16_xy"),
    (  -1,   -1, 0x0e, 0x0e, 0x0e, "tbuffer_store_format_d16_xyz"),
    (  -1,   -1, 0x0f, 0x0f, 0x0f, "tbuffer_store_format_d16_xyzw"),
}
# Only the gfx7, gfx9 and gfx10 columns are passed on to opcode().
for gfx6, gfx7, gfx8, gfx9, gfx10, name in MTBUF:
    opcode(name, gfx7, gfx9, gfx10, Format.MTBUF, InstrClass.VMem)
1396
1397
1398
# MIMG load/store/query instructions. Row layout: (code, name).
IMAGE = {
    (0x00, "image_load"),
    (0x01, "image_load_mip"),
    (0x02, "image_load_pck"),
    (0x03, "image_load_pck_sgn"),
    (0x04, "image_load_mip_pck"),
    (0x05, "image_load_mip_pck_sgn"),
    (0x08, "image_store"),
    (0x09, "image_store_mip"),
    (0x0a, "image_store_pck"),
    (0x0b, "image_store_mip_pck"),
    (0x0e, "image_get_resinfo"),
    (0x60, "image_get_lod"),
}
# The same code is registered for all three encodings, i.e.
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
for code, name in IMAGE:
    opcode(name, code, code, code, Format.MIMG, InstrClass.VMem)

# GFX10.3+: only a gfx10 encoding is registered.
opcode("image_msaa_load", -1, -1, 0x80, Format.MIMG, InstrClass.VMem)
1417
1418
# MIMG atomic instructions. Row layout: (gfx6, gfx7, gfx89, name), where the
# gfx89 column serves both GFX8 and GFX9.
IMAGE_ATOMIC = {
    (0x0f, 0x0f, 0x10, "image_atomic_swap"),
    (0x10, 0x10, 0x11, "image_atomic_cmpswap"),
    (0x11, 0x11, 0x12, "image_atomic_add"),
    (0x12, 0x12, 0x13, "image_atomic_sub"),
    (0x13,   -1,   -1, "image_atomic_rsub"),
    (0x14, 0x14, 0x14, "image_atomic_smin"),
    (0x15, 0x15, 0x15, "image_atomic_umin"),
    (0x16, 0x16, 0x16, "image_atomic_smax"),
    (0x17, 0x17, 0x17, "image_atomic_umax"),
    (0x18, 0x18, 0x18, "image_atomic_and"),
    (0x19, 0x19, 0x19, "image_atomic_or"),
    (0x1a, 0x1a, 0x1a, "image_atomic_xor"),
    (0x1b, 0x1b, 0x1b, "image_atomic_inc"),
    (0x1c, 0x1c, 0x1c, "image_atomic_dec"),
    (0x1d, 0x1d,   -1, "image_atomic_fcmpswap"),
    (0x1e, 0x1e,   -1, "image_atomic_fmin"),
    (0x1f, 0x1f,   -1, "image_atomic_fmax"),
}
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (gfx6, gfx7, gfx89, gfx89, ???, name)
# The gfx7 and gfx10 opcodes are the same here, so the gfx7 column is passed
# as the gfx10 encoding as well. Every entry is registered as atomic.
for gfx6, gfx7, gfx89, name in IMAGE_ATOMIC:
    opcode(name, gfx7, gfx89, gfx7, Format.MIMG, InstrClass.VMem,
           is_atomic = True)
1441
1442
# MIMG sample instructions. Row layout: (code, name).
IMAGE_SAMPLE = {
    (0x20, "image_sample"),
    (0x21, "image_sample_cl"),
    (0x22, "image_sample_d"),
    (0x23, "image_sample_d_cl"),
    (0x24, "image_sample_l"),
    (0x25, "image_sample_b"),
    (0x26, "image_sample_b_cl"),
    (0x27, "image_sample_lz"),
    (0x28, "image_sample_c"),
    (0x29, "image_sample_c_cl"),
    (0x2a, "image_sample_c_d"),
    (0x2b, "image_sample_c_d_cl"),
    (0x2c, "image_sample_c_l"),
    (0x2d, "image_sample_c_b"),
    (0x2e, "image_sample_c_b_cl"),
    (0x2f, "image_sample_c_lz"),
    (0x30, "image_sample_o"),
    (0x31, "image_sample_cl_o"),
    (0x32, "image_sample_d_o"),
    (0x33, "image_sample_d_cl_o"),
    (0x34, "image_sample_l_o"),
    (0x35, "image_sample_b_o"),
    (0x36, "image_sample_b_cl_o"),
    (0x37, "image_sample_lz_o"),
    (0x38, "image_sample_c_o"),
    (0x39, "image_sample_c_cl_o"),
    (0x3a, "image_sample_c_d_o"),
    (0x3b, "image_sample_c_d_cl_o"),
    (0x3c, "image_sample_c_l_o"),
    (0x3d, "image_sample_c_b_o"),
    (0x3e, "image_sample_c_b_cl_o"),
    (0x3f, "image_sample_c_lz_o"),
    (0x68, "image_sample_cd"),
    (0x69, "image_sample_cd_cl"),
    (0x6a, "image_sample_c_cd"),
    (0x6b, "image_sample_c_cd_cl"),
    (0x6c, "image_sample_cd_o"),
    (0x6d, "image_sample_cd_cl_o"),
    (0x6e, "image_sample_c_cd_o"),
    (0x6f, "image_sample_c_cd_cl_o"),
}
# The same code is registered for all three encodings, i.e.
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
for code, name in IMAGE_SAMPLE:
    opcode(name, code, code, code, Format.MIMG, InstrClass.VMem)
1487
1488
# MIMG gather4 instructions. Row layout: (code, name).
IMAGE_GATHER4 = {
    (0x40, "image_gather4"),
    (0x41, "image_gather4_cl"),
    #(0x42, "image_gather4h"), VEGA only?
    (0x44, "image_gather4_l"),  # following instructions have different opcodes according to ISA sheet.
    (0x45, "image_gather4_b"),
    (0x46, "image_gather4_b_cl"),
    (0x47, "image_gather4_lz"),
    (0x48, "image_gather4_c"),
    (0x49, "image_gather4_c_cl"),  # previous instructions have different opcodes according to ISA sheet.
    #(0x4a, "image_gather4h_pck"), VEGA only?
    #(0x4b, "image_gather8h_pck"), VEGA only?
    (0x4c, "image_gather4_c_l"),
    (0x4d, "image_gather4_c_b"),
    (0x4e, "image_gather4_c_b_cl"),
    (0x4f, "image_gather4_c_lz"),
    (0x50, "image_gather4_o"),
    (0x51, "image_gather4_cl_o"),
    (0x54, "image_gather4_l_o"),
    (0x55, "image_gather4_b_o"),
    (0x56, "image_gather4_b_cl_o"),
    (0x57, "image_gather4_lz_o"),
    (0x58, "image_gather4_c_o"),
    (0x59, "image_gather4_c_cl_o"),
    (0x5c, "image_gather4_c_l_o"),
    (0x5d, "image_gather4_c_b_o"),
    (0x5e, "image_gather4_c_b_cl_o"),
    (0x5f, "image_gather4_c_lz_o"),
}
# The same code is registered for all three encodings, i.e.
# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)
for code, name in IMAGE_GATHER4:
    opcode(name, code, code, code, Format.MIMG, InstrClass.VMem)

# Only a gfx10 encoding is registered (0xe7 == 231).
opcode("image_bvh64_intersect_ray", -1, -1, 0xe7, Format.MIMG, InstrClass.VMem)
1522
1523
# FLAT instructions. Row layout: (GFX7, GFX8_9, GFX10, name).
FLAT = {
    (0x08, 0x10, 0x08, "flat_load_ubyte"),
    (0x09, 0x11, 0x09, "flat_load_sbyte"),
    (0x0a, 0x12, 0x0a, "flat_load_ushort"),
    (0x0b, 0x13, 0x0b, "flat_load_sshort"),
    (0x0c, 0x14, 0x0c, "flat_load_dword"),
    (0x0d, 0x15, 0x0d, "flat_load_dwordx2"),
    (0x0f, 0x16, 0x0f, "flat_load_dwordx3"),
    (0x0e, 0x17, 0x0e, "flat_load_dwordx4"),
    (0x18, 0x18, 0x18, "flat_store_byte"),
    (  -1, 0x19, 0x19, "flat_store_byte_d16_hi"),
    (0x1a, 0x1a, 0x1a, "flat_store_short"),
    (  -1, 0x1b, 0x1b, "flat_store_short_d16_hi"),
    (0x1c, 0x1c, 0x1c, "flat_store_dword"),
    (0x1d, 0x1d, 0x1d, "flat_store_dwordx2"),
    (0x1f, 0x1e, 0x1f, "flat_store_dwordx3"),
    (0x1e, 0x1f, 0x1e, "flat_store_dwordx4"),
    (  -1, 0x20, 0x20, "flat_load_ubyte_d16"),
    (  -1, 0x21, 0x21, "flat_load_ubyte_d16_hi"),
    (  -1, 0x22, 0x22, "flat_load_sbyte_d16"),
    (  -1, 0x23, 0x23, "flat_load_sbyte_d16_hi"),
    (  -1, 0x24, 0x24, "flat_load_short_d16"),
    (  -1, 0x25, 0x25, "flat_load_short_d16_hi"),
    (0x30, 0x40, 0x30, "flat_atomic_swap"),
    (0x31, 0x41, 0x31, "flat_atomic_cmpswap"),
    (0x32, 0x42, 0x32, "flat_atomic_add"),
    (0x33, 0x43, 0x33, "flat_atomic_sub"),
    (0x35, 0x44, 0x35, "flat_atomic_smin"),
    (0x36, 0x45, 0x36, "flat_atomic_umin"),
    (0x37, 0x46, 0x37, "flat_atomic_smax"),
    (0x38, 0x47, 0x38, "flat_atomic_umax"),
    (0x39, 0x48, 0x39, "flat_atomic_and"),
    (0x3a, 0x49, 0x3a, "flat_atomic_or"),
    (0x3b, 0x4a, 0x3b, "flat_atomic_xor"),
    (0x3c, 0x4b, 0x3c, "flat_atomic_inc"),
    (0x3d, 0x4c, 0x3d, "flat_atomic_dec"),
    (0x3e,   -1, 0x3e, "flat_atomic_fcmpswap"),
    (0x3f,   -1, 0x3f, "flat_atomic_fmin"),
    (0x40,   -1, 0x40, "flat_atomic_fmax"),
    (0x50, 0x60, 0x50, "flat_atomic_swap_x2"),
    (0x51, 0x61, 0x51, "flat_atomic_cmpswap_x2"),
    (0x52, 0x62, 0x52, "flat_atomic_add_x2"),
    (0x53, 0x63, 0x53, "flat_atomic_sub_x2"),
    (0x55, 0x64, 0x55, "flat_atomic_smin_x2"),
    (0x56, 0x65, 0x56, "flat_atomic_umin_x2"),
    (0x57, 0x66, 0x57, "flat_atomic_smax_x2"),
    (0x58, 0x67, 0x58, "flat_atomic_umax_x2"),
    (0x59, 0x68, 0x59, "flat_atomic_and_x2"),
    (0x5a, 0x69, 0x5a, "flat_atomic_or_x2"),
    (0x5b, 0x6a, 0x5b, "flat_atomic_xor_x2"),
    (0x5c, 0x6b, 0x5c, "flat_atomic_inc_x2"),
    (0x5d, 0x6c, 0x5d, "flat_atomic_dec_x2"),
    (0x5e,   -1, 0x5e, "flat_atomic_fcmpswap_x2"),
    (0x5f,   -1, 0x5f, "flat_atomic_fmin_x2"),
    (0x60,   -1, 0x60, "flat_atomic_fmax_x2"),
}
# The GFX8_9 column is passed as the gfx9 encoding. An instruction is flagged
# atomic when its name contains the substring "atomic".
# TODO: also LDS?
for gfx7, gfx8, gfx10, name in FLAT:
    opcode(name, gfx7, gfx8, gfx10, Format.FLAT, InstrClass.VMem,
           is_atomic = "atomic" in name)
1582
1583
# GLOBAL instructions. Row layout: (GFX8_9, GFX10, name).
GLOBAL = {
    (0x10, 0x08, "global_load_ubyte"),
    (0x11, 0x09, "global_load_sbyte"),
    (0x12, 0x0a, "global_load_ushort"),
    (0x13, 0x0b, "global_load_sshort"),
    (0x14, 0x0c, "global_load_dword"),
    (0x15, 0x0d, "global_load_dwordx2"),
    (0x16, 0x0f, "global_load_dwordx3"),
    (0x17, 0x0e, "global_load_dwordx4"),
    (0x18, 0x18, "global_store_byte"),
    (0x19, 0x19, "global_store_byte_d16_hi"),
    (0x1a, 0x1a, "global_store_short"),
    (0x1b, 0x1b, "global_store_short_d16_hi"),
    (0x1c, 0x1c, "global_store_dword"),
    (0x1d, 0x1d, "global_store_dwordx2"),
    (0x1e, 0x1f, "global_store_dwordx3"),
    (0x1f, 0x1e, "global_store_dwordx4"),
    (0x20, 0x20, "global_load_ubyte_d16"),
    (0x21, 0x21, "global_load_ubyte_d16_hi"),
    (0x22, 0x22, "global_load_sbyte_d16"),
    (0x23, 0x23, "global_load_sbyte_d16_hi"),
    (0x24, 0x24, "global_load_short_d16"),
    (0x25, 0x25, "global_load_short_d16_hi"),
    (0x40, 0x30, "global_atomic_swap"),
    (0x41, 0x31, "global_atomic_cmpswap"),
    (0x42, 0x32, "global_atomic_add"),
    (0x43, 0x33, "global_atomic_sub"),
    (0x44, 0x35, "global_atomic_smin"),
    (0x45, 0x36, "global_atomic_umin"),
    (0x46, 0x37, "global_atomic_smax"),
    (0x47, 0x38, "global_atomic_umax"),
    (0x48, 0x39, "global_atomic_and"),
    (0x49, 0x3a, "global_atomic_or"),
    (0x4a, 0x3b, "global_atomic_xor"),
    (0x4b, 0x3c, "global_atomic_inc"),
    (0x4c, 0x3d, "global_atomic_dec"),
    (  -1, 0x3e, "global_atomic_fcmpswap"),
    (  -1, 0x3f, "global_atomic_fmin"),
    (  -1, 0x40, "global_atomic_fmax"),
    (0x60, 0x50, "global_atomic_swap_x2"),
    (0x61, 0x51, "global_atomic_cmpswap_x2"),
    (0x62, 0x52, "global_atomic_add_x2"),
    (0x63, 0x53, "global_atomic_sub_x2"),
    (0x64, 0x55, "global_atomic_smin_x2"),
    (0x65, 0x56, "global_atomic_umin_x2"),
    (0x66, 0x57, "global_atomic_smax_x2"),
    (0x67, 0x58, "global_atomic_umax_x2"),
    (0x68, 0x59, "global_atomic_and_x2"),
    (0x69, 0x5a, "global_atomic_or_x2"),
    (0x6a, 0x5b, "global_atomic_xor_x2"),
    (0x6b, 0x5c, "global_atomic_inc_x2"),
    (0x6c, 0x5d, "global_atomic_dec_x2"),
    (  -1, 0x5e, "global_atomic_fcmpswap_x2"),
    (  -1, 0x5f, "global_atomic_fmin_x2"),
    (  -1, 0x60, "global_atomic_fmax_x2"),
    (  -1, 0x16, "global_load_dword_addtid"),   # GFX10.3+
    (  -1, 0x17, "global_store_dword_addtid"),  # GFX10.3+
    (  -1, 0x34, "global_atomic_csub"),         # GFX10.3+. seems glc must be set
}
# No gfx7 encoding is registered (-1 is passed). An instruction is flagged
# atomic when its name contains the substring "atomic".
for gfx8, gfx10, name in GLOBAL:
    opcode(name, -1, gfx8, gfx10, Format.GLOBAL, InstrClass.VMem,
           is_atomic = "atomic" in name)
1645
1646
# SCRATCH instructions. Row layout: (GFX8_9, GFX10, name).
SCRATCH = {
    (0x10, 0x08, "scratch_load_ubyte"),
    (0x11, 0x09, "scratch_load_sbyte"),
    (0x12, 0x0a, "scratch_load_ushort"),
    (0x13, 0x0b, "scratch_load_sshort"),
    (0x14, 0x0c, "scratch_load_dword"),
    (0x15, 0x0d, "scratch_load_dwordx2"),
    (0x16, 0x0f, "scratch_load_dwordx3"),
    (0x17, 0x0e, "scratch_load_dwordx4"),
    (0x18, 0x18, "scratch_store_byte"),
    (0x19, 0x19, "scratch_store_byte_d16_hi"),
    (0x1a, 0x1a, "scratch_store_short"),
    (0x1b, 0x1b, "scratch_store_short_d16_hi"),
    (0x1c, 0x1c, "scratch_store_dword"),
    (0x1d, 0x1d, "scratch_store_dwordx2"),
    (0x1e, 0x1f, "scratch_store_dwordx3"),
    (0x1f, 0x1e, "scratch_store_dwordx4"),
    (0x20, 0x20, "scratch_load_ubyte_d16"),
    (0x21, 0x21, "scratch_load_ubyte_d16_hi"),
    (0x22, 0x22, "scratch_load_sbyte_d16"),
    (0x23, 0x23, "scratch_load_sbyte_d16_hi"),
    (0x24, 0x24, "scratch_load_short_d16"),
    (0x25, 0x25, "scratch_load_short_d16_hi"),
}
# No gfx7 encoding is registered (-1 is passed).
for gfx8, gfx10, name in SCRATCH:
    opcode(name, -1, gfx8, gfx10, Format.SCRATCH, InstrClass.VMem)
1673
1674
# Check for duplicate opcode numbers: for each checked generation, no two
# non-pseudo instructions of the same format may share an opcode number.
# On the first unexpected clash, a message is printed and the script exits
# with status 1.
for ver in ['gfx9', 'gfx10']:
    op_to_name = {}
    for op in opcodes.values():
        # Pseudo instructions are excluded from the check.
        if op.format in (Format.PSEUDO, Format.PSEUDO_BRANCH,
                         Format.PSEUDO_BARRIER, Format.PSEUDO_REDUCTION):
            continue

        # -1 entries are skipped for this generation.
        num = getattr(op, 'opcode_' + ver)
        if num == -1:
            continue

        key = (op.format, num)

        # First time this (format, number) pair is seen: remember its owner.
        if key not in op_to_name:
            op_to_name[key] = op.name
            continue

        # Known exceptions: pairs that are allowed to share a number.
        names = {op_to_name[key], op.name}
        if ver in ['gfx8', 'gfx9'] and names == {'v_mul_lo_i32', 'v_mul_lo_u32'}:
            continue
        # v_mad_legacy_f32 is replaced with v_fma_legacy_f32 on GFX10.3
        if ver == 'gfx10' and names == {'v_mad_legacy_f32', 'v_fma_legacy_f32'}:
            continue

        print('%s and %s share the same opcode number (%s)' % (op_to_name[key], op.name, ver))
        sys.exit(1)
1700
1701
# ds_read_u8 and ds_read_u16 write the entire 32-bit VGPR. Opcode's
# constructor does not make it obvious that their definition size should be
# 32 (it only works out accidentally), so the value is pinned here.
assert opcodes['ds_read_u8'].definition_size == 32
assert opcodes['ds_read_u16'].definition_size == 32
1705
1706