Path: blob/21.2-virgl/src/amd/compiler/aco_opcodes.py
4550 views
#
# Copyright (c) 2018 Valve Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#

# Class that represents all the information we have about the opcode
# NOTE: this must be kept in sync with aco_op_info

import sys
from enum import Enum


class InstrClass(Enum):
    """Instruction class tag attached to every registered opcode."""
    Valu32 = 0
    ValuConvert32 = 1
    Valu64 = 2
    ValuQuarterRate32 = 3
    ValuFma = 4
    ValuTranscendental32 = 5
    ValuDouble = 6
    ValuDoubleAdd = 7
    ValuDoubleConvert = 8
    ValuDoubleTranscendental = 9
    Salu = 10
    SMem = 11
    Barrier = 12
    Branch = 13
    Sendmsg = 14
    DS = 15
    Export = 16
    VMem = 17
    Waitcnt = 18
    Other = 19


class Format(Enum):
    """Instruction format of an opcode.

    Values below 1 << 8 are plain enumerators; VOP1 and the members after it
    are single-bit values.
    """
    PSEUDO = 0
    SOP1 = 1
    SOP2 = 2
    SOPK = 3
    SOPP = 4
    SOPC = 5
    SMEM = 6
    DS = 8
    MTBUF = 9
    MUBUF = 10
    MIMG = 11
    EXP = 12
    FLAT = 13
    GLOBAL = 14
    SCRATCH = 15
    PSEUDO_BRANCH = 16
    PSEUDO_BARRIER = 17
    PSEUDO_REDUCTION = 18
    VOP3P = 19
    VOP1 = 1 << 8
    VOP2 = 1 << 9
    VOPC = 1 << 10
    VOP3 = 1 << 11
    VINTRP = 1 << 12
    DPP = 1 << 13
    SDWA = 1 << 14

    def get_builder_fields(self):
        """Return the extra builder fields of this format.

        Each entry is a tuple ``(type, name, default[, dest])``:

        - ``type`` and ``name`` are the strings used for the declaration,
        - ``default`` is the default value, or None when there is none,
        - ``dest`` (optional 4th element) overrides the name returned by
          get_builder_field_dests(); without it the field name is used.

        Formats without extra fields yield an empty list.
        """
        if self == Format.SOPK:
            return [('uint16_t', 'imm', None)]
        if self == Format.SOPP:
            return [('uint32_t', 'block', '-1'),
                    ('uint32_t', 'imm', '0')]
        if self == Format.SMEM:
            return [('memory_sync_info', 'sync', 'memory_sync_info()'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'nv', 'false')]
        if self == Format.DS:
            return [('int16_t', 'offset0', '0'),
                    ('int8_t', 'offset1', '0'),
                    ('bool', 'gds', 'false')]
        if self == Format.MTBUF:
            return [('unsigned', 'dfmt', None),
                    ('unsigned', 'nfmt', None),
                    ('unsigned', 'offset', None),
                    ('bool', 'offen', None),
                    ('bool', 'idxen', 'false'),
                    ('bool', 'disable_wqm', 'false'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'tfe', 'false')]
        if self == Format.MUBUF:
            return [('unsigned', 'offset', None),
                    ('bool', 'offen', None),
                    ('bool', 'swizzled', 'false'),
                    ('bool', 'idxen', 'false'),
                    ('bool', 'addr64', 'false'),
                    ('bool', 'disable_wqm', 'false'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'tfe', 'false'),
                    ('bool', 'lds', 'false')]
        if self == Format.MIMG:
            # A second, unreachable "return [attribute, component]" used to
            # follow this one (a stale copy of the VINTRP list); it has been
            # dropped.
            return [('unsigned', 'dmask', '0xF'),
                    ('bool', 'da', 'false'),
                    ('bool', 'unrm', 'true'),
                    ('bool', 'disable_wqm', 'false'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'dlc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'tfe', 'false'),
                    ('bool', 'lwe', 'false'),
                    ('bool', 'r128_a16', 'false', 'r128'),
                    ('bool', 'd16', 'false')]
        if self == Format.EXP:
            return [('unsigned', 'enabled_mask', None),
                    ('unsigned', 'dest', None),
                    ('bool', 'compr', 'false', 'compressed'),
                    ('bool', 'done', 'false'),
                    ('bool', 'vm', 'false', 'valid_mask')]
        if self == Format.PSEUDO_BRANCH:
            return [('uint32_t', 'target0', '0', 'target[0]'),
                    ('uint32_t', 'target1', '0', 'target[1]')]
        if self == Format.PSEUDO_REDUCTION:
            return [('ReduceOp', 'op', None, 'reduce_op'),
                    ('unsigned', 'cluster_size', '0')]
        if self == Format.PSEUDO_BARRIER:
            return [('memory_sync_info', 'sync', None),
                    ('sync_scope', 'exec_scope', 'scope_invocation')]
        if self == Format.VINTRP:
            return [('unsigned', 'attribute', None),
                    ('unsigned', 'component', None)]
        if self == Format.DPP:
            return [('uint16_t', 'dpp_ctrl', None),
                    ('uint8_t', 'row_mask', '0xF'),
                    ('uint8_t', 'bank_mask', '0xF'),
                    ('bool', 'bound_ctrl', 'true')]
        if self == Format.VOP3P:
            return [('uint8_t', 'opsel_lo', None),
                    ('uint8_t', 'opsel_hi', None)]
        if self in (Format.FLAT, Format.GLOBAL, Format.SCRATCH):
            # NOTE: the offset default is the int 0 (not the string '0' used
            # elsewhere); kept as-is because callers outside this block may
            # rely on the exact value.
            return [('uint16_t', 'offset', 0),
                    ('memory_sync_info', 'sync', 'memory_sync_info()'),
                    ('bool', 'glc', 'false'),
                    ('bool', 'slc', 'false'),
                    ('bool', 'lds', 'false'),
                    ('bool', 'nv', 'false')]
        return []

    def get_builder_field_names(self):
        """Return the name (2nd tuple element) of every builder field."""
        return [field[1] for field in self.get_builder_fields()]

    def get_builder_field_dests(self):
        """Return, per builder field, its 4th element if present, else its name."""
        return [(field[3] if len(field) >= 4 else field[1])
                for field in self.get_builder_fields()]

    def get_builder_field_decls(self):
        """Return 'type name' or 'type name=default' strings for every builder field."""
        decls = []
        for field in self.get_builder_fields():
            ctype, name, default = field[0], field[1], field[2]
            if default is not None:
                decls.append('%s %s=%s' % (ctype, name, default))
            else:
                decls.append('%s %s' % (ctype, name))
        return decls

    def get_builder_initialization(self, num_operands):
        """Return the extra initialization code text for this format.

        Only SDWA produces text: one 'sel' assignment per operand (at most
        two), followed by the 'dst_sel' and 'dst_preserve' statements. Every
        other format yields the empty string.
        """
        res = ''
        if self == Format.SDWA:
            for i in range(min(num_operands, 2)):
                res += 'instr->sel[{0}] = op{0}.op.bytes() == 2 ? sdwa_uword : (op{0}.op.bytes() == 1 ? sdwa_ubyte : sdwa_udword);\n'.format(i)
            res += 'instr->dst_sel = def0.bytes() == 2 ? sdwa_uword : (def0.bytes() == 1 ? sdwa_ubyte : sdwa_udword);\n'
            res += 'if (def0.bytes() < 4) instr->dst_preserve = true;'
        return res


class Opcode(object):
    """Class that represents all the information we have about the opcode
    NOTE: this must be kept in sync with aco_op_info
    """
    def __init__(self, name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic, cls):
        """Parameters:

        - name is the name of the opcode; the operand and definition sizes
          are derived from the type suffixes found in it (e.g. "_f64_i32")
        - opcode_gfx7, opcode_gfx9 and opcode_gfx10 are the integer opcode
          numbers for the respective generations (opcode() defaults them
          to -1)
        - format is the Format member of the opcode
        - input_mod and output_mod are bools, stored as the strings "1"/"0"
        - is_atomic is a truth value, stored as the string "1"/"0"
        - cls is stored unchanged (the callers in this file pass an
          InstrClass member)
        """
        assert isinstance(name, str)
        assert isinstance(opcode_gfx7, int)
        assert isinstance(opcode_gfx9, int)
        assert isinstance(opcode_gfx10, int)
        assert isinstance(format, Format)
        assert isinstance(input_mod, bool)
        assert isinstance(output_mod, bool)

        self.name = name
        self.opcode_gfx7 = opcode_gfx7
        self.opcode_gfx9 = opcode_gfx9
        self.opcode_gfx10 = opcode_gfx10
        self.input_mod = "1" if input_mod else "0"
        self.output_mod = "1" if output_mod else "0"
        self.is_atomic = "1" if is_atomic else "0"
        self.format = format
        self.cls = cls

        # The last name component is taken as the operand type, the one
        # before it (if any) as the definition type.
        parts = name.replace('_e64', '').rsplit('_', 2)
        op_dtype = parts[-1]
        def_dtype = parts[-2] if len(parts) > 1 else parts[-1]

        def_dtype_sizes = {'{}{}'.format(prefix, size): size
                           for prefix in 'biuf' for size in [64, 32, 24, 16]}
        op_dtype_sizes = dict(def_dtype_sizes)
        # inline constants are 32-bit for 16-bit integer/typeless instructions: https://reviews.llvm.org/D81841
        op_dtype_sizes['b16'] = 32
        op_dtype_sizes['i16'] = 32
        op_dtype_sizes['u16'] = 32

        # If we can't tell the definition size and the operand size, default to
        # 32. Some opcodes can have a larger definition size, but
        # get_subdword_definition_info() handles that.
        self.operand_size = op_dtype_sizes.get(op_dtype, 32)
        self.definition_size = def_dtype_sizes.get(def_dtype, self.operand_size)

        # exceptions for operands:
        if 'qsad_' in name:
            self.operand_size = 0
        elif 'sad_' in name:
            self.operand_size = 32
        elif name in ['v_mad_u64_u32', 'v_mad_i64_i32']:
            self.operand_size = 0
        elif self.operand_size == 24:
            self.operand_size = 32
        elif op_dtype == 'u8' or op_dtype == 'i8':
            self.operand_size = 32
        elif name in ['v_cvt_f32_ubyte0', 'v_cvt_f32_ubyte1',
                      'v_cvt_f32_ubyte2', 'v_cvt_f32_ubyte3']:
            self.operand_size = 32

        # exceptions for definitions:
        if 'qsad_' in name:
            self.definition_size = 0
        elif 'sad_' in name:
            self.definition_size = 32
        elif '_pk' in name:
            self.definition_size = 32


# global dictionary of opcodes
opcodes = {}


def opcode(name, opcode_gfx7 = -1, opcode_gfx9 = -1, opcode_gfx10 = -1, format = Format.PSEUDO, cls = InstrClass.Other, input_mod = False, output_mod = False, is_atomic = False):
    """Create an Opcode and register it in the global `opcodes` dictionary.

    Asserts that `name` has not been registered before.
    """
    assert name not in opcodes
    opcodes[name] = Opcode(name, opcode_gfx7, opcode_gfx9, opcode_gfx10, format, input_mod, output_mod, is_atomic, cls)


def default_class(opcodes, cls):
    """Yield every tuple of `opcodes`, appending `cls` to those whose last
    element is not already an InstrClass."""
    for op in opcodes:
        if isinstance(op[-1], InstrClass):
            yield op
        else:
            yield op + (cls,)


opcode("exp", 0, 0, 0, format = Format.EXP, cls = InstrClass.Export)
opcode("p_parallelcopy")
opcode("p_startpgm")
opcode("p_phi")
opcode("p_linear_phi")
opcode("p_as_uniform")
opcode("p_unit_test")

opcode("p_create_vector")
opcode("p_extract_vector")
opcode("p_split_vector")

# start/end the parts where we can use exec based instructions
# implicitly
opcode("p_logical_start")
opcode("p_logical_end")

# e.g. subgroupMin() in SPIR-V
opcode("p_reduce", format=Format.PSEUDO_REDUCTION)
# e.g. 
subgroupInclusiveMin()294opcode("p_inclusive_scan", format=Format.PSEUDO_REDUCTION)295# e.g. subgroupExclusiveMin()296opcode("p_exclusive_scan", format=Format.PSEUDO_REDUCTION)297298opcode("p_branch", format=Format.PSEUDO_BRANCH)299opcode("p_cbranch", format=Format.PSEUDO_BRANCH)300opcode("p_cbranch_z", format=Format.PSEUDO_BRANCH)301opcode("p_cbranch_nz", format=Format.PSEUDO_BRANCH)302303opcode("p_barrier", format=Format.PSEUDO_BARRIER)304305opcode("p_spill")306opcode("p_reload")307308# start/end linear vgprs309opcode("p_start_linear_vgpr")310opcode("p_end_linear_vgpr")311312opcode("p_wqm")313opcode("p_discard_if")314opcode("p_demote_to_helper")315opcode("p_is_helper")316opcode("p_exit_early_if")317318# simulates proper bpermute behavior when it's unsupported, eg. GFX10 wave64319opcode("p_bpermute")320321opcode("p_constaddr")322323# These don't have to be pseudo-ops, but it makes optimization easier to only324# have to consider two instructions.325# (src0 >> (index * bits)) & ((1 << bits) - 1) with optional sign extension326opcode("p_extract") # src1=index, src2=bits, src3=signext327# (src0 & ((1 << bits) - 1)) << (index * bits)328opcode("p_insert") # src1=index, src2=bits329330331# SOP2 instructions: 2 scalar inputs, 1 scalar output (+optional scc)332SOP2 = {333# GFX6, GFX7, GFX8, GFX9, GFX10, name334(0x00, 0x00, 0x00, 0x00, 0x00, "s_add_u32"),335(0x01, 0x01, 0x01, 0x01, 0x01, "s_sub_u32"),336(0x02, 0x02, 0x02, 0x02, 0x02, "s_add_i32"),337(0x03, 0x03, 0x03, 0x03, 0x03, "s_sub_i32"),338(0x04, 0x04, 0x04, 0x04, 0x04, "s_addc_u32"),339(0x05, 0x05, 0x05, 0x05, 0x05, "s_subb_u32"),340(0x06, 0x06, 0x06, 0x06, 0x06, "s_min_i32"),341(0x07, 0x07, 0x07, 0x07, 0x07, "s_min_u32"),342(0x08, 0x08, 0x08, 0x08, 0x08, "s_max_i32"),343(0x09, 0x09, 0x09, 0x09, 0x09, "s_max_u32"),344(0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_cselect_b32"),345(0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_cselect_b64"),346(0x0e, 0x0e, 0x0c, 0x0c, 0x0e, "s_and_b32"),347(0x0f, 0x0f, 0x0d, 0x0d, 0x0f, "s_and_b64"),348(0x10, 
0x10, 0x0e, 0x0e, 0x10, "s_or_b32"),349(0x11, 0x11, 0x0f, 0x0f, 0x11, "s_or_b64"),350(0x12, 0x12, 0x10, 0x10, 0x12, "s_xor_b32"),351(0x13, 0x13, 0x11, 0x11, 0x13, "s_xor_b64"),352(0x14, 0x14, 0x12, 0x12, 0x14, "s_andn2_b32"),353(0x15, 0x15, 0x13, 0x13, 0x15, "s_andn2_b64"),354(0x16, 0x16, 0x14, 0x14, 0x16, "s_orn2_b32"),355(0x17, 0x17, 0x15, 0x15, 0x17, "s_orn2_b64"),356(0x18, 0x18, 0x16, 0x16, 0x18, "s_nand_b32"),357(0x19, 0x19, 0x17, 0x17, 0x19, "s_nand_b64"),358(0x1a, 0x1a, 0x18, 0x18, 0x1a, "s_nor_b32"),359(0x1b, 0x1b, 0x19, 0x19, 0x1b, "s_nor_b64"),360(0x1c, 0x1c, 0x1a, 0x1a, 0x1c, "s_xnor_b32"),361(0x1d, 0x1d, 0x1b, 0x1b, 0x1d, "s_xnor_b64"),362(0x1e, 0x1e, 0x1c, 0x1c, 0x1e, "s_lshl_b32"),363(0x1f, 0x1f, 0x1d, 0x1d, 0x1f, "s_lshl_b64"),364(0x20, 0x20, 0x1e, 0x1e, 0x20, "s_lshr_b32"),365(0x21, 0x21, 0x1f, 0x1f, 0x21, "s_lshr_b64"),366(0x22, 0x22, 0x20, 0x20, 0x22, "s_ashr_i32"),367(0x23, 0x23, 0x21, 0x21, 0x23, "s_ashr_i64"),368(0x24, 0x24, 0x22, 0x22, 0x24, "s_bfm_b32"),369(0x25, 0x25, 0x23, 0x23, 0x25, "s_bfm_b64"),370(0x26, 0x26, 0x24, 0x24, 0x26, "s_mul_i32"),371(0x27, 0x27, 0x25, 0x25, 0x27, "s_bfe_u32"),372(0x28, 0x28, 0x26, 0x26, 0x28, "s_bfe_i32"),373(0x29, 0x29, 0x27, 0x27, 0x29, "s_bfe_u64"),374(0x2a, 0x2a, 0x28, 0x28, 0x2a, "s_bfe_i64"),375(0x2b, 0x2b, 0x29, 0x29, -1, "s_cbranch_g_fork", InstrClass.Branch),376(0x2c, 0x2c, 0x2a, 0x2a, 0x2c, "s_absdiff_i32"),377( -1, -1, 0x2b, 0x2b, -1, "s_rfe_restore_b64", InstrClass.Branch),378( -1, -1, -1, 0x2e, 0x2e, "s_lshl1_add_u32"),379( -1, -1, -1, 0x2f, 0x2f, "s_lshl2_add_u32"),380( -1, -1, -1, 0x30, 0x30, "s_lshl3_add_u32"),381( -1, -1, -1, 0x31, 0x31, "s_lshl4_add_u32"),382( -1, -1, -1, 0x32, 0x32, "s_pack_ll_b32_b16"),383( -1, -1, -1, 0x33, 0x33, "s_pack_lh_b32_b16"),384( -1, -1, -1, 0x34, 0x34, "s_pack_hh_b32_b16"),385( -1, -1, -1, 0x2c, 0x35, "s_mul_hi_u32"),386( -1, -1, -1, 0x2d, 0x36, "s_mul_hi_i32"),387# actually a pseudo-instruction. 
it's lowered to SALU during assembly though, so it's useful to identify it as a SOP2.388( -1, -1, -1, -1, -1, "p_constaddr_addlo"),389}390for (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(SOP2, InstrClass.Salu):391opcode(name, gfx7, gfx9, gfx10, Format.SOP2, cls)392393394# SOPK instructions: 0 input (+ imm), 1 output + optional scc395SOPK = {396# GFX6, GFX7, GFX8, GFX9, GFX10, name397(0x00, 0x00, 0x00, 0x00, 0x00, "s_movk_i32"),398( -1, -1, -1, -1, 0x01, "s_version"), # GFX10+399(0x02, 0x02, 0x01, 0x01, 0x02, "s_cmovk_i32"), # GFX8_GFX9400(0x03, 0x03, 0x02, 0x02, 0x03, "s_cmpk_eq_i32"),401(0x04, 0x04, 0x03, 0x03, 0x04, "s_cmpk_lg_i32"),402(0x05, 0x05, 0x04, 0x04, 0x05, "s_cmpk_gt_i32"),403(0x06, 0x06, 0x05, 0x05, 0x06, "s_cmpk_ge_i32"),404(0x07, 0x07, 0x06, 0x06, 0x07, "s_cmpk_lt_i32"),405(0x08, 0x08, 0x07, 0x07, 0x08, "s_cmpk_le_i32"),406(0x09, 0x09, 0x08, 0x08, 0x09, "s_cmpk_eq_u32"),407(0x0a, 0x0a, 0x09, 0x09, 0x0a, "s_cmpk_lg_u32"),408(0x0b, 0x0b, 0x0a, 0x0a, 0x0b, "s_cmpk_gt_u32"),409(0x0c, 0x0c, 0x0b, 0x0b, 0x0c, "s_cmpk_ge_u32"),410(0x0d, 0x0d, 0x0c, 0x0c, 0x0d, "s_cmpk_lt_u32"),411(0x0e, 0x0e, 0x0d, 0x0d, 0x0e, "s_cmpk_le_u32"),412(0x0f, 0x0f, 0x0e, 0x0e, 0x0f, "s_addk_i32"),413(0x10, 0x10, 0x0f, 0x0f, 0x10, "s_mulk_i32"),414(0x11, 0x11, 0x10, 0x10, -1, "s_cbranch_i_fork", InstrClass.Branch),415(0x12, 0x12, 0x11, 0x11, 0x12, "s_getreg_b32"),416(0x13, 0x13, 0x12, 0x12, 0x13, "s_setreg_b32"),417(0x15, 0x15, 0x14, 0x14, 0x15, "s_setreg_imm32_b32"), # requires 32bit literal418( -1, -1, 0x15, 0x15, 0x16, "s_call_b64", InstrClass.Branch),419( -1, -1, -1, -1, 0x17, "s_waitcnt_vscnt", InstrClass.Waitcnt),420( -1, -1, -1, -1, 0x18, "s_waitcnt_vmcnt", InstrClass.Waitcnt),421( -1, -1, -1, -1, 0x19, "s_waitcnt_expcnt", InstrClass.Waitcnt),422( -1, -1, -1, -1, 0x1a, "s_waitcnt_lgkmcnt", InstrClass.Waitcnt),423( -1, -1, -1, -1, 0x1b, "s_subvector_loop_begin", InstrClass.Branch),424( -1, -1, -1, -1, 0x1c, "s_subvector_loop_end", InstrClass.Branch),425}426for 
(gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(SOPK, InstrClass.Salu):427opcode(name, gfx7, gfx9, gfx10, Format.SOPK, cls)428429430# SOP1 instructions: 1 input, 1 output (+optional SCC)431SOP1 = {432# GFX6, GFX7, GFX8, GFX9, GFX10, name433(0x03, 0x03, 0x00, 0x00, 0x03, "s_mov_b32"),434(0x04, 0x04, 0x01, 0x01, 0x04, "s_mov_b64"),435(0x05, 0x05, 0x02, 0x02, 0x05, "s_cmov_b32"),436(0x06, 0x06, 0x03, 0x03, 0x06, "s_cmov_b64"),437(0x07, 0x07, 0x04, 0x04, 0x07, "s_not_b32"),438(0x08, 0x08, 0x05, 0x05, 0x08, "s_not_b64"),439(0x09, 0x09, 0x06, 0x06, 0x09, "s_wqm_b32"),440(0x0a, 0x0a, 0x07, 0x07, 0x0a, "s_wqm_b64"),441(0x0b, 0x0b, 0x08, 0x08, 0x0b, "s_brev_b32"),442(0x0c, 0x0c, 0x09, 0x09, 0x0c, "s_brev_b64"),443(0x0d, 0x0d, 0x0a, 0x0a, 0x0d, "s_bcnt0_i32_b32"),444(0x0e, 0x0e, 0x0b, 0x0b, 0x0e, "s_bcnt0_i32_b64"),445(0x0f, 0x0f, 0x0c, 0x0c, 0x0f, "s_bcnt1_i32_b32"),446(0x10, 0x10, 0x0d, 0x0d, 0x10, "s_bcnt1_i32_b64"),447(0x11, 0x11, 0x0e, 0x0e, 0x11, "s_ff0_i32_b32"),448(0x12, 0x12, 0x0f, 0x0f, 0x12, "s_ff0_i32_b64"),449(0x13, 0x13, 0x10, 0x10, 0x13, "s_ff1_i32_b32"),450(0x14, 0x14, 0x11, 0x11, 0x14, "s_ff1_i32_b64"),451(0x15, 0x15, 0x12, 0x12, 0x15, "s_flbit_i32_b32"),452(0x16, 0x16, 0x13, 0x13, 0x16, "s_flbit_i32_b64"),453(0x17, 0x17, 0x14, 0x14, 0x17, "s_flbit_i32"),454(0x18, 0x18, 0x15, 0x15, 0x18, "s_flbit_i32_i64"),455(0x19, 0x19, 0x16, 0x16, 0x19, "s_sext_i32_i8"),456(0x1a, 0x1a, 0x17, 0x17, 0x1a, "s_sext_i32_i16"),457(0x1b, 0x1b, 0x18, 0x18, 0x1b, "s_bitset0_b32"),458(0x1c, 0x1c, 0x19, 0x19, 0x1c, "s_bitset0_b64"),459(0x1d, 0x1d, 0x1a, 0x1a, 0x1d, "s_bitset1_b32"),460(0x1e, 0x1e, 0x1b, 0x1b, 0x1e, "s_bitset1_b64"),461(0x1f, 0x1f, 0x1c, 0x1c, 0x1f, "s_getpc_b64"),462(0x20, 0x20, 0x1d, 0x1d, 0x20, "s_setpc_b64", InstrClass.Branch),463(0x21, 0x21, 0x1e, 0x1e, 0x21, "s_swappc_b64", InstrClass.Branch),464(0x22, 0x22, 0x1f, 0x1f, 0x22, "s_rfe_b64", InstrClass.Branch),465(0x24, 0x24, 0x20, 0x20, 0x24, "s_and_saveexec_b64"),466(0x25, 0x25, 0x21, 0x21, 0x25, 
"s_or_saveexec_b64"),467(0x26, 0x26, 0x22, 0x22, 0x26, "s_xor_saveexec_b64"),468(0x27, 0x27, 0x23, 0x23, 0x27, "s_andn2_saveexec_b64"),469(0x28, 0x28, 0x24, 0x24, 0x28, "s_orn2_saveexec_b64"),470(0x29, 0x29, 0x25, 0x25, 0x29, "s_nand_saveexec_b64"),471(0x2a, 0x2a, 0x26, 0x26, 0x2a, "s_nor_saveexec_b64"),472(0x2b, 0x2b, 0x27, 0x27, 0x2b, "s_xnor_saveexec_b64"),473(0x2c, 0x2c, 0x28, 0x28, 0x2c, "s_quadmask_b32"),474(0x2d, 0x2d, 0x29, 0x29, 0x2d, "s_quadmask_b64"),475(0x2e, 0x2e, 0x2a, 0x2a, 0x2e, "s_movrels_b32"),476(0x2f, 0x2f, 0x2b, 0x2b, 0x2f, "s_movrels_b64"),477(0x30, 0x30, 0x2c, 0x2c, 0x30, "s_movreld_b32"),478(0x31, 0x31, 0x2d, 0x2d, 0x31, "s_movreld_b64"),479(0x32, 0x32, 0x2e, 0x2e, -1, "s_cbranch_join", InstrClass.Branch),480(0x34, 0x34, 0x30, 0x30, 0x34, "s_abs_i32"),481(0x35, 0x35, -1, -1, 0x35, "s_mov_fed_b32"),482( -1, -1, 0x32, 0x32, -1, "s_set_gpr_idx_idx"),483( -1, -1, -1, 0x33, 0x37, "s_andn1_saveexec_b64"),484( -1, -1, -1, 0x34, 0x38, "s_orn1_saveexec_b64"),485( -1, -1, -1, 0x35, 0x39, "s_andn1_wrexec_b64"),486( -1, -1, -1, 0x36, 0x3a, "s_andn2_wrexec_b64"),487( -1, -1, -1, 0x37, 0x3b, "s_bitreplicate_b64_b32"),488( -1, -1, -1, -1, 0x3c, "s_and_saveexec_b32"),489( -1, -1, -1, -1, 0x3d, "s_or_saveexec_b32"),490( -1, -1, -1, -1, 0x3e, "s_xor_saveexec_b32"),491( -1, -1, -1, -1, 0x3f, "s_andn2_saveexec_b32"),492( -1, -1, -1, -1, 0x40, "s_orn2_saveexec_b32"),493( -1, -1, -1, -1, 0x41, "s_nand_saveexec_b32"),494( -1, -1, -1, -1, 0x42, "s_nor_saveexec_b32"),495( -1, -1, -1, -1, 0x43, "s_xnor_saveexec_b32"),496( -1, -1, -1, -1, 0x44, "s_andn1_saveexec_b32"),497( -1, -1, -1, -1, 0x45, "s_orn1_saveexec_b32"),498( -1, -1, -1, -1, 0x46, "s_andn1_wrexec_b32"),499( -1, -1, -1, -1, 0x47, "s_andn2_wrexec_b32"),500( -1, -1, -1, -1, 0x49, "s_movrelsd_2_b32"),501# actually a pseudo-instruction. 
it's lowered to SALU during assembly though, so it's useful to identify it as a SOP1.502( -1, -1, -1, -1, -1, "p_constaddr_getpc"),503}504for (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(SOP1, InstrClass.Salu):505opcode(name, gfx7, gfx9, gfx10, Format.SOP1, cls)506507508# SOPC instructions: 2 inputs and 0 outputs (+SCC)509SOPC = {510# GFX6, GFX7, GFX8, GFX9, GFX10, name511(0x00, 0x00, 0x00, 0x00, 0x00, "s_cmp_eq_i32"),512(0x01, 0x01, 0x01, 0x01, 0x01, "s_cmp_lg_i32"),513(0x02, 0x02, 0x02, 0x02, 0x02, "s_cmp_gt_i32"),514(0x03, 0x03, 0x03, 0x03, 0x03, "s_cmp_ge_i32"),515(0x04, 0x04, 0x04, 0x04, 0x04, "s_cmp_lt_i32"),516(0x05, 0x05, 0x05, 0x05, 0x05, "s_cmp_le_i32"),517(0x06, 0x06, 0x06, 0x06, 0x06, "s_cmp_eq_u32"),518(0x07, 0x07, 0x07, 0x07, 0x07, "s_cmp_lg_u32"),519(0x08, 0x08, 0x08, 0x08, 0x08, "s_cmp_gt_u32"),520(0x09, 0x09, 0x09, 0x09, 0x09, "s_cmp_ge_u32"),521(0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_cmp_lt_u32"),522(0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_cmp_le_u32"),523(0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_bitcmp0_b32"),524(0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "s_bitcmp1_b32"),525(0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "s_bitcmp0_b64"),526(0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "s_bitcmp1_b64"),527(0x10, 0x10, 0x10, 0x10, -1, "s_setvskip"),528( -1, -1, 0x11, 0x11, -1, "s_set_gpr_idx_on"),529( -1, -1, 0x12, 0x12, 0x12, "s_cmp_eq_u64"),530( -1, -1, 0x13, 0x13, 0x13, "s_cmp_lg_u64"),531}532for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SOPC:533opcode(name, gfx7, gfx9, gfx10, Format.SOPC, InstrClass.Salu)534535536# SOPP instructions: 0 inputs (+optional scc/vcc), 0 outputs537SOPP = {538# GFX6, GFX7, GFX8, GFX9, GFX10, name539(0x00, 0x00, 0x00, 0x00, 0x00, "s_nop"),540(0x01, 0x01, 0x01, 0x01, 0x01, "s_endpgm"),541(0x02, 0x02, 0x02, 0x02, 0x02, "s_branch", InstrClass.Branch),542( -1, -1, 0x03, 0x03, 0x03, "s_wakeup"),543(0x04, 0x04, 0x04, 0x04, 0x04, "s_cbranch_scc0", InstrClass.Branch),544(0x05, 0x05, 0x05, 0x05, 0x05, "s_cbranch_scc1", InstrClass.Branch),545(0x06, 0x06, 0x06, 0x06, 0x06, 
"s_cbranch_vccz", InstrClass.Branch),546(0x07, 0x07, 0x07, 0x07, 0x07, "s_cbranch_vccnz", InstrClass.Branch),547(0x08, 0x08, 0x08, 0x08, 0x08, "s_cbranch_execz", InstrClass.Branch),548(0x09, 0x09, 0x09, 0x09, 0x09, "s_cbranch_execnz", InstrClass.Branch),549(0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_barrier", InstrClass.Barrier),550( -1, 0x0b, 0x0b, 0x0b, 0x0b, "s_setkill"),551(0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_waitcnt", InstrClass.Waitcnt),552(0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "s_sethalt"),553(0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "s_sleep"),554(0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "s_setprio"),555(0x10, 0x10, 0x10, 0x10, 0x10, "s_sendmsg", InstrClass.Sendmsg),556(0x11, 0x11, 0x11, 0x11, 0x11, "s_sendmsghalt", InstrClass.Sendmsg),557(0x12, 0x12, 0x12, 0x12, 0x12, "s_trap", InstrClass.Branch),558(0x13, 0x13, 0x13, 0x13, 0x13, "s_icache_inv"),559(0x14, 0x14, 0x14, 0x14, 0x14, "s_incperflevel"),560(0x15, 0x15, 0x15, 0x15, 0x15, "s_decperflevel"),561(0x16, 0x16, 0x16, 0x16, 0x16, "s_ttracedata"),562( -1, 0x17, 0x17, 0x17, 0x17, "s_cbranch_cdbgsys", InstrClass.Branch),563( -1, 0x18, 0x18, 0x18, 0x18, "s_cbranch_cdbguser", InstrClass.Branch),564( -1, 0x19, 0x19, 0x19, 0x19, "s_cbranch_cdbgsys_or_user", InstrClass.Branch),565( -1, 0x1a, 0x1a, 0x1a, 0x1a, "s_cbranch_cdbgsys_and_user", InstrClass.Branch),566( -1, -1, 0x1b, 0x1b, 0x1b, "s_endpgm_saved"),567( -1, -1, 0x1c, 0x1c, -1, "s_set_gpr_idx_off"),568( -1, -1, 0x1d, 0x1d, -1, "s_set_gpr_idx_mode"),569( -1, -1, -1, 0x1e, 0x1e, "s_endpgm_ordered_ps_done"),570( -1, -1, -1, -1, 0x1f, "s_code_end"),571( -1, -1, -1, -1, 0x20, "s_inst_prefetch"),572( -1, -1, -1, -1, 0x21, "s_clause"),573( -1, -1, -1, -1, 0x22, "s_wait_idle"),574( -1, -1, -1, -1, 0x23, "s_waitcnt_depctr"),575( -1, -1, -1, -1, 0x24, "s_round_mode"),576( -1, -1, -1, -1, 0x25, "s_denorm_mode"),577( -1, -1, -1, -1, 0x26, "s_ttracedata_imm"),578}579for (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(SOPP, InstrClass.Salu):580opcode(name, gfx7, gfx9, gfx10, Format.SOPP, 
cls)581582583# SMEM instructions: sbase input (2 sgpr), potentially 2 offset inputs, 1 sdata input/output584# Unlike GFX10, GFX10.3 does not have SMEM store, atomic or scratch instructions585SMEM = {586# GFX6, GFX7, GFX8, GFX9, GFX10, name587(0x00, 0x00, 0x00, 0x00, 0x00, "s_load_dword"),588(0x01, 0x01, 0x01, 0x01, 0x01, "s_load_dwordx2"),589(0x02, 0x02, 0x02, 0x02, 0x02, "s_load_dwordx4"),590(0x03, 0x03, 0x03, 0x03, 0x03, "s_load_dwordx8"),591(0x04, 0x04, 0x04, 0x04, 0x04, "s_load_dwordx16"),592( -1, -1, -1, 0x05, 0x05, "s_scratch_load_dword"),593( -1, -1, -1, 0x06, 0x06, "s_scratch_load_dwordx2"),594( -1, -1, -1, 0x07, 0x07, "s_scratch_load_dwordx4"),595(0x08, 0x08, 0x08, 0x08, 0x08, "s_buffer_load_dword"),596(0x09, 0x09, 0x09, 0x09, 0x09, "s_buffer_load_dwordx2"),597(0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "s_buffer_load_dwordx4"),598(0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "s_buffer_load_dwordx8"),599(0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "s_buffer_load_dwordx16"),600( -1, -1, 0x10, 0x10, 0x10, "s_store_dword"),601( -1, -1, 0x11, 0x11, 0x11, "s_store_dwordx2"),602( -1, -1, 0x12, 0x12, 0x12, "s_store_dwordx4"),603( -1, -1, -1, 0x15, 0x15, "s_scratch_store_dword"),604( -1, -1, -1, 0x16, 0x16, "s_scratch_store_dwordx2"),605( -1, -1, -1, 0x17, 0x17, "s_scratch_store_dwordx4"),606( -1, -1, 0x18, 0x18, 0x18, "s_buffer_store_dword"),607( -1, -1, 0x19, 0x19, 0x19, "s_buffer_store_dwordx2"),608( -1, -1, 0x1a, 0x1a, 0x1a, "s_buffer_store_dwordx4"),609( -1, -1, 0x1f, 0x1f, 0x1f, "s_gl1_inv"),610(0x1f, 0x1f, 0x20, 0x20, 0x20, "s_dcache_inv"),611( -1, -1, 0x21, 0x21, 0x21, "s_dcache_wb"),612( -1, 0x1d, 0x22, 0x22, -1, "s_dcache_inv_vol"),613( -1, -1, 0x23, 0x23, -1, "s_dcache_wb_vol"),614(0x1e, 0x1e, 0x24, 0x24, 0x24, "s_memtime"), #GFX6-GFX10615( -1, -1, 0x25, 0x25, 0x25, "s_memrealtime"),616( -1, -1, 0x26, 0x26, 0x26, "s_atc_probe"),617( -1, -1, 0x27, 0x27, 0x27, "s_atc_probe_buffer"),618( -1, -1, -1, 0x28, 0x28, "s_dcache_discard"),619( -1, -1, -1, 0x29, 0x29, "s_dcache_discard_x2"),620( -1, -1, 
-1, -1, 0x2a, "s_get_waveid_in_workgroup"),621( -1, -1, -1, 0x40, 0x40, "s_buffer_atomic_swap"),622( -1, -1, -1, 0x41, 0x41, "s_buffer_atomic_cmpswap"),623( -1, -1, -1, 0x42, 0x42, "s_buffer_atomic_add"),624( -1, -1, -1, 0x43, 0x43, "s_buffer_atomic_sub"),625( -1, -1, -1, 0x44, 0x44, "s_buffer_atomic_smin"),626( -1, -1, -1, 0x45, 0x45, "s_buffer_atomic_umin"),627( -1, -1, -1, 0x46, 0x46, "s_buffer_atomic_smax"),628( -1, -1, -1, 0x47, 0x47, "s_buffer_atomic_umax"),629( -1, -1, -1, 0x48, 0x48, "s_buffer_atomic_and"),630( -1, -1, -1, 0x49, 0x49, "s_buffer_atomic_or"),631( -1, -1, -1, 0x4a, 0x4a, "s_buffer_atomic_xor"),632( -1, -1, -1, 0x4b, 0x4b, "s_buffer_atomic_inc"),633( -1, -1, -1, 0x4c, 0x4c, "s_buffer_atomic_dec"),634( -1, -1, -1, 0x60, 0x60, "s_buffer_atomic_swap_x2"),635( -1, -1, -1, 0x61, 0x61, "s_buffer_atomic_cmpswap_x2"),636( -1, -1, -1, 0x62, 0x62, "s_buffer_atomic_add_x2"),637( -1, -1, -1, 0x63, 0x63, "s_buffer_atomic_sub_x2"),638( -1, -1, -1, 0x64, 0x64, "s_buffer_atomic_smin_x2"),639( -1, -1, -1, 0x65, 0x65, "s_buffer_atomic_umin_x2"),640( -1, -1, -1, 0x66, 0x66, "s_buffer_atomic_smax_x2"),641( -1, -1, -1, 0x67, 0x67, "s_buffer_atomic_umax_x2"),642( -1, -1, -1, 0x68, 0x68, "s_buffer_atomic_and_x2"),643( -1, -1, -1, 0x69, 0x69, "s_buffer_atomic_or_x2"),644( -1, -1, -1, 0x6a, 0x6a, "s_buffer_atomic_xor_x2"),645( -1, -1, -1, 0x6b, 0x6b, "s_buffer_atomic_inc_x2"),646( -1, -1, -1, 0x6c, 0x6c, "s_buffer_atomic_dec_x2"),647( -1, -1, -1, 0x80, 0x80, "s_atomic_swap"),648( -1, -1, -1, 0x81, 0x81, "s_atomic_cmpswap"),649( -1, -1, -1, 0x82, 0x82, "s_atomic_add"),650( -1, -1, -1, 0x83, 0x83, "s_atomic_sub"),651( -1, -1, -1, 0x84, 0x84, "s_atomic_smin"),652( -1, -1, -1, 0x85, 0x85, "s_atomic_umin"),653( -1, -1, -1, 0x86, 0x86, "s_atomic_smax"),654( -1, -1, -1, 0x87, 0x87, "s_atomic_umax"),655( -1, -1, -1, 0x88, 0x88, "s_atomic_and"),656( -1, -1, -1, 0x89, 0x89, "s_atomic_or"),657( -1, -1, -1, 0x8a, 0x8a, "s_atomic_xor"),658( -1, -1, -1, 0x8b, 0x8b, 
"s_atomic_inc"),659( -1, -1, -1, 0x8c, 0x8c, "s_atomic_dec"),660( -1, -1, -1, 0xa0, 0xa0, "s_atomic_swap_x2"),661( -1, -1, -1, 0xa1, 0xa1, "s_atomic_cmpswap_x2"),662( -1, -1, -1, 0xa2, 0xa2, "s_atomic_add_x2"),663( -1, -1, -1, 0xa3, 0xa3, "s_atomic_sub_x2"),664( -1, -1, -1, 0xa4, 0xa4, "s_atomic_smin_x2"),665( -1, -1, -1, 0xa5, 0xa5, "s_atomic_umin_x2"),666( -1, -1, -1, 0xa6, 0xa6, "s_atomic_smax_x2"),667( -1, -1, -1, 0xa7, 0xa7, "s_atomic_umax_x2"),668( -1, -1, -1, 0xa8, 0xa8, "s_atomic_and_x2"),669( -1, -1, -1, 0xa9, 0xa9, "s_atomic_or_x2"),670( -1, -1, -1, 0xaa, 0xaa, "s_atomic_xor_x2"),671( -1, -1, -1, 0xab, 0xab, "s_atomic_inc_x2"),672( -1, -1, -1, 0xac, 0xac, "s_atomic_dec_x2"),673}674for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in SMEM:675opcode(name, gfx7, gfx9, gfx10, Format.SMEM, InstrClass.SMem, is_atomic = "atomic" in name)676677678# VOP2 instructions: 2 inputs, 1 output (+ optional vcc)679# TODO: misses some GFX6_7 opcodes which were shifted to VOP3 in GFX8680VOP2 = {681# GFX6, GFX7, GFX8, GFX9, GFX10, name, input/output modifiers682(0x01, 0x01, -1, -1, -1, "v_readlane_b32", False),683(0x02, 0x02, -1, -1, -1, "v_writelane_b32", False),684(0x03, 0x03, 0x01, 0x01, 0x03, "v_add_f32", True),685(0x04, 0x04, 0x02, 0x02, 0x04, "v_sub_f32", True),686(0x05, 0x05, 0x03, 0x03, 0x05, "v_subrev_f32", True),687(0x06, 0x06, -1, -1, 0x06, "v_mac_legacy_f32", True),688(0x07, 0x07, 0x04, 0x04, 0x07, "v_mul_legacy_f32", True),689(0x08, 0x08, 0x05, 0x05, 0x08, "v_mul_f32", True),690(0x09, 0x09, 0x06, 0x06, 0x09, "v_mul_i32_i24", False),691(0x0a, 0x0a, 0x07, 0x07, 0x0a, "v_mul_hi_i32_i24", False),692(0x0b, 0x0b, 0x08, 0x08, 0x0b, "v_mul_u32_u24", False),693(0x0c, 0x0c, 0x09, 0x09, 0x0c, "v_mul_hi_u32_u24", False),694(0x0d, 0x0d, -1, -1, -1, "v_min_legacy_f32", True),695(0x0e, 0x0e, -1, -1, -1, "v_max_legacy_f32", True),696(0x0f, 0x0f, 0x0a, 0x0a, 0x0f, "v_min_f32", True),697(0x10, 0x10, 0x0b, 0x0b, 0x10, "v_max_f32", True),698(0x11, 0x11, 0x0c, 0x0c, 0x11, "v_min_i32", 
False),699(0x12, 0x12, 0x0d, 0x0d, 0x12, "v_max_i32", False),700(0x13, 0x13, 0x0e, 0x0e, 0x13, "v_min_u32", False),701(0x14, 0x14, 0x0f, 0x0f, 0x14, "v_max_u32", False),702(0x15, 0x15, -1, -1, -1, "v_lshr_b32", False),703(0x16, 0x16, 0x10, 0x10, 0x16, "v_lshrrev_b32", False),704(0x17, 0x17, -1, -1, -1, "v_ashr_i32", False),705(0x18, 0x18, 0x11, 0x11, 0x18, "v_ashrrev_i32", False),706(0x19, 0x19, -1, -1, -1, "v_lshl_b32", False),707(0x1a, 0x1a, 0x12, 0x12, 0x1a, "v_lshlrev_b32", False),708(0x1b, 0x1b, 0x13, 0x13, 0x1b, "v_and_b32", False),709(0x1c, 0x1c, 0x14, 0x14, 0x1c, "v_or_b32", False),710(0x1d, 0x1d, 0x15, 0x15, 0x1d, "v_xor_b32", False),711( -1, -1, -1, -1, 0x1e, "v_xnor_b32", False),712(0x1f, 0x1f, 0x16, 0x16, 0x1f, "v_mac_f32", True),713(0x20, 0x20, 0x17, 0x17, 0x20, "v_madmk_f32", False),714(0x21, 0x21, 0x18, 0x18, 0x21, "v_madak_f32", False),715(0x24, 0x24, -1, -1, -1, "v_mbcnt_hi_u32_b32", False),716(0x25, 0x25, 0x19, 0x19, -1, "v_add_co_u32", False), # VOP3B only in RDNA717(0x26, 0x26, 0x1a, 0x1a, -1, "v_sub_co_u32", False), # VOP3B only in RDNA718(0x27, 0x27, 0x1b, 0x1b, -1, "v_subrev_co_u32", False), # VOP3B only in RDNA719(0x28, 0x28, 0x1c, 0x1c, 0x28, "v_addc_co_u32", False), # v_add_co_ci_u32 in RDNA720(0x29, 0x29, 0x1d, 0x1d, 0x29, "v_subb_co_u32", False), # v_sub_co_ci_u32 in RDNA721(0x2a, 0x2a, 0x1e, 0x1e, 0x2a, "v_subbrev_co_u32", False), # v_subrev_co_ci_u32 in RDNA722( -1, -1, -1, -1, 0x2b, "v_fmac_f32", True),723( -1, -1, -1, -1, 0x2c, "v_fmamk_f32", True),724( -1, -1, -1, -1, 0x2d, "v_fmaak_f32", True),725(0x2f, 0x2f, -1, -1, 0x2f, "v_cvt_pkrtz_f16_f32", True),726( -1, -1, 0x1f, 0x1f, 0x32, "v_add_f16", True),727( -1, -1, 0x20, 0x20, 0x33, "v_sub_f16", True),728( -1, -1, 0x21, 0x21, 0x34, "v_subrev_f16", True),729( -1, -1, 0x22, 0x22, 0x35, "v_mul_f16", True),730( -1, -1, 0x23, 0x23, -1, "v_mac_f16", True),731( -1, -1, 0x24, 0x24, -1, "v_madmk_f16", False),732( -1, -1, 0x25, 0x25, -1, "v_madak_f16", False),733( -1, -1, 0x26, 0x26, -1, 
"v_add_u16", False),734( -1, -1, 0x27, 0x27, -1, "v_sub_u16", False),735( -1, -1, 0x28, 0x28, -1, "v_subrev_u16", False),736( -1, -1, 0x29, 0x29, -1, "v_mul_lo_u16", False),737( -1, -1, 0x2a, 0x2a, -1, "v_lshlrev_b16", False),738( -1, -1, 0x2b, 0x2b, -1, "v_lshrrev_b16", False),739( -1, -1, 0x2c, 0x2c, -1, "v_ashrrev_i16", False),740( -1, -1, 0x2d, 0x2d, 0x39, "v_max_f16", True),741( -1, -1, 0x2e, 0x2e, 0x3a, "v_min_f16", True),742( -1, -1, 0x2f, 0x2f, -1, "v_max_u16", False),743( -1, -1, 0x30, 0x30, -1, "v_max_i16", False),744( -1, -1, 0x31, 0x31, -1, "v_min_u16", False),745( -1, -1, 0x32, 0x32, -1, "v_min_i16", False),746( -1, -1, 0x33, 0x33, 0x3b, "v_ldexp_f16", False),747( -1, -1, -1, 0x34, 0x25, "v_add_u32", False), # use v_add_co_u32 on GFX8, called v_add_nc_u32 in RDNA748( -1, -1, -1, 0x35, 0x26, "v_sub_u32", False), # use v_sub_co_u32 on GFX8, called v_sub_nc_u32 in RDNA749( -1, -1, -1, 0x36, 0x27, "v_subrev_u32", False), # use v_subrev_co_u32 on GFX8, called v_subrev_nc_u32 in RDNA750( -1, -1, -1, -1, 0x36, "v_fmac_f16", False),751( -1, -1, -1, -1, 0x37, "v_fmamk_f16", False),752( -1, -1, -1, -1, 0x38, "v_fmaak_f16", False),753( -1, -1, -1, -1, 0x3c, "v_pk_fmac_f16", False),754}755for (gfx6, gfx7, gfx8, gfx9, gfx10, name, modifiers) in VOP2:756opcode(name, gfx7, gfx9, gfx10, Format.VOP2, InstrClass.Valu32, modifiers, modifiers)757758if True:759# v_cndmask_b32 can use input modifiers but not output modifiers760(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x00, 0x00, 0x00, 0x00, 0x01, "v_cndmask_b32")761opcode(name, gfx7, gfx9, gfx10, Format.VOP2, InstrClass.Valu32, True, False)762763764# VOP1 instructions: instructions with 1 input and 1 output765VOP1 = {766# GFX6, GFX7, GFX8, GFX9, GFX10, name, input_modifiers, output_modifiers767(0x00, 0x00, 0x00, 0x00, 0x00, "v_nop", False, False),768(0x01, 0x01, 0x01, 0x01, 0x01, "v_mov_b32", False, False),769(0x02, 0x02, 0x02, 0x02, 0x02, "v_readfirstlane_b32", False, False),770(0x03, 0x03, 0x03, 0x03, 0x03, 
"v_cvt_i32_f64", True, False, InstrClass.ValuDoubleConvert),771(0x04, 0x04, 0x04, 0x04, 0x04, "v_cvt_f64_i32", False, True, InstrClass.ValuDoubleConvert),772(0x05, 0x05, 0x05, 0x05, 0x05, "v_cvt_f32_i32", False, True),773(0x06, 0x06, 0x06, 0x06, 0x06, "v_cvt_f32_u32", False, True),774(0x07, 0x07, 0x07, 0x07, 0x07, "v_cvt_u32_f32", True, False),775(0x08, 0x08, 0x08, 0x08, 0x08, "v_cvt_i32_f32", True, False),776(0x09, 0x09, -1, -1, 0x09, "v_mov_fed_b32", True, False), # LLVM mentions it for GFX8_9777(0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "v_cvt_f16_f32", True, True),778( -1, -1, -1, -1, -1, "p_cvt_f16_f32_rtne", True, True),779(0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "v_cvt_f32_f16", True, True),780(0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "v_cvt_rpi_i32_f32", True, False),781(0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "v_cvt_flr_i32_f32", True, False),782(0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "v_cvt_off_f32_i4", False, True),783(0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "v_cvt_f32_f64", True, True, InstrClass.ValuDoubleConvert),784(0x10, 0x10, 0x10, 0x10, 0x10, "v_cvt_f64_f32", True, True, InstrClass.ValuDoubleConvert),785(0x11, 0x11, 0x11, 0x11, 0x11, "v_cvt_f32_ubyte0", False, True),786(0x12, 0x12, 0x12, 0x12, 0x12, "v_cvt_f32_ubyte1", False, True),787(0x13, 0x13, 0x13, 0x13, 0x13, "v_cvt_f32_ubyte2", False, True),788(0x14, 0x14, 0x14, 0x14, 0x14, "v_cvt_f32_ubyte3", False, True),789(0x15, 0x15, 0x15, 0x15, 0x15, "v_cvt_u32_f64", True, False, InstrClass.ValuDoubleConvert),790(0x16, 0x16, 0x16, 0x16, 0x16, "v_cvt_f64_u32", False, True, InstrClass.ValuDoubleConvert),791( -1, 0x17, 0x17, 0x17, 0x17, "v_trunc_f64", True, True, InstrClass.ValuDouble),792( -1, 0x18, 0x18, 0x18, 0x18, "v_ceil_f64", True, True, InstrClass.ValuDouble),793( -1, 0x19, 0x19, 0x19, 0x19, "v_rndne_f64", True, True, InstrClass.ValuDouble),794( -1, 0x1a, 0x1a, 0x1a, 0x1a, "v_floor_f64", True, True, InstrClass.ValuDouble),795( -1, -1, -1, -1, 0x1b, "v_pipeflush", False, False),796(0x20, 0x20, 0x1b, 0x1b, 0x20, "v_fract_f32", True, True),797(0x21, 0x21, 0x1c, 
0x1c, 0x21, "v_trunc_f32", True, True),798(0x22, 0x22, 0x1d, 0x1d, 0x22, "v_ceil_f32", True, True),799(0x23, 0x23, 0x1e, 0x1e, 0x23, "v_rndne_f32", True, True),800(0x24, 0x24, 0x1f, 0x1f, 0x24, "v_floor_f32", True, True),801(0x25, 0x25, 0x20, 0x20, 0x25, "v_exp_f32", True, True, InstrClass.ValuTranscendental32),802(0x26, 0x26, -1, -1, -1, "v_log_clamp_f32", True, True, InstrClass.ValuTranscendental32),803(0x27, 0x27, 0x21, 0x21, 0x27, "v_log_f32", True, True, InstrClass.ValuTranscendental32),804(0x28, 0x28, -1, -1, -1, "v_rcp_clamp_f32", True, True, InstrClass.ValuTranscendental32),805(0x29, 0x29, -1, -1, -1, "v_rcp_legacy_f32", True, True, InstrClass.ValuTranscendental32),806(0x2a, 0x2a, 0x22, 0x22, 0x2a, "v_rcp_f32", True, True, InstrClass.ValuTranscendental32),807(0x2b, 0x2b, 0x23, 0x23, 0x2b, "v_rcp_iflag_f32", True, True, InstrClass.ValuTranscendental32),808(0x2c, 0x2c, -1, -1, -1, "v_rsq_clamp_f32", True, True, InstrClass.ValuTranscendental32),809(0x2d, 0x2d, -1, -1, -1, "v_rsq_legacy_f32", True, True, InstrClass.ValuTranscendental32),810(0x2e, 0x2e, 0x24, 0x24, 0x2e, "v_rsq_f32", True, True, InstrClass.ValuTranscendental32),811(0x2f, 0x2f, 0x25, 0x25, 0x2f, "v_rcp_f64", True, True, InstrClass.ValuDoubleTranscendental),812(0x30, 0x30, -1, -1, -1, "v_rcp_clamp_f64", True, True, InstrClass.ValuDoubleTranscendental),813(0x31, 0x31, 0x26, 0x26, 0x31, "v_rsq_f64", True, True, InstrClass.ValuDoubleTranscendental),814(0x32, 0x32, -1, -1, -1, "v_rsq_clamp_f64", True, True, InstrClass.ValuDoubleTranscendental),815(0x33, 0x33, 0x27, 0x27, 0x33, "v_sqrt_f32", True, True, InstrClass.ValuTranscendental32),816(0x34, 0x34, 0x28, 0x28, 0x34, "v_sqrt_f64", True, True, InstrClass.ValuDoubleTranscendental),817(0x35, 0x35, 0x29, 0x29, 0x35, "v_sin_f32", True, True, InstrClass.ValuTranscendental32),818(0x36, 0x36, 0x2a, 0x2a, 0x36, "v_cos_f32", True, True, InstrClass.ValuTranscendental32),819(0x37, 0x37, 0x2b, 0x2b, 0x37, "v_not_b32", False, False),820(0x38, 0x38, 0x2c, 0x2c, 
0x38, "v_bfrev_b32", False, False),821(0x39, 0x39, 0x2d, 0x2d, 0x39, "v_ffbh_u32", False, False),822(0x3a, 0x3a, 0x2e, 0x2e, 0x3a, "v_ffbl_b32", False, False),823(0x3b, 0x3b, 0x2f, 0x2f, 0x3b, "v_ffbh_i32", False, False),824(0x3c, 0x3c, 0x30, 0x30, 0x3c, "v_frexp_exp_i32_f64", True, False, InstrClass.ValuDouble),825(0x3d, 0x3d, 0x31, 0x31, 0x3d, "v_frexp_mant_f64", True, False, InstrClass.ValuDouble),826(0x3e, 0x3e, 0x32, 0x32, 0x3e, "v_fract_f64", True, True, InstrClass.ValuDouble),827(0x3f, 0x3f, 0x33, 0x33, 0x3f, "v_frexp_exp_i32_f32", True, False),828(0x40, 0x40, 0x34, 0x34, 0x40, "v_frexp_mant_f32", True, False),829(0x41, 0x41, 0x35, 0x35, 0x41, "v_clrexcp", False, False),830(0x42, 0x42, 0x36, -1, 0x42, "v_movreld_b32", False, False),831(0x43, 0x43, 0x37, -1, 0x43, "v_movrels_b32", False, False),832(0x44, 0x44, 0x38, -1, 0x44, "v_movrelsd_b32", False, False),833( -1, -1, -1, -1, 0x48, "v_movrelsd_2_b32", False, False),834( -1, -1, -1, 0x37, -1, "v_screen_partition_4se_b32", False, False),835( -1, -1, 0x39, 0x39, 0x50, "v_cvt_f16_u16", False, True),836( -1, -1, 0x3a, 0x3a, 0x51, "v_cvt_f16_i16", False, True),837( -1, -1, 0x3b, 0x3b, 0x52, "v_cvt_u16_f16", True, False),838( -1, -1, 0x3c, 0x3c, 0x53, "v_cvt_i16_f16", True, False),839( -1, -1, 0x3d, 0x3d, 0x54, "v_rcp_f16", True, True, InstrClass.ValuTranscendental32),840( -1, -1, 0x3e, 0x3e, 0x55, "v_sqrt_f16", True, True, InstrClass.ValuTranscendental32),841( -1, -1, 0x3f, 0x3f, 0x56, "v_rsq_f16", True, True, InstrClass.ValuTranscendental32),842( -1, -1, 0x40, 0x40, 0x57, "v_log_f16", True, True, InstrClass.ValuTranscendental32),843( -1, -1, 0x41, 0x41, 0x58, "v_exp_f16", True, True, InstrClass.ValuTranscendental32),844( -1, -1, 0x42, 0x42, 0x59, "v_frexp_mant_f16", True, False),845( -1, -1, 0x43, 0x43, 0x5a, "v_frexp_exp_i16_f16", True, False),846( -1, -1, 0x44, 0x44, 0x5b, "v_floor_f16", True, True),847( -1, -1, 0x45, 0x45, 0x5c, "v_ceil_f16", True, True),848( -1, -1, 0x46, 0x46, 0x5d, "v_trunc_f16", True, 
True),849( -1, -1, 0x47, 0x47, 0x5e, "v_rndne_f16", True, True),850( -1, -1, 0x48, 0x48, 0x5f, "v_fract_f16", True, True),851( -1, -1, 0x49, 0x49, 0x60, "v_sin_f16", True, True, InstrClass.ValuTranscendental32),852( -1, -1, 0x4a, 0x4a, 0x61, "v_cos_f16", True, True, InstrClass.ValuTranscendental32),853( -1, 0x46, 0x4b, 0x4b, -1, "v_exp_legacy_f32", True, True, InstrClass.ValuTranscendental32),854( -1, 0x45, 0x4c, 0x4c, -1, "v_log_legacy_f32", True, True, InstrClass.ValuTranscendental32),855( -1, -1, -1, 0x4f, 0x62, "v_sat_pk_u8_i16", False, False),856( -1, -1, -1, 0x4d, 0x63, "v_cvt_norm_i16_f16", True, False),857( -1, -1, -1, 0x4e, 0x64, "v_cvt_norm_u16_f16", True, False),858( -1, -1, -1, 0x51, 0x65, "v_swap_b32", False, False),859( -1, -1, -1, -1, 0x68, "v_swaprel_b32", False, False),860}861for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod, cls) in default_class(VOP1, InstrClass.Valu32):862opcode(name, gfx7, gfx9, gfx10, Format.VOP1, cls, in_mod, out_mod)863864865# VOPC instructions:866867VOPC_CLASS = {868(0x88, 0x88, 0x10, 0x10, 0x88, "v_cmp_class_f32"),869( -1, -1, 0x14, 0x14, 0x8f, "v_cmp_class_f16"),870(0x98, 0x98, 0x11, 0x11, 0x98, "v_cmpx_class_f32"),871( -1, -1, 0x15, 0x15, 0x9f, "v_cmpx_class_f16"),872(0xa8, 0xa8, 0x12, 0x12, 0xa8, "v_cmp_class_f64", InstrClass.ValuDouble),873(0xb8, 0xb8, 0x13, 0x13, 0xb8, "v_cmpx_class_f64", InstrClass.ValuDouble),874}875for (gfx6, gfx7, gfx8, gfx9, gfx10, name, cls) in default_class(VOPC_CLASS, InstrClass.Valu32):876opcode(name, gfx7, gfx9, gfx10, Format.VOPC, cls, True, False)877878COMPF = ["f", "lt", "eq", "le", "gt", "lg", "ge", "o", "u", "nge", "nlg", "ngt", "nle", "neq", "nlt", "tru"]879880for i in range(8):881(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x20+i, 0x20+i, 0xc8+i, "v_cmp_"+COMPF[i]+"_f16")882opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False)883(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x30+i, 0x30+i, 0xd8+i, "v_cmpx_"+COMPF[i]+"_f16")884opcode(name, gfx7, 
gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False)885(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x28+i, 0x28+i, 0xe8+i, "v_cmp_"+COMPF[i+8]+"_f16")886opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False)887(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0x38+i, 0x38+i, 0xf8+i, "v_cmpx_"+COMPF[i+8]+"_f16")888opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False)889890for i in range(16):891(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x00+i, 0x00+i, 0x40+i, 0x40+i, 0x00+i, "v_cmp_"+COMPF[i]+"_f32")892opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False)893(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x10+i, 0x10+i, 0x50+i, 0x50+i, 0x10+i, "v_cmpx_"+COMPF[i]+"_f32")894opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32, True, False)895(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x20+i, 0x20+i, 0x60+i, 0x60+i, 0x20+i, "v_cmp_"+COMPF[i]+"_f64")896opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.ValuDouble, True, False)897(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x30+i, 0x30+i, 0x70+i, 0x70+i, 0x30+i, "v_cmpx_"+COMPF[i]+"_f64")898opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.ValuDouble, True, False)899# GFX_6_7900(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x40+i, 0x40+i, -1, -1, -1, "v_cmps_"+COMPF[i]+"_f32")901(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x50+i, 0x50+i, -1, -1, -1, "v_cmpsx_"+COMPF[i]+"_f32")902(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x60+i, 0x60+i, -1, -1, -1, "v_cmps_"+COMPF[i]+"_f64")903(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x70+i, 0x70+i, -1, -1, -1, "v_cmpsx_"+COMPF[i]+"_f64")904905COMPI = ["f", "lt", "eq", "le", "gt", "lg", "ge", "tru"]906907# GFX_8_9908for i in [0,7]: # only 0 and 7909(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa0+i, 0xa0+i, -1, "v_cmp_"+COMPI[i]+"_i16")910opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)911(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb0+i, 0xb0+i, -1, 
"v_cmpx_"+COMPI[i]+"_i16")912opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)913(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa8+i, 0xa8+i, -1, "v_cmp_"+COMPI[i]+"_u16")914opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)915(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb8+i, 0xb8+i, -1, "v_cmpx_"+COMPI[i]+"_u16")916opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)917918for i in range(1, 7): # [1..6]919(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa0+i, 0xa0+i, 0x88+i, "v_cmp_"+COMPI[i]+"_i16")920opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)921(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb0+i, 0xb0+i, 0x98+i, "v_cmpx_"+COMPI[i]+"_i16")922opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)923(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xa8+i, 0xa8+i, 0xa8+i, "v_cmp_"+COMPI[i]+"_u16")924opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)925(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, 0xb8+i, 0xb8+i, 0xb8+i, "v_cmpx_"+COMPI[i]+"_u16")926opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)927928for i in range(8):929(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x80+i, 0x80+i, 0xc0+i, 0xc0+i, 0x80+i, "v_cmp_"+COMPI[i]+"_i32")930opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)931(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0x90+i, 0x90+i, 0xd0+i, 0xd0+i, 0x90+i, "v_cmpx_"+COMPI[i]+"_i32")932opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)933(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xa0+i, 0xa0+i, 0xe0+i, 0xe0+i, 0xa0+i, "v_cmp_"+COMPI[i]+"_i64")934opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu64)935(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xb0+i, 0xb0+i, 0xf0+i, 0xf0+i, 0xb0+i, "v_cmpx_"+COMPI[i]+"_i64")936opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu64)937(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xc0+i, 0xc0+i, 0xc8+i, 0xc8+i, 0xc0+i, "v_cmp_"+COMPI[i]+"_u32")938opcode(name, gfx7, gfx9, gfx10, 
Format.VOPC, InstrClass.Valu32)939(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xd0+i, 0xd0+i, 0xd8+i, 0xd8+i, 0xd0+i, "v_cmpx_"+COMPI[i]+"_u32")940opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu32)941(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xe0+i, 0xe0+i, 0xe8+i, 0xe8+i, 0xe0+i, "v_cmp_"+COMPI[i]+"_u64")942opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu64)943(gfx6, gfx7, gfx8, gfx9, gfx10, name) = (0xf0+i, 0xf0+i, 0xf8+i, 0xf8+i, 0xf0+i, "v_cmpx_"+COMPI[i]+"_u64")944opcode(name, gfx7, gfx9, gfx10, Format.VOPC, InstrClass.Valu64)945946947# VOPP instructions: packed 16bit instructions - 1 or 2 inputs and 1 output948VOPP = {949# opcode, name, input/output modifiers950(0x00, "v_pk_mad_i16", False),951(0x01, "v_pk_mul_lo_u16", False),952(0x02, "v_pk_add_i16", False),953(0x03, "v_pk_sub_i16", False),954(0x04, "v_pk_lshlrev_b16", False),955(0x05, "v_pk_lshrrev_b16", False),956(0x06, "v_pk_ashrrev_i16", False),957(0x07, "v_pk_max_i16", False),958(0x08, "v_pk_min_i16", False),959(0x09, "v_pk_mad_u16", False),960(0x0a, "v_pk_add_u16", False),961(0x0b, "v_pk_sub_u16", False),962(0x0c, "v_pk_max_u16", False),963(0x0d, "v_pk_min_u16", False),964(0x0e, "v_pk_fma_f16", True),965(0x0f, "v_pk_add_f16", True),966(0x10, "v_pk_mul_f16", True),967(0x11, "v_pk_min_f16", True),968(0x12, "v_pk_max_f16", True),969(0x20, "v_fma_mix_f32", True), # v_mad_mix_f32 in VEGA ISA, v_fma_mix_f32 in RDNA ISA970(0x21, "v_fma_mixlo_f16", True), # v_mad_mixlo_f16 in VEGA ISA, v_fma_mixlo_f16 in RDNA ISA971(0x22, "v_fma_mixhi_f16", True), # v_mad_mixhi_f16 in VEGA ISA, v_fma_mixhi_f16 in RDNA ISA972}973# note that these are only supported on gfx9+ so we'll need to distinguish between gfx8 and gfx9 here974# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (-1, -1, -1, code, code, name)975for (code, name, modifiers) in VOPP:976opcode(name, -1, code, code, Format.VOP3P, InstrClass.Valu32, modifiers, modifiers)977978979# VINTERP instructions:980VINTRP = {981(0x00, "v_interp_p1_f32"),982(0x01, 
"v_interp_p2_f32"),983(0x02, "v_interp_mov_f32"),984}985# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)986for (code, name) in VINTRP:987opcode(name, code, code, code, Format.VINTRP, InstrClass.Valu32)988989# VOP3 instructions: 3 inputs, 1 output990# VOP3b instructions: have a unique scalar output, e.g. VOP2 with vcc out991VOP3 = {992(0x140, 0x140, 0x1c0, 0x1c0, 0x140, "v_mad_legacy_f32", True, True), # GFX6-GFX10993(0x141, 0x141, 0x1c1, 0x1c1, 0x141, "v_mad_f32", True, True),994(0x142, 0x142, 0x1c2, 0x1c2, 0x142, "v_mad_i32_i24", False, False),995(0x143, 0x143, 0x1c3, 0x1c3, 0x143, "v_mad_u32_u24", False, False),996(0x144, 0x144, 0x1c4, 0x1c4, 0x144, "v_cubeid_f32", True, True),997(0x145, 0x145, 0x1c5, 0x1c5, 0x145, "v_cubesc_f32", True, True),998(0x146, 0x146, 0x1c6, 0x1c6, 0x146, "v_cubetc_f32", True, True),999(0x147, 0x147, 0x1c7, 0x1c7, 0x147, "v_cubema_f32", True, True),1000(0x148, 0x148, 0x1c8, 0x1c8, 0x148, "v_bfe_u32", False, False),1001(0x149, 0x149, 0x1c9, 0x1c9, 0x149, "v_bfe_i32", False, False),1002(0x14a, 0x14a, 0x1ca, 0x1ca, 0x14a, "v_bfi_b32", False, False),1003(0x14b, 0x14b, 0x1cb, 0x1cb, 0x14b, "v_fma_f32", True, True, InstrClass.ValuFma),1004(0x14c, 0x14c, 0x1cc, 0x1cc, 0x14c, "v_fma_f64", True, True, InstrClass.ValuDouble),1005(0x14d, 0x14d, 0x1cd, 0x1cd, 0x14d, "v_lerp_u8", False, False),1006(0x14e, 0x14e, 0x1ce, 0x1ce, 0x14e, "v_alignbit_b32", False, False),1007(0x14f, 0x14f, 0x1cf, 0x1cf, 0x14f, "v_alignbyte_b32", False, False),1008(0x150, 0x150, -1, -1, 0x150, "v_mullit_f32", True, True),1009(0x151, 0x151, 0x1d0, 0x1d0, 0x151, "v_min3_f32", True, True),1010(0x152, 0x152, 0x1d1, 0x1d1, 0x152, "v_min3_i32", False, False),1011(0x153, 0x153, 0x1d2, 0x1d2, 0x153, "v_min3_u32", False, False),1012(0x154, 0x154, 0x1d3, 0x1d3, 0x154, "v_max3_f32", True, True),1013(0x155, 0x155, 0x1d4, 0x1d4, 0x155, "v_max3_i32", False, False),1014(0x156, 0x156, 0x1d5, 0x1d5, 0x156, "v_max3_u32", False, False),1015(0x157, 0x157, 0x1d6, 
0x1d6, 0x157, "v_med3_f32", True, True),1016(0x158, 0x158, 0x1d7, 0x1d7, 0x158, "v_med3_i32", False, False),1017(0x159, 0x159, 0x1d8, 0x1d8, 0x159, "v_med3_u32", False, False),1018(0x15a, 0x15a, 0x1d9, 0x1d9, 0x15a, "v_sad_u8", False, False),1019(0x15b, 0x15b, 0x1da, 0x1da, 0x15b, "v_sad_hi_u8", False, False),1020(0x15c, 0x15c, 0x1db, 0x1db, 0x15c, "v_sad_u16", False, False),1021(0x15d, 0x15d, 0x1dc, 0x1dc, 0x15d, "v_sad_u32", False, False),1022(0x15e, 0x15e, 0x1dd, 0x1dd, 0x15e, "v_cvt_pk_u8_f32", True, False),1023(0x15f, 0x15f, 0x1de, 0x1de, 0x15f, "v_div_fixup_f32", True, True),1024(0x160, 0x160, 0x1df, 0x1df, 0x160, "v_div_fixup_f64", True, True),1025(0x161, 0x161, -1, -1, -1, "v_lshl_b64", False, False, InstrClass.Valu64),1026(0x162, 0x162, -1, -1, -1, "v_lshr_b64", False, False, InstrClass.Valu64),1027(0x163, 0x163, -1, -1, -1, "v_ashr_i64", False, False, InstrClass.Valu64),1028(0x164, 0x164, 0x280, 0x280, 0x164, "v_add_f64", True, True, InstrClass.ValuDoubleAdd),1029(0x165, 0x165, 0x281, 0x281, 0x165, "v_mul_f64", True, True, InstrClass.ValuDouble),1030(0x166, 0x166, 0x282, 0x282, 0x166, "v_min_f64", True, True, InstrClass.ValuDouble),1031(0x167, 0x167, 0x283, 0x283, 0x167, "v_max_f64", True, True, InstrClass.ValuDouble),1032(0x168, 0x168, 0x284, 0x284, 0x168, "v_ldexp_f64", False, True, InstrClass.ValuDouble), # src1 can take input modifiers1033(0x169, 0x169, 0x285, 0x285, 0x169, "v_mul_lo_u32", False, False, InstrClass.ValuQuarterRate32),1034(0x16a, 0x16a, 0x286, 0x286, 0x16a, "v_mul_hi_u32", False, False, InstrClass.ValuQuarterRate32),1035(0x16b, 0x16b, 0x285, 0x285, 0x16b, "v_mul_lo_i32", False, False, InstrClass.ValuQuarterRate32), # identical to v_mul_lo_u321036(0x16c, 0x16c, 0x287, 0x287, 0x16c, "v_mul_hi_i32", False, False, InstrClass.ValuQuarterRate32),1037(0x16d, 0x16d, 0x1e0, 0x1e0, 0x16d, "v_div_scale_f32", True, True), # writes to VCC1038(0x16e, 0x16e, 0x1e1, 0x1e1, 0x16e, "v_div_scale_f64", True, True, InstrClass.ValuDouble), # writes to 
VCC1039(0x16f, 0x16f, 0x1e2, 0x1e2, 0x16f, "v_div_fmas_f32", True, True), # takes VCC input1040(0x170, 0x170, 0x1e3, 0x1e3, 0x170, "v_div_fmas_f64", True, True, InstrClass.ValuDouble), # takes VCC input1041(0x171, 0x171, 0x1e4, 0x1e4, 0x171, "v_msad_u8", False, False),1042(0x172, 0x172, 0x1e5, 0x1e5, 0x172, "v_qsad_pk_u16_u8", False, False),1043(0x172, -1, -1, -1, -1, "v_qsad_u8", False, False), # what's the difference?1044(0x173, 0x173, 0x1e6, 0x1e6, 0x173, "v_mqsad_pk_u16_u8", False, False),1045(0x173, -1, -1, -1, -1, "v_mqsad_u8", False, False), # what's the difference?1046(0x174, 0x174, 0x292, 0x292, 0x174, "v_trig_preop_f64", False, False, InstrClass.ValuDouble),1047( -1, 0x175, 0x1e7, 0x1e7, 0x175, "v_mqsad_u32_u8", False, False),1048( -1, 0x176, 0x1e8, 0x1e8, 0x176, "v_mad_u64_u32", False, False, InstrClass.Valu64),1049( -1, 0x177, 0x1e9, 0x1e9, 0x177, "v_mad_i64_i32", False, False, InstrClass.Valu64),1050( -1, -1, 0x1ea, 0x1ea, -1, "v_mad_legacy_f16", True, True),1051( -1, -1, 0x1eb, 0x1eb, -1, "v_mad_legacy_u16", False, False),1052( -1, -1, 0x1ec, 0x1ec, -1, "v_mad_legacy_i16", False, False),1053( -1, -1, 0x1ed, 0x1ed, 0x344, "v_perm_b32", False, False),1054( -1, -1, 0x1ee, 0x1ee, -1, "v_fma_legacy_f16", True, True, InstrClass.ValuFma),1055( -1, -1, 0x1ef, 0x1ef, -1, "v_div_fixup_legacy_f16", True, True),1056(0x12c, 0x12c, 0x1f0, 0x1f0, -1, "v_cvt_pkaccum_u8_f32", True, False),1057( -1, -1, -1, 0x1f1, 0x373, "v_mad_u32_u16", False, False),1058( -1, -1, -1, 0x1f2, 0x375, "v_mad_i32_i16", False, False),1059( -1, -1, -1, 0x1f3, 0x345, "v_xad_u32", False, False),1060( -1, -1, -1, 0x1f4, 0x351, "v_min3_f16", True, True),1061( -1, -1, -1, 0x1f5, 0x352, "v_min3_i16", False, False),1062( -1, -1, -1, 0x1f6, 0x353, "v_min3_u16", False, False),1063( -1, -1, -1, 0x1f7, 0x354, "v_max3_f16", True, True),1064( -1, -1, -1, 0x1f8, 0x355, "v_max3_i16", False, False),1065( -1, -1, -1, 0x1f9, 0x356, "v_max3_u16", False, False),1066( -1, -1, -1, 0x1fa, 0x357, "v_med3_f16", 
True, True),1067( -1, -1, -1, 0x1fb, 0x358, "v_med3_i16", False, False),1068( -1, -1, -1, 0x1fc, 0x359, "v_med3_u16", False, False),1069( -1, -1, -1, 0x1fd, 0x346, "v_lshl_add_u32", False, False),1070( -1, -1, -1, 0x1fe, 0x347, "v_add_lshl_u32", False, False),1071( -1, -1, -1, 0x1ff, 0x36d, "v_add3_u32", False, False),1072( -1, -1, -1, 0x200, 0x36f, "v_lshl_or_b32", False, False),1073( -1, -1, -1, 0x201, 0x371, "v_and_or_b32", False, False),1074( -1, -1, -1, 0x202, 0x372, "v_or3_b32", False, False),1075( -1, -1, -1, 0x203, -1, "v_mad_f16", True, True),1076( -1, -1, -1, 0x204, 0x340, "v_mad_u16", False, False),1077( -1, -1, -1, 0x205, 0x35e, "v_mad_i16", False, False),1078( -1, -1, -1, 0x206, 0x34b, "v_fma_f16", True, True),1079( -1, -1, -1, 0x207, 0x35f, "v_div_fixup_f16", True, True),1080( -1, -1, 0x274, 0x274, 0x342, "v_interp_p1ll_f16", True, True),1081( -1, -1, 0x275, 0x275, 0x343, "v_interp_p1lv_f16", True, True),1082( -1, -1, 0x276, 0x276, -1, "v_interp_p2_legacy_f16", True, True),1083( -1, -1, -1, 0x277, 0x35a, "v_interp_p2_f16", True, True),1084(0x12b, 0x12b, 0x288, 0x288, 0x362, "v_ldexp_f32", False, True),1085( -1, -1, 0x289, 0x289, 0x360, "v_readlane_b32_e64", False, False),1086( -1, -1, 0x28a, 0x28a, 0x361, "v_writelane_b32_e64", False, False),1087(0x122, 0x122, 0x28b, 0x28b, 0x364, "v_bcnt_u32_b32", False, False),1088(0x123, 0x123, 0x28c, 0x28c, 0x365, "v_mbcnt_lo_u32_b32", False, False),1089( -1, -1, 0x28d, 0x28d, 0x366, "v_mbcnt_hi_u32_b32_e64", False, False),1090( -1, -1, 0x28f, 0x28f, 0x2ff, "v_lshlrev_b64", False, False, InstrClass.Valu64),1091( -1, -1, 0x290, 0x290, 0x300, "v_lshrrev_b64", False, False, InstrClass.Valu64),1092( -1, -1, 0x291, 0x291, 0x301, "v_ashrrev_i64", False, False, InstrClass.Valu64),1093(0x11e, 0x11e, 0x293, 0x293, 0x363, "v_bfm_b32", False, False),1094(0x12d, 0x12d, 0x294, 0x294, 0x368, "v_cvt_pknorm_i16_f32", True, False),1095(0x12e, 0x12e, 0x295, 0x295, 0x369, "v_cvt_pknorm_u16_f32", True, False),1096(0x12f, 0x12f, 
0x296, 0x296, 0x12f, "v_cvt_pkrtz_f16_f32_e64", True, False), # GFX6_7_10 is VOP2 with opcode 0x02f1097(0x130, 0x130, 0x297, 0x297, 0x36a, "v_cvt_pk_u16_u32", False, False),1098(0x131, 0x131, 0x298, 0x298, 0x36b, "v_cvt_pk_i16_i32", False, False),1099( -1, -1, -1, 0x299, 0x312, "v_cvt_pknorm_i16_f16", True, False),1100( -1, -1, -1, 0x29a, 0x313, "v_cvt_pknorm_u16_f16", True, False),1101( -1, -1, -1, 0x29c, 0x37f, "v_add_i32", False, False),1102( -1, -1, -1, 0x29d, 0x376, "v_sub_i32", False, False),1103( -1, -1, -1, 0x29e, 0x30d, "v_add_i16", False, False),1104( -1, -1, -1, 0x29f, 0x30e, "v_sub_i16", False, False),1105( -1, -1, -1, 0x2a0, 0x311, "v_pack_b32_f16", True, False),1106( -1, -1, -1, -1, 0x178, "v_xor3_b32", False, False),1107( -1, -1, -1, -1, 0x377, "v_permlane16_b32", False, False),1108( -1, -1, -1, -1, 0x378, "v_permlanex16_b32", False, False),1109( -1, -1, -1, -1, 0x30f, "v_add_co_u32_e64", False, False),1110( -1, -1, -1, -1, 0x310, "v_sub_co_u32_e64", False, False),1111( -1, -1, -1, -1, 0x319, "v_subrev_co_u32_e64", False, False),1112( -1, -1, -1, -1, 0x303, "v_add_u16_e64", False, False),1113( -1, -1, -1, -1, 0x304, "v_sub_u16_e64", False, False),1114( -1, -1, -1, -1, 0x305, "v_mul_lo_u16_e64", False, False),1115( -1, -1, -1, -1, 0x309, "v_max_u16_e64", False, False),1116( -1, -1, -1, -1, 0x30a, "v_max_i16_e64", False, False),1117( -1, -1, -1, -1, 0x30b, "v_min_u16_e64", False, False),1118( -1, -1, -1, -1, 0x30c, "v_min_i16_e64", False, False),1119( -1, -1, -1, -1, 0x307, "v_lshrrev_b16_e64", False, False),1120( -1, -1, -1, -1, 0x308, "v_ashrrev_i16_e64", False, False),1121( -1, -1, -1, -1, 0x314, "v_lshlrev_b16_e64", False, False),1122( -1, -1, -1, -1, 0x140, "v_fma_legacy_f32", True, True, InstrClass.ValuFma), #GFX10.3+1123}1124for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod, cls) in default_class(VOP3, InstrClass.Valu32):1125opcode(name, gfx7, gfx9, gfx10, Format.VOP3, cls, in_mod, out_mod)112611271128# DS instructions: 3 inputs (1 addr, 
2 data), 1 output1129DS = {1130(0x00, 0x00, 0x00, 0x00, 0x00, "ds_add_u32"),1131(0x01, 0x01, 0x01, 0x01, 0x01, "ds_sub_u32"),1132(0x02, 0x02, 0x02, 0x02, 0x02, "ds_rsub_u32"),1133(0x03, 0x03, 0x03, 0x03, 0x03, "ds_inc_u32"),1134(0x04, 0x04, 0x04, 0x04, 0x04, "ds_dec_u32"),1135(0x05, 0x05, 0x05, 0x05, 0x05, "ds_min_i32"),1136(0x06, 0x06, 0x06, 0x06, 0x06, "ds_max_i32"),1137(0x07, 0x07, 0x07, 0x07, 0x07, "ds_min_u32"),1138(0x08, 0x08, 0x08, 0x08, 0x08, "ds_max_u32"),1139(0x09, 0x09, 0x09, 0x09, 0x09, "ds_and_b32"),1140(0x0a, 0x0a, 0x0a, 0x0a, 0x0a, "ds_or_b32"),1141(0x0b, 0x0b, 0x0b, 0x0b, 0x0b, "ds_xor_b32"),1142(0x0c, 0x0c, 0x0c, 0x0c, 0x0c, "ds_mskor_b32"),1143(0x0d, 0x0d, 0x0d, 0x0d, 0x0d, "ds_write_b32"),1144(0x0e, 0x0e, 0x0e, 0x0e, 0x0e, "ds_write2_b32"),1145(0x0f, 0x0f, 0x0f, 0x0f, 0x0f, "ds_write2st64_b32"),1146(0x10, 0x10, 0x10, 0x10, 0x10, "ds_cmpst_b32"),1147(0x11, 0x11, 0x11, 0x11, 0x11, "ds_cmpst_f32"),1148(0x12, 0x12, 0x12, 0x12, 0x12, "ds_min_f32"),1149(0x13, 0x13, 0x13, 0x13, 0x13, "ds_max_f32"),1150( -1, 0x14, 0x14, 0x14, 0x14, "ds_nop"),1151( -1, -1, 0x15, 0x15, 0x15, "ds_add_f32"),1152( -1, -1, 0x1d, 0x1d, 0xb0, "ds_write_addtid_b32"),1153(0x1e, 0x1e, 0x1e, 0x1e, 0x1e, "ds_write_b8"),1154(0x1f, 0x1f, 0x1f, 0x1f, 0x1f, "ds_write_b16"),1155(0x20, 0x20, 0x20, 0x20, 0x20, "ds_add_rtn_u32"),1156(0x21, 0x21, 0x21, 0x21, 0x21, "ds_sub_rtn_u32"),1157(0x22, 0x22, 0x22, 0x22, 0x22, "ds_rsub_rtn_u32"),1158(0x23, 0x23, 0x23, 0x23, 0x23, "ds_inc_rtn_u32"),1159(0x24, 0x24, 0x24, 0x24, 0x24, "ds_dec_rtn_u32"),1160(0x25, 0x25, 0x25, 0x25, 0x25, "ds_min_rtn_i32"),1161(0x26, 0x26, 0x26, 0x26, 0x26, "ds_max_rtn_i32"),1162(0x27, 0x27, 0x27, 0x27, 0x27, "ds_min_rtn_u32"),1163(0x28, 0x28, 0x28, 0x28, 0x28, "ds_max_rtn_u32"),1164(0x29, 0x29, 0x29, 0x29, 0x29, "ds_and_rtn_b32"),1165(0x2a, 0x2a, 0x2a, 0x2a, 0x2a, "ds_or_rtn_b32"),1166(0x2b, 0x2b, 0x2b, 0x2b, 0x2b, "ds_xor_rtn_b32"),1167(0x2c, 0x2c, 0x2c, 0x2c, 0x2c, "ds_mskor_rtn_b32"),1168(0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 
"ds_wrxchg_rtn_b32"),1169(0x2e, 0x2e, 0x2e, 0x2e, 0x2e, "ds_wrxchg2_rtn_b32"),1170(0x2f, 0x2f, 0x2f, 0x2f, 0x2f, "ds_wrxchg2st64_rtn_b32"),1171(0x30, 0x30, 0x30, 0x30, 0x30, "ds_cmpst_rtn_b32"),1172(0x31, 0x31, 0x31, 0x31, 0x31, "ds_cmpst_rtn_f32"),1173(0x32, 0x32, 0x32, 0x32, 0x32, "ds_min_rtn_f32"),1174(0x33, 0x33, 0x33, 0x33, 0x33, "ds_max_rtn_f32"),1175( -1, 0x34, 0x34, 0x34, 0x34, "ds_wrap_rtn_b32"),1176( -1, -1, 0x35, 0x35, 0x55, "ds_add_rtn_f32"),1177(0x36, 0x36, 0x36, 0x36, 0x36, "ds_read_b32"),1178(0x37, 0x37, 0x37, 0x37, 0x37, "ds_read2_b32"),1179(0x38, 0x38, 0x38, 0x38, 0x38, "ds_read2st64_b32"),1180(0x39, 0x39, 0x39, 0x39, 0x39, "ds_read_i8"),1181(0x3a, 0x3a, 0x3a, 0x3a, 0x3a, "ds_read_u8"),1182(0x3b, 0x3b, 0x3b, 0x3b, 0x3b, "ds_read_i16"),1183(0x3c, 0x3c, 0x3c, 0x3c, 0x3c, "ds_read_u16"),1184(0x35, 0x35, 0x3d, 0x3d, 0x35, "ds_swizzle_b32"), #data1 & offset, no addr/data21185( -1, -1, 0x3e, 0x3e, 0xb2, "ds_permute_b32"),1186( -1, -1, 0x3f, 0x3f, 0xb3, "ds_bpermute_b32"),1187(0x40, 0x40, 0x40, 0x40, 0x40, "ds_add_u64"),1188(0x41, 0x41, 0x41, 0x41, 0x41, "ds_sub_u64"),1189(0x42, 0x42, 0x42, 0x42, 0x42, "ds_rsub_u64"),1190(0x43, 0x43, 0x43, 0x43, 0x43, "ds_inc_u64"),1191(0x44, 0x44, 0x44, 0x44, 0x44, "ds_dec_u64"),1192(0x45, 0x45, 0x45, 0x45, 0x45, "ds_min_i64"),1193(0x46, 0x46, 0x46, 0x46, 0x46, "ds_max_i64"),1194(0x47, 0x47, 0x47, 0x47, 0x47, "ds_min_u64"),1195(0x48, 0x48, 0x48, 0x48, 0x48, "ds_max_u64"),1196(0x49, 0x49, 0x49, 0x49, 0x49, "ds_and_b64"),1197(0x4a, 0x4a, 0x4a, 0x4a, 0x4a, "ds_or_b64"),1198(0x4b, 0x4b, 0x4b, 0x4b, 0x4b, "ds_xor_b64"),1199(0x4c, 0x4c, 0x4c, 0x4c, 0x4c, "ds_mskor_b64"),1200(0x4d, 0x4d, 0x4d, 0x4d, 0x4d, "ds_write_b64"),1201(0x4e, 0x4e, 0x4e, 0x4e, 0x4e, "ds_write2_b64"),1202(0x4f, 0x4f, 0x4f, 0x4f, 0x4f, "ds_write2st64_b64"),1203(0x50, 0x50, 0x50, 0x50, 0x50, "ds_cmpst_b64"),1204(0x51, 0x51, 0x51, 0x51, 0x51, "ds_cmpst_f64"),1205(0x52, 0x52, 0x52, 0x52, 0x52, "ds_min_f64"),1206(0x53, 0x53, 0x53, 0x53, 0x53, 
"ds_max_f64"),1207( -1, -1, -1, 0x54, 0xa0, "ds_write_b8_d16_hi"),1208( -1, -1, -1, 0x55, 0xa1, "ds_write_b16_d16_hi"),1209( -1, -1, -1, 0x56, 0xa2, "ds_read_u8_d16"),1210( -1, -1, -1, 0x57, 0xa3, "ds_read_u8_d16_hi"),1211( -1, -1, -1, 0x58, 0xa4, "ds_read_i8_d16"),1212( -1, -1, -1, 0x59, 0xa5, "ds_read_i8_d16_hi"),1213( -1, -1, -1, 0x5a, 0xa6, "ds_read_u16_d16"),1214( -1, -1, -1, 0x5b, 0xa7, "ds_read_u16_d16_hi"),1215(0x60, 0x60, 0x60, 0x60, 0x60, "ds_add_rtn_u64"),1216(0x61, 0x61, 0x61, 0x61, 0x61, "ds_sub_rtn_u64"),1217(0x62, 0x62, 0x62, 0x62, 0x62, "ds_rsub_rtn_u64"),1218(0x63, 0x63, 0x63, 0x63, 0x63, "ds_inc_rtn_u64"),1219(0x64, 0x64, 0x64, 0x64, 0x64, "ds_dec_rtn_u64"),1220(0x65, 0x65, 0x65, 0x65, 0x65, "ds_min_rtn_i64"),1221(0x66, 0x66, 0x66, 0x66, 0x66, "ds_max_rtn_i64"),1222(0x67, 0x67, 0x67, 0x67, 0x67, "ds_min_rtn_u64"),1223(0x68, 0x68, 0x68, 0x68, 0x68, "ds_max_rtn_u64"),1224(0x69, 0x69, 0x69, 0x69, 0x69, "ds_and_rtn_b64"),1225(0x6a, 0x6a, 0x6a, 0x6a, 0x6a, "ds_or_rtn_b64"),1226(0x6b, 0x6b, 0x6b, 0x6b, 0x6b, "ds_xor_rtn_b64"),1227(0x6c, 0x6c, 0x6c, 0x6c, 0x6c, "ds_mskor_rtn_b64"),1228(0x6d, 0x6d, 0x6d, 0x6d, 0x6d, "ds_wrxchg_rtn_b64"),1229(0x6e, 0x6e, 0x6e, 0x6e, 0x6e, "ds_wrxchg2_rtn_b64"),1230(0x6f, 0x6f, 0x6f, 0x6f, 0x6f, "ds_wrxchg2st64_rtn_b64"),1231(0x70, 0x70, 0x70, 0x70, 0x70, "ds_cmpst_rtn_b64"),1232(0x71, 0x71, 0x71, 0x71, 0x71, "ds_cmpst_rtn_f64"),1233(0x72, 0x72, 0x72, 0x72, 0x72, "ds_min_rtn_f64"),1234(0x73, 0x73, 0x73, 0x73, 0x73, "ds_max_rtn_f64"),1235(0x76, 0x76, 0x76, 0x76, 0x76, "ds_read_b64"),1236(0x77, 0x77, 0x77, 0x77, 0x77, "ds_read2_b64"),1237(0x78, 0x78, 0x78, 0x78, 0x78, "ds_read2st64_b64"),1238( -1, 0x7e, 0x7e, 0x7e, 0x7e, "ds_condxchg32_rtn_b64"),1239(0x80, 0x80, 0x80, 0x80, 0x80, "ds_add_src2_u32"),1240(0x81, 0x81, 0x81, 0x81, 0x81, "ds_sub_src2_u32"),1241(0x82, 0x82, 0x82, 0x82, 0x82, "ds_rsub_src2_u32"),1242(0x83, 0x83, 0x83, 0x83, 0x83, "ds_inc_src2_u32"),1243(0x84, 0x84, 0x84, 0x84, 0x84, "ds_dec_src2_u32"),1244(0x85, 
0x85, 0x85, 0x85, 0x85, "ds_min_src2_i32"),1245(0x86, 0x86, 0x86, 0x86, 0x86, "ds_max_src2_i32"),1246(0x87, 0x87, 0x87, 0x87, 0x87, "ds_min_src2_u32"),1247(0x88, 0x88, 0x88, 0x88, 0x88, "ds_max_src2_u32"),1248(0x89, 0x89, 0x89, 0x89, 0x89, "ds_and_src2_b32"),1249(0x8a, 0x8a, 0x8a, 0x8a, 0x8a, "ds_or_src2_b32"),1250(0x8b, 0x8b, 0x8b, 0x8b, 0x8b, "ds_xor_src2_b32"),1251(0x8d, 0x8d, 0x8d, 0x8d, 0x8d, "ds_write_src2_b32"),1252(0x92, 0x92, 0x92, 0x92, 0x92, "ds_min_src2_f32"),1253(0x93, 0x93, 0x93, 0x93, 0x93, "ds_max_src2_f32"),1254( -1, -1, 0x95, 0x95, 0x95, "ds_add_src2_f32"),1255( -1, 0x18, 0x98, 0x98, 0x18, "ds_gws_sema_release_all"),1256(0x19, 0x19, 0x99, 0x99, 0x19, "ds_gws_init"),1257(0x1a, 0x1a, 0x9a, 0x9a, 0x1a, "ds_gws_sema_v"),1258(0x1b, 0x1b, 0x9b, 0x9b, 0x1b, "ds_gws_sema_br"),1259(0x1c, 0x1c, 0x9c, 0x9c, 0x1c, "ds_gws_sema_p"),1260(0x1d, 0x1d, 0x9d, 0x9d, 0x1d, "ds_gws_barrier"),1261( -1, -1, 0xb6, 0xb6, 0xb1, "ds_read_addtid_b32"),1262(0x3d, 0x3d, 0xbd, 0xbd, 0x3d, "ds_consume"),1263(0x3e, 0x3e, 0xbe, 0xbe, 0x3e, "ds_append"),1264(0x3f, 0x3f, 0xbf, 0xbf, 0x3f, "ds_ordered_count"),1265(0xc0, 0xc0, 0xc0, 0xc0, 0xc0, "ds_add_src2_u64"),1266(0xc1, 0xc1, 0xc1, 0xc1, 0xc1, "ds_sub_src2_u64"),1267(0xc2, 0xc2, 0xc2, 0xc2, 0xc2, "ds_rsub_src2_u64"),1268(0xc3, 0xc3, 0xc3, 0xc3, 0xc3, "ds_inc_src2_u64"),1269(0xc4, 0xc4, 0xc4, 0xc4, 0xc4, "ds_dec_src2_u64"),1270(0xc5, 0xc5, 0xc5, 0xc5, 0xc5, "ds_min_src2_i64"),1271(0xc6, 0xc6, 0xc6, 0xc6, 0xc6, "ds_max_src2_i64"),1272(0xc7, 0xc7, 0xc7, 0xc7, 0xc7, "ds_min_src2_u64"),1273(0xc8, 0xc8, 0xc8, 0xc8, 0xc8, "ds_max_src2_u64"),1274(0xc9, 0xc9, 0xc9, 0xc9, 0xc9, "ds_and_src2_b64"),1275(0xca, 0xca, 0xca, 0xca, 0xca, "ds_or_src2_b64"),1276(0xcb, 0xcb, 0xcb, 0xcb, 0xcb, "ds_xor_src2_b64"),1277(0xcd, 0xcd, 0xcd, 0xcd, 0xcd, "ds_write_src2_b64"),1278(0xd2, 0xd2, 0xd2, 0xd2, 0xd2, "ds_min_src2_f64"),1279(0xd3, 0xd3, 0xd3, 0xd3, 0xd3, "ds_max_src2_f64"),1280( -1, 0xde, 0xde, 0xde, 0xde, "ds_write_b96"),1281( -1, 0xdf, 0xdf, 0xdf, 
0xdf, "ds_write_b128"),1282( -1, 0xfd, 0xfd, -1, -1, "ds_condxchg32_rtn_b128"),1283( -1, 0xfe, 0xfe, 0xfe, 0xfe, "ds_read_b96"),1284( -1, 0xff, 0xff, 0xff, 0xff, "ds_read_b128"),1285}1286for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in DS:1287opcode(name, gfx7, gfx9, gfx10, Format.DS, InstrClass.DS)12881289# MUBUF instructions:1290MUBUF = {1291(0x00, 0x00, 0x00, 0x00, 0x00, "buffer_load_format_x"),1292(0x01, 0x01, 0x01, 0x01, 0x01, "buffer_load_format_xy"),1293(0x02, 0x02, 0x02, 0x02, 0x02, "buffer_load_format_xyz"),1294(0x03, 0x03, 0x03, 0x03, 0x03, "buffer_load_format_xyzw"),1295(0x04, 0x04, 0x04, 0x04, 0x04, "buffer_store_format_x"),1296(0x05, 0x05, 0x05, 0x05, 0x05, "buffer_store_format_xy"),1297(0x06, 0x06, 0x06, 0x06, 0x06, "buffer_store_format_xyz"),1298(0x07, 0x07, 0x07, 0x07, 0x07, "buffer_store_format_xyzw"),1299( -1, -1, 0x08, 0x08, 0x80, "buffer_load_format_d16_x"),1300( -1, -1, 0x09, 0x09, 0x81, "buffer_load_format_d16_xy"),1301( -1, -1, 0x0a, 0x0a, 0x82, "buffer_load_format_d16_xyz"),1302( -1, -1, 0x0b, 0x0b, 0x83, "buffer_load_format_d16_xyzw"),1303( -1, -1, 0x0c, 0x0c, 0x84, "buffer_store_format_d16_x"),1304( -1, -1, 0x0d, 0x0d, 0x85, "buffer_store_format_d16_xy"),1305( -1, -1, 0x0e, 0x0e, 0x86, "buffer_store_format_d16_xyz"),1306( -1, -1, 0x0f, 0x0f, 0x87, "buffer_store_format_d16_xyzw"),1307(0x08, 0x08, 0x10, 0x10, 0x08, "buffer_load_ubyte"),1308(0x09, 0x09, 0x11, 0x11, 0x09, "buffer_load_sbyte"),1309(0x0a, 0x0a, 0x12, 0x12, 0x0a, "buffer_load_ushort"),1310(0x0b, 0x0b, 0x13, 0x13, 0x0b, "buffer_load_sshort"),1311(0x0c, 0x0c, 0x14, 0x14, 0x0c, "buffer_load_dword"),1312(0x0d, 0x0d, 0x15, 0x15, 0x0d, "buffer_load_dwordx2"),1313( -1, 0x0f, 0x16, 0x16, 0x0f, "buffer_load_dwordx3"),1314(0x0f, 0x0e, 0x17, 0x17, 0x0e, "buffer_load_dwordx4"),1315(0x18, 0x18, 0x18, 0x18, 0x18, "buffer_store_byte"),1316( -1, -1, -1, 0x19, 0x19, "buffer_store_byte_d16_hi"),1317(0x1a, 0x1a, 0x1a, 0x1a, 0x1a, "buffer_store_short"),1318( -1, -1, -1, 0x1b, 0x1b, 
"buffer_store_short_d16_hi"),1319(0x1c, 0x1c, 0x1c, 0x1c, 0x1c, "buffer_store_dword"),1320(0x1d, 0x1d, 0x1d, 0x1d, 0x1d, "buffer_store_dwordx2"),1321( -1, 0x1f, 0x1e, 0x1e, 0x1f, "buffer_store_dwordx3"),1322(0x1e, 0x1e, 0x1f, 0x1f, 0x1e, "buffer_store_dwordx4"),1323( -1, -1, -1, 0x20, 0x20, "buffer_load_ubyte_d16"),1324( -1, -1, -1, 0x21, 0x21, "buffer_load_ubyte_d16_hi"),1325( -1, -1, -1, 0x22, 0x22, "buffer_load_sbyte_d16"),1326( -1, -1, -1, 0x23, 0x23, "buffer_load_sbyte_d16_hi"),1327( -1, -1, -1, 0x24, 0x24, "buffer_load_short_d16"),1328( -1, -1, -1, 0x25, 0x25, "buffer_load_short_d16_hi"),1329( -1, -1, -1, 0x26, 0x26, "buffer_load_format_d16_hi_x"),1330( -1, -1, -1, 0x27, 0x27, "buffer_store_format_d16_hi_x"),1331( -1, -1, 0x3d, 0x3d, -1, "buffer_store_lds_dword"),1332(0x71, 0x71, 0x3e, 0x3e, -1, "buffer_wbinvl1"),1333(0x70, 0x70, 0x3f, 0x3f, -1, "buffer_wbinvl1_vol"),1334(0x30, 0x30, 0x40, 0x40, 0x30, "buffer_atomic_swap"),1335(0x31, 0x31, 0x41, 0x41, 0x31, "buffer_atomic_cmpswap"),1336(0x32, 0x32, 0x42, 0x42, 0x32, "buffer_atomic_add"),1337(0x33, 0x33, 0x43, 0x43, 0x33, "buffer_atomic_sub"),1338(0x34, -1, -1, -1, -1, "buffer_atomic_rsub"),1339(0x35, 0x35, 0x44, 0x44, 0x35, "buffer_atomic_smin"),1340(0x36, 0x36, 0x45, 0x45, 0x36, "buffer_atomic_umin"),1341(0x37, 0x37, 0x46, 0x46, 0x37, "buffer_atomic_smax"),1342(0x38, 0x38, 0x47, 0x47, 0x38, "buffer_atomic_umax"),1343(0x39, 0x39, 0x48, 0x48, 0x39, "buffer_atomic_and"),1344(0x3a, 0x3a, 0x49, 0x49, 0x3a, "buffer_atomic_or"),1345(0x3b, 0x3b, 0x4a, 0x4a, 0x3b, "buffer_atomic_xor"),1346(0x3c, 0x3c, 0x4b, 0x4b, 0x3c, "buffer_atomic_inc"),1347(0x3d, 0x3d, 0x4c, 0x4c, 0x3d, "buffer_atomic_dec"),1348(0x3e, 0x3e, -1, -1, 0x3e, "buffer_atomic_fcmpswap"),1349(0x3f, 0x3f, -1, -1, 0x3f, "buffer_atomic_fmin"),1350(0x40, 0x40, -1, -1, 0x40, "buffer_atomic_fmax"),1351(0x50, 0x50, 0x60, 0x60, 0x50, "buffer_atomic_swap_x2"),1352(0x51, 0x51, 0x61, 0x61, 0x51, "buffer_atomic_cmpswap_x2"),1353(0x52, 0x52, 0x62, 0x62, 0x52, 
"buffer_atomic_add_x2"),1354(0x53, 0x53, 0x63, 0x63, 0x53, "buffer_atomic_sub_x2"),1355(0x54, -1, -1, -1, -1, "buffer_atomic_rsub_x2"),1356(0x55, 0x55, 0x64, 0x64, 0x55, "buffer_atomic_smin_x2"),1357(0x56, 0x56, 0x65, 0x65, 0x56, "buffer_atomic_umin_x2"),1358(0x57, 0x57, 0x66, 0x66, 0x57, "buffer_atomic_smax_x2"),1359(0x58, 0x58, 0x67, 0x67, 0x58, "buffer_atomic_umax_x2"),1360(0x59, 0x59, 0x68, 0x68, 0x59, "buffer_atomic_and_x2"),1361(0x5a, 0x5a, 0x69, 0x69, 0x5a, "buffer_atomic_or_x2"),1362(0x5b, 0x5b, 0x6a, 0x6a, 0x5b, "buffer_atomic_xor_x2"),1363(0x5c, 0x5c, 0x6b, 0x6b, 0x5c, "buffer_atomic_inc_x2"),1364(0x5d, 0x5d, 0x6c, 0x6c, 0x5d, "buffer_atomic_dec_x2"),1365(0x5e, 0x5e, -1, -1, 0x5e, "buffer_atomic_fcmpswap_x2"),1366(0x5f, 0x5f, -1, -1, 0x5f, "buffer_atomic_fmin_x2"),1367(0x60, 0x60, -1, -1, 0x60, "buffer_atomic_fmax_x2"),1368( -1, -1, -1, -1, 0x71, "buffer_gl0_inv"),1369( -1, -1, -1, -1, 0x72, "buffer_gl1_inv"),1370( -1, -1, -1, -1, 0x34, "buffer_atomic_csub"), #GFX10.3+. seems glc must be set1371}1372for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in MUBUF:1373opcode(name, gfx7, gfx9, gfx10, Format.MUBUF, InstrClass.VMem, is_atomic = "atomic" in name)13741375MTBUF = {1376(0x00, 0x00, 0x00, 0x00, 0x00, "tbuffer_load_format_x"),1377(0x01, 0x01, 0x01, 0x01, 0x01, "tbuffer_load_format_xy"),1378(0x02, 0x02, 0x02, 0x02, 0x02, "tbuffer_load_format_xyz"),1379(0x03, 0x03, 0x03, 0x03, 0x03, "tbuffer_load_format_xyzw"),1380(0x04, 0x04, 0x04, 0x04, 0x04, "tbuffer_store_format_x"),1381(0x05, 0x05, 0x05, 0x05, 0x05, "tbuffer_store_format_xy"),1382(0x06, 0x06, 0x06, 0x06, 0x06, "tbuffer_store_format_xyz"),1383(0x07, 0x07, 0x07, 0x07, 0x07, "tbuffer_store_format_xyzw"),1384( -1, -1, 0x08, 0x08, 0x08, "tbuffer_load_format_d16_x"),1385( -1, -1, 0x09, 0x09, 0x09, "tbuffer_load_format_d16_xy"),1386( -1, -1, 0x0a, 0x0a, 0x0a, "tbuffer_load_format_d16_xyz"),1387( -1, -1, 0x0b, 0x0b, 0x0b, "tbuffer_load_format_d16_xyzw"),1388( -1, -1, 0x0c, 0x0c, 0x0c, 
"tbuffer_store_format_d16_x"),1389( -1, -1, 0x0d, 0x0d, 0x0d, "tbuffer_store_format_d16_xy"),1390( -1, -1, 0x0e, 0x0e, 0x0e, "tbuffer_store_format_d16_xyz"),1391( -1, -1, 0x0f, 0x0f, 0x0f, "tbuffer_store_format_d16_xyzw"),1392}1393for (gfx6, gfx7, gfx8, gfx9, gfx10, name) in MTBUF:1394opcode(name, gfx7, gfx9, gfx10, Format.MTBUF, InstrClass.VMem)139513961397IMAGE = {1398(0x00, "image_load"),1399(0x01, "image_load_mip"),1400(0x02, "image_load_pck"),1401(0x03, "image_load_pck_sgn"),1402(0x04, "image_load_mip_pck"),1403(0x05, "image_load_mip_pck_sgn"),1404(0x08, "image_store"),1405(0x09, "image_store_mip"),1406(0x0a, "image_store_pck"),1407(0x0b, "image_store_mip_pck"),1408(0x0e, "image_get_resinfo"),1409(0x60, "image_get_lod"),1410}1411# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)1412for (code, name) in IMAGE:1413opcode(name, code, code, code, Format.MIMG, InstrClass.VMem)14141415opcode("image_msaa_load", -1, -1, 0x80, Format.MIMG, InstrClass.VMem) #GFX10.3+14161417IMAGE_ATOMIC = {1418(0x0f, 0x0f, 0x10, "image_atomic_swap"),1419(0x10, 0x10, 0x11, "image_atomic_cmpswap"),1420(0x11, 0x11, 0x12, "image_atomic_add"),1421(0x12, 0x12, 0x13, "image_atomic_sub"),1422(0x13, -1, -1, "image_atomic_rsub"),1423(0x14, 0x14, 0x14, "image_atomic_smin"),1424(0x15, 0x15, 0x15, "image_atomic_umin"),1425(0x16, 0x16, 0x16, "image_atomic_smax"),1426(0x17, 0x17, 0x17, "image_atomic_umax"),1427(0x18, 0x18, 0x18, "image_atomic_and"),1428(0x19, 0x19, 0x19, "image_atomic_or"),1429(0x1a, 0x1a, 0x1a, "image_atomic_xor"),1430(0x1b, 0x1b, 0x1b, "image_atomic_inc"),1431(0x1c, 0x1c, 0x1c, "image_atomic_dec"),1432(0x1d, 0x1d, -1, "image_atomic_fcmpswap"),1433(0x1e, 0x1e, -1, "image_atomic_fmin"),1434(0x1f, 0x1f, -1, "image_atomic_fmax"),1435}1436# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (gfx6, gfx7, gfx89, gfx89, ???, name)1437# gfx7 and gfx10 opcodes are the same here1438for (gfx6, gfx7, gfx89, name) in IMAGE_ATOMIC:1439opcode(name, gfx7, gfx89, gfx7, Format.MIMG, 
InstrClass.VMem, is_atomic = True)14401441IMAGE_SAMPLE = {1442(0x20, "image_sample"),1443(0x21, "image_sample_cl"),1444(0x22, "image_sample_d"),1445(0x23, "image_sample_d_cl"),1446(0x24, "image_sample_l"),1447(0x25, "image_sample_b"),1448(0x26, "image_sample_b_cl"),1449(0x27, "image_sample_lz"),1450(0x28, "image_sample_c"),1451(0x29, "image_sample_c_cl"),1452(0x2a, "image_sample_c_d"),1453(0x2b, "image_sample_c_d_cl"),1454(0x2c, "image_sample_c_l"),1455(0x2d, "image_sample_c_b"),1456(0x2e, "image_sample_c_b_cl"),1457(0x2f, "image_sample_c_lz"),1458(0x30, "image_sample_o"),1459(0x31, "image_sample_cl_o"),1460(0x32, "image_sample_d_o"),1461(0x33, "image_sample_d_cl_o"),1462(0x34, "image_sample_l_o"),1463(0x35, "image_sample_b_o"),1464(0x36, "image_sample_b_cl_o"),1465(0x37, "image_sample_lz_o"),1466(0x38, "image_sample_c_o"),1467(0x39, "image_sample_c_cl_o"),1468(0x3a, "image_sample_c_d_o"),1469(0x3b, "image_sample_c_d_cl_o"),1470(0x3c, "image_sample_c_l_o"),1471(0x3d, "image_sample_c_b_o"),1472(0x3e, "image_sample_c_b_cl_o"),1473(0x3f, "image_sample_c_lz_o"),1474(0x68, "image_sample_cd"),1475(0x69, "image_sample_cd_cl"),1476(0x6a, "image_sample_c_cd"),1477(0x6b, "image_sample_c_cd_cl"),1478(0x6c, "image_sample_cd_o"),1479(0x6d, "image_sample_cd_cl_o"),1480(0x6e, "image_sample_c_cd_o"),1481(0x6f, "image_sample_c_cd_cl_o"),1482}1483# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)1484for (code, name) in IMAGE_SAMPLE:1485opcode(name, code, code, code, Format.MIMG, InstrClass.VMem)14861487IMAGE_GATHER4 = {1488(0x40, "image_gather4"),1489(0x41, "image_gather4_cl"),1490#(0x42, "image_gather4h"), VEGA only?1491(0x44, "image_gather4_l"), # following instructions have different opcodes according to ISA sheet.1492(0x45, "image_gather4_b"),1493(0x46, "image_gather4_b_cl"),1494(0x47, "image_gather4_lz"),1495(0x48, "image_gather4_c"),1496(0x49, "image_gather4_c_cl"), # previous instructions have different opcodes according to ISA sheet.1497#(0x4a, 
"image_gather4h_pck"), VEGA only?1498#(0x4b, "image_gather8h_pck"), VGEA only?1499(0x4c, "image_gather4_c_l"),1500(0x4d, "image_gather4_c_b"),1501(0x4e, "image_gather4_c_b_cl"),1502(0x4f, "image_gather4_c_lz"),1503(0x50, "image_gather4_o"),1504(0x51, "image_gather4_cl_o"),1505(0x54, "image_gather4_l_o"),1506(0x55, "image_gather4_b_o"),1507(0x56, "image_gather4_b_cl_o"),1508(0x57, "image_gather4_lz_o"),1509(0x58, "image_gather4_c_o"),1510(0x59, "image_gather4_c_cl_o"),1511(0x5c, "image_gather4_c_l_o"),1512(0x5d, "image_gather4_c_b_o"),1513(0x5e, "image_gather4_c_b_cl_o"),1514(0x5f, "image_gather4_c_lz_o"),1515}1516# (gfx6, gfx7, gfx8, gfx9, gfx10, name) = (code, code, code, code, code, name)1517for (code, name) in IMAGE_GATHER4:1518opcode(name, code, code, code, Format.MIMG, InstrClass.VMem)15191520opcode("image_bvh64_intersect_ray", -1, -1, 231, Format.MIMG, InstrClass.VMem)15211522FLAT = {1523#GFX7, GFX8_9, GFX101524(0x08, 0x10, 0x08, "flat_load_ubyte"),1525(0x09, 0x11, 0x09, "flat_load_sbyte"),1526(0x0a, 0x12, 0x0a, "flat_load_ushort"),1527(0x0b, 0x13, 0x0b, "flat_load_sshort"),1528(0x0c, 0x14, 0x0c, "flat_load_dword"),1529(0x0d, 0x15, 0x0d, "flat_load_dwordx2"),1530(0x0f, 0x16, 0x0f, "flat_load_dwordx3"),1531(0x0e, 0x17, 0x0e, "flat_load_dwordx4"),1532(0x18, 0x18, 0x18, "flat_store_byte"),1533( -1, 0x19, 0x19, "flat_store_byte_d16_hi"),1534(0x1a, 0x1a, 0x1a, "flat_store_short"),1535( -1, 0x1b, 0x1b, "flat_store_short_d16_hi"),1536(0x1c, 0x1c, 0x1c, "flat_store_dword"),1537(0x1d, 0x1d, 0x1d, "flat_store_dwordx2"),1538(0x1f, 0x1e, 0x1f, "flat_store_dwordx3"),1539(0x1e, 0x1f, 0x1e, "flat_store_dwordx4"),1540( -1, 0x20, 0x20, "flat_load_ubyte_d16"),1541( -1, 0x21, 0x21, "flat_load_ubyte_d16_hi"),1542( -1, 0x22, 0x22, "flat_load_sbyte_d16"),1543( -1, 0x23, 0x23, "flat_load_sbyte_d16_hi"),1544( -1, 0x24, 0x24, "flat_load_short_d16"),1545( -1, 0x25, 0x25, "flat_load_short_d16_hi"),1546(0x30, 0x40, 0x30, "flat_atomic_swap"),1547(0x31, 0x41, 0x31, 
"flat_atomic_cmpswap"),1548(0x32, 0x42, 0x32, "flat_atomic_add"),1549(0x33, 0x43, 0x33, "flat_atomic_sub"),1550(0x35, 0x44, 0x35, "flat_atomic_smin"),1551(0x36, 0x45, 0x36, "flat_atomic_umin"),1552(0x37, 0x46, 0x37, "flat_atomic_smax"),1553(0x38, 0x47, 0x38, "flat_atomic_umax"),1554(0x39, 0x48, 0x39, "flat_atomic_and"),1555(0x3a, 0x49, 0x3a, "flat_atomic_or"),1556(0x3b, 0x4a, 0x3b, "flat_atomic_xor"),1557(0x3c, 0x4b, 0x3c, "flat_atomic_inc"),1558(0x3d, 0x4c, 0x3d, "flat_atomic_dec"),1559(0x3e, -1, 0x3e, "flat_atomic_fcmpswap"),1560(0x3f, -1, 0x3f, "flat_atomic_fmin"),1561(0x40, -1, 0x40, "flat_atomic_fmax"),1562(0x50, 0x60, 0x50, "flat_atomic_swap_x2"),1563(0x51, 0x61, 0x51, "flat_atomic_cmpswap_x2"),1564(0x52, 0x62, 0x52, "flat_atomic_add_x2"),1565(0x53, 0x63, 0x53, "flat_atomic_sub_x2"),1566(0x55, 0x64, 0x55, "flat_atomic_smin_x2"),1567(0x56, 0x65, 0x56, "flat_atomic_umin_x2"),1568(0x57, 0x66, 0x57, "flat_atomic_smax_x2"),1569(0x58, 0x67, 0x58, "flat_atomic_umax_x2"),1570(0x59, 0x68, 0x59, "flat_atomic_and_x2"),1571(0x5a, 0x69, 0x5a, "flat_atomic_or_x2"),1572(0x5b, 0x6a, 0x5b, "flat_atomic_xor_x2"),1573(0x5c, 0x6b, 0x5c, "flat_atomic_inc_x2"),1574(0x5d, 0x6c, 0x5d, "flat_atomic_dec_x2"),1575(0x5e, -1, 0x5e, "flat_atomic_fcmpswap_x2"),1576(0x5f, -1, 0x5f, "flat_atomic_fmin_x2"),1577(0x60, -1, 0x60, "flat_atomic_fmax_x2"),1578}1579for (gfx7, gfx8, gfx10, name) in FLAT:1580opcode(name, gfx7, gfx8, gfx10, Format.FLAT, InstrClass.VMem, is_atomic = "atomic" in name) #TODO: also LDS?15811582GLOBAL = {1583#GFX8_9, GFX101584(0x10, 0x08, "global_load_ubyte"),1585(0x11, 0x09, "global_load_sbyte"),1586(0x12, 0x0a, "global_load_ushort"),1587(0x13, 0x0b, "global_load_sshort"),1588(0x14, 0x0c, "global_load_dword"),1589(0x15, 0x0d, "global_load_dwordx2"),1590(0x16, 0x0f, "global_load_dwordx3"),1591(0x17, 0x0e, "global_load_dwordx4"),1592(0x18, 0x18, "global_store_byte"),1593(0x19, 0x19, "global_store_byte_d16_hi"),1594(0x1a, 0x1a, "global_store_short"),1595(0x1b, 0x1b, 
"global_store_short_d16_hi"),1596(0x1c, 0x1c, "global_store_dword"),1597(0x1d, 0x1d, "global_store_dwordx2"),1598(0x1e, 0x1f, "global_store_dwordx3"),1599(0x1f, 0x1e, "global_store_dwordx4"),1600(0x20, 0x20, "global_load_ubyte_d16"),1601(0x21, 0x21, "global_load_ubyte_d16_hi"),1602(0x22, 0x22, "global_load_sbyte_d16"),1603(0x23, 0x23, "global_load_sbyte_d16_hi"),1604(0x24, 0x24, "global_load_short_d16"),1605(0x25, 0x25, "global_load_short_d16_hi"),1606(0x40, 0x30, "global_atomic_swap"),1607(0x41, 0x31, "global_atomic_cmpswap"),1608(0x42, 0x32, "global_atomic_add"),1609(0x43, 0x33, "global_atomic_sub"),1610(0x44, 0x35, "global_atomic_smin"),1611(0x45, 0x36, "global_atomic_umin"),1612(0x46, 0x37, "global_atomic_smax"),1613(0x47, 0x38, "global_atomic_umax"),1614(0x48, 0x39, "global_atomic_and"),1615(0x49, 0x3a, "global_atomic_or"),1616(0x4a, 0x3b, "global_atomic_xor"),1617(0x4b, 0x3c, "global_atomic_inc"),1618(0x4c, 0x3d, "global_atomic_dec"),1619( -1, 0x3e, "global_atomic_fcmpswap"),1620( -1, 0x3f, "global_atomic_fmin"),1621( -1, 0x40, "global_atomic_fmax"),1622(0x60, 0x50, "global_atomic_swap_x2"),1623(0x61, 0x51, "global_atomic_cmpswap_x2"),1624(0x62, 0x52, "global_atomic_add_x2"),1625(0x63, 0x53, "global_atomic_sub_x2"),1626(0x64, 0x55, "global_atomic_smin_x2"),1627(0x65, 0x56, "global_atomic_umin_x2"),1628(0x66, 0x57, "global_atomic_smax_x2"),1629(0x67, 0x58, "global_atomic_umax_x2"),1630(0x68, 0x59, "global_atomic_and_x2"),1631(0x69, 0x5a, "global_atomic_or_x2"),1632(0x6a, 0x5b, "global_atomic_xor_x2"),1633(0x6b, 0x5c, "global_atomic_inc_x2"),1634(0x6c, 0x5d, "global_atomic_dec_x2"),1635( -1, 0x5e, "global_atomic_fcmpswap_x2"),1636( -1, 0x5f, "global_atomic_fmin_x2"),1637( -1, 0x60, "global_atomic_fmax_x2"),1638( -1, 0x16, "global_load_dword_addtid"), #GFX10.3+1639( -1, 0x17, "global_store_dword_addtid"), #GFX10.3+1640( -1, 0x34, "global_atomic_csub"), #GFX10.3+. 
seems glc must be set1641}1642for (gfx8, gfx10, name) in GLOBAL:1643opcode(name, -1, gfx8, gfx10, Format.GLOBAL, InstrClass.VMem, is_atomic = "atomic" in name)16441645SCRATCH = {1646#GFX8_9, GFX101647(0x10, 0x08, "scratch_load_ubyte"),1648(0x11, 0x09, "scratch_load_sbyte"),1649(0x12, 0x0a, "scratch_load_ushort"),1650(0x13, 0x0b, "scratch_load_sshort"),1651(0x14, 0x0c, "scratch_load_dword"),1652(0x15, 0x0d, "scratch_load_dwordx2"),1653(0x16, 0x0f, "scratch_load_dwordx3"),1654(0x17, 0x0e, "scratch_load_dwordx4"),1655(0x18, 0x18, "scratch_store_byte"),1656(0x19, 0x19, "scratch_store_byte_d16_hi"),1657(0x1a, 0x1a, "scratch_store_short"),1658(0x1b, 0x1b, "scratch_store_short_d16_hi"),1659(0x1c, 0x1c, "scratch_store_dword"),1660(0x1d, 0x1d, "scratch_store_dwordx2"),1661(0x1e, 0x1f, "scratch_store_dwordx3"),1662(0x1f, 0x1e, "scratch_store_dwordx4"),1663(0x20, 0x20, "scratch_load_ubyte_d16"),1664(0x21, 0x21, "scratch_load_ubyte_d16_hi"),1665(0x22, 0x22, "scratch_load_sbyte_d16"),1666(0x23, 0x23, "scratch_load_sbyte_d16_hi"),1667(0x24, 0x24, "scratch_load_short_d16"),1668(0x25, 0x25, "scratch_load_short_d16_hi"),1669}1670for (gfx8, gfx10, name) in SCRATCH:1671opcode(name, -1, gfx8, gfx10, Format.SCRATCH, InstrClass.VMem)16721673# check for duplicate opcode numbers1674for ver in ['gfx9', 'gfx10']:1675op_to_name = {}1676for op in opcodes.values():1677if op.format in [Format.PSEUDO, Format.PSEUDO_BRANCH, Format.PSEUDO_BARRIER, Format.PSEUDO_REDUCTION]:1678continue16791680num = getattr(op, 'opcode_' + ver)1681if num == -1:1682continue16831684key = (op.format, num)16851686if key in op_to_name:1687# exceptions1688names = set([op_to_name[key], op.name])1689if ver in ['gfx8', 'gfx9'] and names == set(['v_mul_lo_i32', 'v_mul_lo_u32']):1690continue1691# v_mad_legacy_f32 is replaced with v_fma_legacy_f32 on GFX10.31692if ver == 'gfx10' and names == set(['v_mad_legacy_f32', 'v_fma_legacy_f32']):1693continue16941695print('%s and %s share the same opcode number (%s)' % (op_to_name[key], 
op.name, ver))1696sys.exit(1)1697else:1698op_to_name[key] = op.name16991700# These instructions write the entire 32-bit VGPR, but it's not clear in Opcode's constructor that1701# it should be 32, since it works accidentally.1702assert(opcodes['ds_read_u8'].definition_size == 32)1703assert(opcodes['ds_read_u16'].definition_size == 32)170417051706