/* Path: blob/21.2-virgl/src/amd/compiler/tests/test_to_hw_instr.cpp
 * 7158 views
 */
/*1* Copyright © 2020 Valve Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*22*/23#include "helpers.h"2425using namespace aco;2627BEGIN_TEST(to_hw_instr.swap_subdword)28PhysReg v0_lo{256};29PhysReg v0_hi{256};30PhysReg v0_b1{256};31PhysReg v0_b3{256};32PhysReg v1_lo{257};33PhysReg v1_hi{257};34PhysReg v1_b1{257};35PhysReg v1_b3{257};36PhysReg v2_lo{258};37PhysReg v3_lo{259};38v0_hi.reg_b += 2;39v1_hi.reg_b += 2;40v0_b1.reg_b += 1;41v1_b1.reg_b += 1;42v0_b3.reg_b += 3;43v1_b3.reg_b += 3;4445for (unsigned i = GFX6; i <= GFX7; i++) {46if (!setup_cs(NULL, (chip_class)i))47continue;4849//~gfx[67]>> p_unit_test 050//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]51//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]52//~gfx[67]! 
v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]53bld.pseudo(aco_opcode::p_unit_test, Operand::zero());54bld.pseudo(aco_opcode::p_parallelcopy,55Definition(v0_lo, v2b), Definition(v1_lo, v2b),56Operand(v1_lo, v2b), Operand(v0_lo, v2b));5758//~gfx[67]! p_unit_test 159//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]60//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 261//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 262bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));63bld.pseudo(aco_opcode::p_create_vector,64Definition(v0_lo, v1),65Operand(v1_lo, v2b), Operand(v0_lo, v2b));6667//~gfx[67]! p_unit_test 268//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]69//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 270//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 271//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[2][0:16]72bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));73bld.pseudo(aco_opcode::p_create_vector,74Definition(v0_lo, v6b), Operand(v1_lo, v2b),75Operand(v0_lo, v2b), Operand(v2_lo, v2b));7677//~gfx[67]! p_unit_test 378//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]79//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 280//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 281//~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[2][0:16]82//~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[3][0:16], %0:v[1][16:32], 283bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));84bld.pseudo(aco_opcode::p_create_vector,85Definition(v0_lo, v2),86Operand(v1_lo, v2b), Operand(v0_lo, v2b),87Operand(v2_lo, v2b), Operand(v3_lo, v2b));8889//~gfx[67]! p_unit_test 490//~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[1][0:16]91//~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[2][0:16], %0:v[1][16:32], 292//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]93//~gfx[67]! 
v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:16], %0:v[0][16:32], 294//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]95//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]96//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]97bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));98bld.pseudo(aco_opcode::p_create_vector,99Definition(v0_lo, v2),100Operand(v1_lo, v2b), Operand(v2_lo, v2b),101Operand(v0_lo, v2b), Operand(v3_lo, v2b));102103//~gfx[67]! p_unit_test 5104//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]105//~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]106bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));107bld.pseudo(aco_opcode::p_split_vector,108Definition(v1_lo, v2b), Definition(v0_lo, v2b),109Operand(v0_lo, v1));110111//~gfx[67]! p_unit_test 6112//~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16]113//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]114//~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]115bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));116bld.pseudo(aco_opcode::p_split_vector,117Definition(v1_lo, v2b), Definition(v0_lo, v2b),118Definition(v2_lo, v2b), Operand(v0_lo, v6b));119120//~gfx[67]! p_unit_test 7121//~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16]122//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]123//~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]124//~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[2][16:32]125bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));126bld.pseudo(aco_opcode::p_split_vector,127Definition(v1_lo, v2b), Definition(v0_lo, v2b),128Definition(v2_lo, v2b), Definition(v3_lo, v2b),129Operand(v0_lo, v2));130131//~gfx[67]! p_unit_test 8132//~gfx[67]! v2b: %0:v[2][0:16] = v_lshrrev_b32 16, %0:v[0][16:32]133//~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]134//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]135//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]136//~gfx[67]! 
v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]137bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));138bld.pseudo(aco_opcode::p_split_vector,139Definition(v1_lo, v2b), Definition(v2_lo, v2b),140Definition(v0_lo, v2b), Definition(v3_lo, v2b),141Operand(v0_lo, v2));142143//~gfx[67]! p_unit_test 9144//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]145//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]146//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]147bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));148bld.pseudo(aco_opcode::p_parallelcopy,149Definition(v0_lo, v1b), Definition(v1_lo, v1b),150Operand(v1_lo, v1b), Operand(v0_lo, v1b));151152//~gfx[67]! p_unit_test 10153//~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]154//~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3155//~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]156bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));157bld.pseudo(aco_opcode::p_create_vector,158Definition(v0_lo, v2b),159Operand(v1_lo, v1b), Operand(v0_lo, v1b));160161//~gfx[67]! p_unit_test 11162//~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]163//~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3164//~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]165//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]166//~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2167bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));168bld.pseudo(aco_opcode::p_create_vector,169Definition(v0_lo, v3b), Operand(v1_lo, v1b),170Operand(v0_lo, v1b), Operand(v2_lo, v1b));171172//~gfx[67]! p_unit_test 12173//~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]174//~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3175//~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]176//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]177//~gfx[67]! 
v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2178//~gfx[67]! v3b: %0:v[0][8:32] = v_lshlrev_b32 8, %0:v[0][0:24]179//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:8], %0:v[0][8:32], 1180bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));181bld.pseudo(aco_opcode::p_create_vector,182Definition(v0_lo, v1),183Operand(v1_lo, v1b), Operand(v0_lo, v1b),184Operand(v2_lo, v1b), Operand(v3_lo, v1b));185186//~gfx[67]! p_unit_test 13187//~gfx[67]! v1b: %0:v[0][0:8] = v_and_b32 0xff, %0:v[0][0:8]188//~gfx[67]! v2b: %0:v[0][0:16] = v_mul_u32_u24 0x101, %0:v[0][0:8]189//~gfx[67]! v2b: %0:v[0][0:16] = v_and_b32 0xffff, %0:v[0][0:16]190//~gfx[67]! v3b: %0:v[0][0:24] = v_cvt_pk_u16_u32 %0:v[0][0:16], %0:v[0][0:8]191//~gfx[67]! v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24]192//~gfx[67]! s1: %0:m0 = s_mov_b32 0x1000001193//~gfx[67]! v1: %0:v[0] = v_mul_lo_u32 %0:m0, %0:v[0][0:8]194bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));195Instruction* pseudo = bld.pseudo(aco_opcode::p_create_vector,196Definition(v0_lo, v1),197Operand(v0_lo, v1b), Operand(v0_lo, v1b),198Operand(v0_lo, v1b), Operand(v0_lo, v1b));199pseudo->pseudo().scratch_sgpr = m0;200201//~gfx[67]! p_unit_test 14202//~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]203//~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]204bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));205bld.pseudo(aco_opcode::p_split_vector,206Definition(v1_lo, v1b), Definition(v0_lo, v1b),207Operand(v0_lo, v2b));208209//~gfx[67]! p_unit_test 15210//~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]211//~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]212//~gfx[67]! v1b: %0:v[2][0:8] = v_lshrrev_b32 16, %0:v[1][16:24]213//~gfx[67]! 
v1b: %0:v[3][0:8] = v_lshrrev_b32 24, %0:v[1][24:32]214bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));215bld.pseudo(aco_opcode::p_split_vector,216Definition(v1_lo, v1b), Definition(v0_lo, v1b),217Definition(v2_lo, v1b), Definition(v3_lo, v1b),218Operand(v0_lo, v1));219220//~gfx[67]! s_endpgm221222finish_to_hw_instr_test();223}224225for (unsigned i = GFX8; i <= GFX9; i++) {226if (!setup_cs(NULL, (chip_class)i))227continue;228229//~gfx[89]>> p_unit_test 0230//~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2231//~gfx9! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16]232bld.pseudo(aco_opcode::p_unit_test, Operand::zero());233bld.pseudo(aco_opcode::p_parallelcopy,234Definition(v0_lo, v2b), Definition(v0_hi, v2b),235Operand(v0_hi, v2b), Operand(v0_lo, v2b));236237//~gfx[89]! p_unit_test 1238//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]239//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]240//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]241//~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]242//~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_preserve243bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));244bld.pseudo(aco_opcode::p_parallelcopy,245Definition(v0_lo, v1), Definition(v1_lo, v2b),246Operand(v1_lo, v1), Operand(v0_lo, v2b));247248//~gfx[89]! p_unit_test 2249//~gfx[89]! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][16:32] dst_preserve250//~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][0:16] dst_preserve251//~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_preserve252//~gfx[89]! v2b: %0:v[0][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_preserve253//~gfx[89]! 
v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_preserve254bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));255bld.pseudo(aco_opcode::p_parallelcopy,256Definition(v0_lo, v1), Definition(v1_lo, v2b), Definition(v1_hi, v2b),257Operand(v1_lo, v1), Operand(v0_lo, v2b), Operand(v0_lo, v2b));258259//~gfx[89]! p_unit_test 3260//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]261//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]262//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]263//~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]264//~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] dst_preserve265//~gfx[89]! v1b: %0:v[1][16:24] = v_mov_b32 %0:v[0][16:24] dst_preserve266bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));267bld.pseudo(aco_opcode::p_parallelcopy,268Definition(v0_lo, v1), Definition(v1_b3, v1b),269Operand(v1_lo, v1), Operand(v0_b3, v1b));270271//~gfx[89]! p_unit_test 4272//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]273//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]274//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]275//~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]276//~gfx[89]! v1b: %0:v[1][8:16] = v_mov_b32 %0:v[0][8:16] dst_preserve277//~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_preserve278bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));279bld.pseudo(aco_opcode::p_parallelcopy,280Definition(v0_lo, v1), Definition(v1_lo, v1b),281Operand(v1_lo, v1), Operand(v0_lo, v1b));282283//~gfx[89]! p_unit_test 5284//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]285//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1]286//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]287//~gfx9! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]288//~gfx[89]! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][8:16] dst_preserve289//~gfx[89]! 
v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][24:32] dst_preserve290bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));291bld.pseudo(aco_opcode::p_parallelcopy,292Definition(v0_lo, v1b), Definition(v0_hi, v1b), Definition(v1_lo, v1),293Operand(v1_lo, v1b), Operand(v1_hi, v1b), Operand(v0_lo, v1));294295//~gfx[89]! p_unit_test 6296//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]297//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]298//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]299//~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]300bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));301bld.pseudo(aco_opcode::p_parallelcopy,302Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),303Operand(v1_lo, v2b), Operand(v1_hi, v2b), Operand(v0_lo, v1));304305//~gfx[89]! p_unit_test 7306//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]307//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1]308//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]309//~gfx9! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]310//~gfx[89]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2311bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));312bld.pseudo(aco_opcode::p_parallelcopy,313Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),314Operand(v1_hi, v2b), Operand(v1_lo, v2b), Operand(v0_lo, v1));315316//~gfx[89]! p_unit_test 8317//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]318//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]319//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]320//~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]321//~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_preserve322//~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_preserve323//~gfx[89]! 
v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_preserve324bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));325bld.pseudo(aco_opcode::p_parallelcopy,326Definition(v0_lo, v3b), Definition(v1_lo, v3b),327Operand(v1_lo, v3b), Operand(v0_lo, v3b));328329//~gfx[89]! p_unit_test 9330//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]331//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]332//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]333//~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]334//~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_preserve335bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));336bld.pseudo(aco_opcode::p_parallelcopy,337Definition(v0_lo, v3b), Definition(v1_lo, v3b), Definition(v0_b3, v1b),338Operand(v1_lo, v3b), Operand(v0_lo, v3b), Operand(v1_b3, v1b));339340//~gfx[89]! p_unit_test 10341//~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_preserve342//~gfx[89]! v1b: %0:v[0][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_preserve343//~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_preserve344//~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_preserve345//~gfx[89]! v1b: %0:v[0][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_preserve346//~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_preserve347bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));348bld.pseudo(aco_opcode::p_parallelcopy,349Definition(v0_b1, v2b), Definition(v1_b1, v2b),350Operand(v1_b1, v2b), Operand(v0_b1, v2b));351352//~gfx[89]! p_unit_test 11353//~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][16:32] dst_preserve354//~gfx[89]! v1: %0:v[0] = v_mov_b32 42355bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));356bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),357Operand::c32(42u), Operand(v0_hi, v2b));358359//~gfx[89]! 
s_endpgm360361finish_to_hw_instr_test();362}363END_TEST364365BEGIN_TEST(to_hw_instr.subdword_constant)366PhysReg v0_lo{256};367PhysReg v0_hi{256};368PhysReg v0_b1{256};369PhysReg v1_hi{257};370v0_hi.reg_b += 2;371v0_b1.reg_b += 1;372v1_hi.reg_b += 2;373374for (unsigned i = GFX9; i <= GFX10; i++) {375if (!setup_cs(NULL, (chip_class)i))376continue;377378/* 16-bit pack */379//>> p_unit_test 0380//! v1: %_:v[0] = v_pack_b32_f16 0.5, hi(%_:v[1][16:32])381bld.pseudo(aco_opcode::p_unit_test, Operand::zero());382bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),383Operand::c16(0x3800), Operand(v1_hi, v2b));384385//! p_unit_test 1386//~gfx9! v2b: %0:v[0][16:32] = v_and_b32 0xffff0000, %0:v[1][16:32]387//~gfx9! v1: %0:v[0] = v_or_b32 0x4205, %0:v[0]388//~gfx10! v1: %_:v[0] = v_pack_b32_f16 0x4205, hi(%_:v[1][16:32])389bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));390bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),391Operand::c16(0x4205), Operand(v1_hi, v2b));392393//! p_unit_test 2394//~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]395//~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]396//~gfx10! v1: %0:v[0] = v_pack_b32_f16 0x4205, %0:v[0][0:16]397bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));398bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),399Operand::c16(0x4205), Operand(v0_lo, v2b));400401//! p_unit_test 3402//! v1: %_:v[0] = v_mov_b32 0x3c003800403bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));404bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),405Operand::c16(0x3800), Operand::c16(0x3c00));406407//! p_unit_test 4408//! v1: %_:v[0] = v_mov_b32 0x43064205409bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));410bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),411Operand::c16(0x4205), Operand::c16(0x4306));412413//! p_unit_test 5414//! 
v1: %_:v[0] = v_mov_b32 0x38004205415bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));416bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),417Operand::c16(0x4205), Operand::c16(0x3800));418419/* 16-bit copy */420//! p_unit_test 6421//! v2b: %_:v[0][0:16] = v_add_f16 0.5, 0 dst_preserve422bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));423bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x3800));424425//! p_unit_test 7426//~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0]427//~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]428//~gfx10! v2b: %_:v[0][0:16] = v_pack_b32_f16 0x4205, hi(%_:v[0][16:32])429bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));430bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x4205));431432//! p_unit_test 8433//~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]434//~gfx9! v1: %_:v[0] = v_or_b32 0x42050000, %_:v[0]435//~gfx10! v2b: %_:v[0][16:32] = v_pack_b32_f16 %_:v[0][0:16], 0x4205436bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));437bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0x4205));438439//! p_unit_test 9440//! v1b: %_:v[0][8:16] = v_mov_b32 0 dst_preserve441//! v1b: %_:v[0][16:24] = v_mov_b32 56 dst_preserve442bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));443bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x3800));444445//! p_unit_test 10446//! v1b: %_:v[0][8:16] = v_mov_b32 5 dst_preserve447//! v1b: %_:v[0][16:24] = v_mul_u32_u24 2, 33 dst_preserve448bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));449bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x4205));450451/* 8-bit copy */452//! p_unit_test 11453//! v1b: %_:v[0][0:8] = v_mul_u32_u24 2, 33 dst_preserve454bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));455bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0x42));456457//! 
s_endpgm458459finish_to_hw_instr_test();460}461END_TEST462463BEGIN_TEST(to_hw_instr.self_intersecting_swap)464if (!setup_cs(NULL, GFX9))465return;466467PhysReg reg_v1{257};468PhysReg reg_v2{258};469PhysReg reg_v3{259};470PhysReg reg_v7{263};471472//>> p_unit_test 0473//! v1: %0:v[1], v1: %0:v[2] = v_swap_b32 %0:v[2], %0:v[1]474//! v1: %0:v[2], v1: %0:v[3] = v_swap_b32 %0:v[3], %0:v[2]475//! v1: %0:v[3], v1: %0:v[7] = v_swap_b32 %0:v[7], %0:v[3]476//! s_endpgm477bld.pseudo(aco_opcode::p_unit_test, Operand::zero());478//v[1:2] = v[2:3]479//v3 = v7480//v7 = v1481bld.pseudo(aco_opcode::p_parallelcopy,482Definition(reg_v1, v2), Definition(reg_v3, v1), Definition(reg_v7, v1),483Operand(reg_v2, v2), Operand(reg_v7, v1), Operand(reg_v1, v1));484485finish_to_hw_instr_test();486END_TEST487488BEGIN_TEST(to_hw_instr.extract)489PhysReg s0_lo{0};490PhysReg s1_lo{1};491PhysReg v0_lo{256};492PhysReg v1_lo{257};493494for (unsigned i = GFX7; i <= GFX9; i++) {495for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {496if (!setup_cs(NULL, (chip_class)i, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))497continue;498499#define EXT(idx, size) \500bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx), \501Operand::c32(size), Operand::c32(is_signed));502503//; funcs['v_bfe'] = lambda _: 'v_bfe_i32' if variant.endswith('_signed') else 'v_bfe_u32'504//; funcs['v_shr'] = lambda _: 'v_ashrrev_i32' if variant.endswith('_signed') else 'v_lshrrev_b32'505//; funcs['s_bfe'] = lambda _: 's_bfe_i32' if variant.endswith('_signed') else 's_bfe_u32'506//; funcs['s_shr'] = lambda _: 's_ashr_i32' if variant.endswith('_signed') else 's_lshr_b32'507//; funcs['sel'] = lambda bits: ('sext(%%_:v[1])[%s]' if variant.endswith('_signed') else '%%_:v[1][%s]') % bits508509//>> p_unit_test 0510bld.pseudo(aco_opcode::p_unit_test, Operand::zero());511//! v1: %_:v[0] = @v_bfe %_:v[1], 0, 8512EXT(0, 8)513//! v1: %_:v[0] = @v_bfe %_:v[1], 8, 8514EXT(1, 8)515//! 
v1: %_:v[0] = @v_bfe %_:v[1], 16, 8516EXT(2, 8)517//! v1: %_:v[0] = @v_shr 24, %_:v[1]518EXT(3, 8)519//! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16520EXT(0, 16)521//! v1: %_:v[0] = @v_shr 16, %_:v[1]522EXT(1, 16)523524#undef EXT525526#define EXT(idx, size) \527bld.pseudo(aco_opcode::p_extract, Definition(s0_lo, s1), Definition(scc, s1), \528Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size), Operand::c32(is_signed));529530//>> p_unit_test 2531bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));532//~gfx._unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80000533//~gfx._signed! s1: %_:s[0] = s_sext_i32_i8 %_:s[1]534EXT(0, 8)535//! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80008536EXT(1, 8)537//! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80010538EXT(2, 8)539//! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 24540EXT(3, 8)541//~gfx._unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x100000542//~gfx._signed! s1: %_:s[0] = s_sext_i32_i16 %_:s[1]543EXT(0, 16)544//! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 16545EXT(1, 16)546547#undef EXT548549#define EXT(idx, src_b) \550bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v2b), Operand(v1_lo.advance(src_b), v2b), \551Operand::c32(idx), Operand::c32(8u), Operand::c32(is_signed));552553//>> p_unit_test 4554bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));555//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8556//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(0:7)557EXT(0, 0)558//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(16:23)559if (i != GFX7)560EXT(0, 2)561//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8562//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(8:15)563EXT(1, 0)564//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(24:31)565if (i != GFX7)566EXT(1, 2)567568#undef EXT569570finish_to_hw_instr_test();571572//! 
s_endpgm573}574}575END_TEST576577BEGIN_TEST(to_hw_instr.insert)578PhysReg s0_lo{0};579PhysReg s1_lo{1};580PhysReg v0_lo{256};581PhysReg v1_lo{257};582583for (unsigned i = GFX7; i <= GFX9; i++) {584if (!setup_cs(NULL, (chip_class)i))585continue;586587#define INS(idx, size) \588bld.pseudo(aco_opcode::p_insert, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx), \589Operand::c32(size));590591//>> p_unit_test 0592bld.pseudo(aco_opcode::p_unit_test, Operand::zero());593//! v1: %_:v[0] = v_bfe_u32 %_:v[1], 0, 8594INS(0, 8)595//~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8596//~gfx7! v1: %0:v[0] = v_lshlrev_b32 8, %0:v[0]597//~gfx[^7]! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte1598INS(1, 8)599//~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8600//~gfx7! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[0]601//~gfx[^7]! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte2602INS(2, 8)603//! v1: %0:v[0] = v_lshlrev_b32 24, %0:v[1]604INS(3, 8)605//! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 16606INS(0, 16)607//! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[1]608INS(1, 16)609610#undef INS611612#define INS(idx, size) \613bld.pseudo(aco_opcode::p_insert, Definition(s0_lo, s1), Definition(scc, s1), \614Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size));615616//>> p_unit_test 1617bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));618//! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000619INS(0, 8)620//! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000621//! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[0], 8622INS(1, 8)623//! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000624//! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[0], 16625INS(2, 8)626//! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[1], 24627INS(3, 8)628//! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x100000629INS(0, 16)630//! 
s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[1], 16631INS(1, 16)632633#undef INS634635#define INS(idx, def_b) \636bld.pseudo(aco_opcode::p_insert, Definition(v0_lo.advance(def_b), v2b), Operand(v1_lo, v2b), \637Operand::c32(idx), Operand::c32(8u));638639//>> p_unit_test 2640bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));641//~gfx7! v2b: %_:v[0][0:16] = v_bfe_u32 %_:v[1][0:16], 0, 8642//~gfx[^7]! v1: %_:v[0] = v_mov_b32 %_:v[1][0:16] dst_sel:ubyte0 dst_preserve643INS(0, 0)644//~gfx[^7]! v1: %_:v[0] = v_mov_b32 %_:v[1][0:16] dst_sel:ubyte2 dst_preserve645if (i != GFX7)646INS(0, 2)647//~gfx7! v2b: %_:v[0][0:16] = v_lshlrev_b32 8, %_:v[1][0:16]648//~gfx[^7]! v1: %_:v[0] = v_mov_b32 %_:v[1][0:16] dst_sel:ubyte1 dst_preserve649INS(1, 0)650//~gfx[^7]! v1: %_:v[0] = v_mov_b32 %_:v[1][0:16] dst_sel:ubyte3 dst_preserve651if (i != GFX7)652INS(1, 2)653654#undef INS655656finish_to_hw_instr_test();657658//! s_endpgm659}660END_TEST661662663