Path: blob/21.2-virgl/src/amd/compiler/tests/test_optimizer_postRA.cpp
7099 views
/*1* Copyright © 2021 Valve Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*22*/2324#include "helpers.h"2526using namespace aco;2728BEGIN_TEST(optimizer_postRA.vcmp)29PhysReg reg_v0(256);30PhysReg reg_s0(0);31PhysReg reg_s2(2);32PhysReg reg_s4(4);3334//>> v1: %a:v[0] = p_startpgm35ASSERTED bool setup_ok = setup_cs("v1", GFX8);36assert(setup_ok);3738auto &startpgm = bld.instructions->at(0);39assert(startpgm->opcode == aco_opcode::p_startpgm);40startpgm->definitions[0].setFixed(reg_v0);4142Temp v_in = inputs[0];4344{45/* Recognize when the result of VOPC goes to VCC, and use that for the branching then. */4647//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]48//! s2: %e:s[2-3] = p_cbranch_z %b:vcc49//! p_unit_test 0, %e:s[2-3]50auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),51Operand(v_in, reg_v0));52auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));53auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));54writeout(0, Operand(br, reg_s2));55}5657//; del b, e5859{60/* When VCC is overwritten inbetween, don't optimize. */6162//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]63//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec64//! s2: %f:vcc = s_mov_b64 065//! s2: %e:s[2-3] = p_cbranch_z %d:scc66//! p_unit_test 1, %e:s[2-3], %f:vcc67auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),68Operand(v_in, reg_v0));69auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));70auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero());71auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));72writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));73}7475//; del b, c, d, e, f7677{78/* When the result of VOPC goes to an SGPR pair other than VCC, don't optimize */7980//! s2: %b:s[4-5] = v_cmp_eq_u32 0, %a:v[0]81//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:s[4-5], %x:exec82//! s2: %e:s[2-3] = p_cbranch_z %d:scc83//! p_unit_test 2, %e:s[2-3]84auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(),85Operand(v_in, reg_v0));86auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(vcmp, reg_s4), Operand(exec, bld.lm));87auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));88writeout(2, Operand(br, reg_s2));89}9091//; del b, c, d, e9293{94/* When the VCC isn't written by VOPC, don't optimize */9596//! s2: %b:vcc, s1: %f:scc = s_or_b64 1, %0:s[4-5]97//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec98//! s2: %e:s[2-3] = p_cbranch_z %d:scc99//! p_unit_test 2, %e:s[2-3]100auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc),101Operand::c32(1u), Operand(reg_s4, bld.lm));102auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(salu, vcc), Operand(exec, bld.lm));103auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));104writeout(2, Operand(br, reg_s2));105}106107//; del b, c, d, e, f, x108109{110/* When EXEC is overwritten inbetween, don't optimize. */111112//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]113//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec114//! s2: %f:exec = s_mov_b64 42115//! s2: %e:s[2-3] = p_cbranch_z %d:scc116//! p_unit_test 4, %e:s[2-3], %f:exec117auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),118Operand(v_in, reg_v0));119auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));120auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u));121auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));122writeout(4, Operand(br, reg_s2), Operand(ovrwr, exec));123}124125//; del b, c, d, e, f, x126127finish_optimizer_postRA_test();128END_TEST129130BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)131//>> s1: %a, s2: %y, s1: %z = p_startpgm132ASSERTED bool setup_ok = setup_cs("s1 s2 s1", GFX6);133assert(setup_ok);134135PhysReg reg_s0{0};136PhysReg reg_s1{1};137PhysReg reg_s2{2};138PhysReg reg_s3{3};139PhysReg reg_s4{4};140PhysReg reg_s6{6};141142Temp in_0 = inputs[0];143Temp in_1 = inputs[1];144Temp in_2 = inputs[2];145Operand op_in_0(in_0);146op_in_0.setFixed(reg_s0);147Operand op_in_1(in_1);148op_in_1.setFixed(reg_s4);149Operand op_in_2(in_2);150op_in_2.setFixed(reg_s6);151152{153//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018154//! s2: %f:vcc = p_cbranch_nz %e:scc155//! p_unit_test 0, %f:vcc156auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,157Operand::c32(0x40018u));158auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),159Operand::zero());160auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));161writeout(0, Operand(br, vcc));162}163164//; del d, e, f165166{167//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018168//! s2: %f:vcc = p_cbranch_z %e:scc169//! p_unit_test 1, %f:vcc170auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,171Operand::c32(0x40018u));172auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),173Operand::zero());174auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));175writeout(1, Operand(br, vcc));176}177178//; del d, e, f179180{181//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018182//! s2: %f:vcc = p_cbranch_z %e:scc183//! p_unit_test 2, %f:vcc184auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,185Operand::c32(0x40018u));186auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),187Operand::zero());188auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));189writeout(2, Operand(br, vcc));190}191192//; del d, e, f193194{195//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018196//! s2: %f:vcc = p_cbranch_nz %e:scc197//! p_unit_test 3, %f:vcc198auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,199Operand::c32(0x40018u));200auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),201Operand::zero());202auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));203writeout(3, Operand(br, vcc));204}205206//; del d, e, f207208{209//! s2: %d:s[2-3], s1: %e:scc = s_and_b64 %y:s[4-5], 0x12345210//! s2: %f:vcc = p_cbranch_z %e:scc211//! p_unit_test 4, %f:vcc212auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1,213Operand::c32(0x12345u));214auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2),215Operand::zero(8));216auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));217writeout(4, Operand(br, vcc));218}219220//; del d, e, f221222{223/* SCC is overwritten in between, don't optimize */224225//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018226//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1227//! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0228//! s2: %f:vcc = p_cbranch_z %g:scc229//! p_unit_test 5, %f:vcc, %h:s[3]230auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,231Operand::c32(0x40018u));232auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,233Operand::c32(1u));234auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),235Operand::zero());236auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));237writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));238}239240//; del d, e, f, g, h, x241242{243//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018244//! s1: %f:s[4] = s_cselect_b32 %z:s[6], %a:s[0], %e:scc245//! p_unit_test 6, %f:s[4]246auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,247Operand::c32(0x40018u));248auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),249Operand::zero());250auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp));251writeout(6, Operand(br, reg_s4));252}253254//; del d, e, f255256{257/* SCC is overwritten in between, don't optimize */258259//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018260//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1261//! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0262//! s1: %f:s[4] = s_cselect_b32 %a:s[0], %z:s[6], %g:scc263//! p_unit_test 7, %f:s[4], %h:s[3]264auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,265Operand::c32(0x40018u));266auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,267Operand::c32(1u));268auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),269Operand::zero());270auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp));271writeout(7, Operand(br, reg_s4), Operand(ovrw, reg_s3));272}273274//; del d, e, f, g, h, x275276finish_optimizer_postRA_test();277END_TEST278279280