Path: blob/21.2-virgl/src/amd/compiler/tests/test_regalloc.cpp
7108 views
/*1* Copyright © 2020 Valve Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*22*/23#include "helpers.h"2425using namespace aco;2627BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)28/* Registers of operands should be "recycled" for the output. But if the29* input is smaller than the output, that's not generally possible. The30* first v_cvt_f32_f16 instruction below uses the upper 16 bits of v031* while the lower 16 bits are still live, so the output must be stored in32* a register other than v0. For the second v_cvt_f32_f16, the original33* value stored in v0 is no longer used and hence it's safe to store the34* result in v0.35*/3637for (chip_class cc = GFX8; cc < NUM_GFX_VERSIONS; cc = (chip_class)((unsigned)cc + 1)) {38for (bool pessimistic : { false, true }) {39const char* subvariant = pessimistic ? "/pessimistic" : "/optimistic";4041//>> v1: %_:v[#a] = p_startpgm42if (!setup_cs("v1", (chip_class)cc, CHIP_UNKNOWN, subvariant))43return;4445//! v2b: %_:v[#a][0:16], v2b: %res1:v[#a][16:32] = p_split_vector %_:v[#a]46Builder::Result tmp = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]);4748//! v1: %_:v[#b] = v_cvt_f32_f16 %_:v[#a][16:32]49//! v1: %_:v[#a] = v_cvt_f32_f16 %_:v[#a][0:16]50//; success = (b != a)51auto result1 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(1).getTemp());52auto result2 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(0).getTemp());53writeout(0, result1);54writeout(1, result2);5556finish_ra_test(ra_test_policy { pessimistic });57}58}59END_TEST6061BEGIN_TEST(regalloc.32bit_partial_write)62//>> v1: %_:v[0] = p_startpgm63if (!setup_cs("v1", GFX10))64return;6566/* ensure high 16 bits are occupied */67//! v2b: %_:v[0][0:16], v2b: %_:v[0][16:32] = p_split_vector %_:v[0]68Temp hi = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp();6970/* This test checks if this instruction uses SDWA. */71//! v2b: %_:v[0][0:16] = v_not_b32 0 dst_preserve72Temp lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v2b), Operand::zero());7374//! v1: %_:v[0] = p_create_vector %_:v[0][0:16], %_:v[0][16:32]75bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), lo, hi);7677finish_ra_test(ra_test_policy());78END_TEST7980BEGIN_TEST(regalloc.precolor.swap)81//>> s2: %op0:s[0-1] = p_startpgm82if (!setup_cs("s2", GFX10))83return;8485program->dev.sgpr_limit = 4;8687//! s2: %op1:s[2-3] = p_unit_test88Temp op1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2));8990//! s2: %op1_2:s[0-1], s2: %op0_2:s[2-3] = p_parallelcopy %op1:s[2-3], %op0:s[0-1]91//! p_unit_test %op0_2:s[2-3], %op1_2:s[0-1]92Operand op(inputs[0]);93op.setFixed(PhysReg(2));94bld.pseudo(aco_opcode::p_unit_test, op, op1);9596finish_ra_test(ra_test_policy());97END_TEST9899BEGIN_TEST(regalloc.precolor.blocking_vector)100//>> s2: %tmp0:s[0-1], s1: %tmp1:s[2] = p_startpgm101if (!setup_cs("s2 s1", GFX10))102return;103104//! s2: %tmp0_2:s[2-3], s1: %tmp1_2:s[1] = p_parallelcopy %tmp0:s[0-1], %tmp1:s[2]105//! p_unit_test %tmp1_2:s[1]106Operand op(inputs[1]);107op.setFixed(PhysReg(1));108bld.pseudo(aco_opcode::p_unit_test, op);109110//! p_unit_test %tmp0_2:s[2-3]111bld.pseudo(aco_opcode::p_unit_test, inputs[0]);112113finish_ra_test(ra_test_policy());114END_TEST115116BEGIN_TEST(regalloc.precolor.vector.test)117//>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm118if (!setup_cs("s2 s1 s1", GFX10))119return;120121//! s1: %tmp2_2:s[0], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp2:s[3], %tmp0:s[0-1]122//! p_unit_test %tmp0_2:s[2-3]123Operand op(inputs[0]);124op.setFixed(PhysReg(2));125bld.pseudo(aco_opcode::p_unit_test, op);126127//! p_unit_test %tmp2_2:s[0]128bld.pseudo(aco_opcode::p_unit_test, inputs[2]);129130finish_ra_test(ra_test_policy());131END_TEST132133BEGIN_TEST(regalloc.precolor.vector.collect)134//>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm135if (!setup_cs("s2 s1 s1", GFX10))136return;137138//! s1: %tmp2_2:s[0], s1: %tmp1_2:s[1], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp2:s[3], %tmp1:s[2], %tmp0:s[0-1]139//! p_unit_test %tmp0_2:s[2-3]140Operand op(inputs[0]);141op.setFixed(PhysReg(2));142bld.pseudo(aco_opcode::p_unit_test, op);143144//! p_unit_test %tmp1_2:s[1], %tmp2_2:s[0]145bld.pseudo(aco_opcode::p_unit_test, inputs[1], inputs[2]);146147finish_ra_test(ra_test_policy());148END_TEST149150151