Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/amd/compiler/tests/test_regalloc.cpp
7108 views
1
/*
2
* Copyright © 2020 Valve Corporation
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*
23
*/
24
#include "helpers.h"
25
26
using namespace aco;
27
28
BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
29
/* Registers of operands should be "recycled" for the output. But if the
30
* input is smaller than the output, that's not generally possible. The
31
* first v_cvt_f32_f16 instruction below uses the upper 16 bits of v0
32
* while the lower 16 bits are still live, so the output must be stored in
33
* a register other than v0. For the second v_cvt_f32_f16, the original
34
* value stored in v0 is no longer used and hence it's safe to store the
35
* result in v0.
36
*/
37
38
/* Repeat the scenario for every chip class from GFX8 up to, but not
* including, NUM_GFX_VERSIONS.
*/
for (chip_class cc = GFX8; cc < NUM_GFX_VERSIONS; cc = (chip_class)((unsigned)cc + 1)) {
39
/* Cover both values of the pessimistic flag. The flag picks the subvariant
* suffix handed to setup_cs() and is forwarded to ra_test_policy below.
*/
for (bool pessimistic : { false, true }) {
40
const char* subvariant = pessimistic ? "/pessimistic" : "/optimistic";
41
42
/* The line comments that start with a marker (">>", "!" or ";") appear to be
* expected-output patterns read by the test harness, so they are left
* untouched. NOTE(review): confirm against the test framework.
*
* A false result from setup_cs() leaves the whole test body, not just the
* current iteration. NOTE(review): setup_cs() is defined outside this view;
* presumably false means this variant should not run - confirm.
*/
//>> v1: %_:v[#a] = p_startpgm
43
if (!setup_cs("v1", (chip_class)cc, CHIP_UNKNOWN, subvariant))
44
return;
45
46
/* Split the v1 input into two v2b values. Per the expected output, the
* first definition covers bits [0:16] and the second bits [16:32] of the
* input register.
*/
//! v2b: %_:v[#a][0:16], v2b: %res1:v[#a][16:32] = p_split_vector %_:v[#a]
47
Builder::Result tmp = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]);
48
49
/* Convert the second half first, then the first half. The success check
* (b != a) requires the first result to be placed in a register different
* from the input's, while the second result is expected in the input's
* register.
*/
//! v1: %_:v[#b] = v_cvt_f32_f16 %_:v[#a][16:32]
50
//! v1: %_:v[#a] = v_cvt_f32_f16 %_:v[#a][0:16]
51
//; success = (b != a)
52
auto result1 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(1).getTemp());
53
auto result2 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), tmp.def(0).getTemp());
/* Pass both results to writeout() under indices 0 and 1.
* NOTE(review): presumably this keeps them live until the end of the
* program - confirm in the helpers.
*/
54
writeout(0, result1);
55
writeout(1, result2);
56
57
/* Finish the test with a policy built from the current pessimistic flag. */
finish_ra_test(ra_test_policy { pessimistic });
58
}
59
}
60
END_TEST
61
62
BEGIN_TEST(regalloc.32bit_partial_write)
63
/* GFX10 scenario: a v2b result is written while the other 16 bits of the same
* register are still occupied, and both halves are recombined afterwards.
* The expected-output comments place every value in v[0].
*
* NOTE(review): setup_cs() is defined outside this view; a false result
* presumably means the test should not run - confirm.
*/
//>> v1: %_:v[0] = p_startpgm
64
if (!setup_cs("v1", GFX10))
65
return;
66
67
/* ensure high 16 bits are occupied */
/* Only the second definition of the split (expected at bits [16:32]) is kept. */
//! v2b: %_:v[0][0:16], v2b: %_:v[0][16:32] = p_split_vector %_:v[0]
68
Temp hi = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp();
69
70
/* This test checks if this instruction uses SDWA. */
/* The expected line carries the dst_preserve modifier and targets bits
* [0:16] of v[0].
*/
//! v2b: %_:v[0][0:16] = v_not_b32 0 dst_preserve
71
Temp lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v2b), Operand::zero());
72
73
/* Recombine lo and hi into one v1, so both halves are used after the
* 16-bit write above.
*/
//! v1: %_:v[0] = p_create_vector %_:v[0][0:16], %_:v[0][16:32]
74
bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), lo, hi);
75
76
/* Finish with a default-constructed policy. */
finish_ra_test(ra_test_policy());
77
END_TEST
80
81
BEGIN_TEST(regalloc.precolor.swap)
79
/* Precolored operand that requires a swap. Per the expected output, op0
* starts in s[0-1] and op1 is allocated to s[2-3]. Fixing op0 to physical
* register 2 for the final instruction is expected to yield a
* p_parallelcopy that exchanges the two register pairs.
*
* NOTE(review): setup_cs() is defined outside this view; a false result
* presumably means the test should not run - confirm.
*/
//>> s2: %op0:s[0-1] = p_startpgm
80
if (!setup_cs("s2", GFX10))
81
return;
82
83
/* Set the device SGPR limit to 4.
* NOTE(review): presumably this leaves no spare SGPRs, so the two pairs can
* only be exchanged with each other - confirm.
*/
program->dev.sgpr_limit = 4;
84
85
//! s2: %op1:s[2-3] = p_unit_test
86
Temp op1 = bld.pseudo(aco_opcode::p_unit_test, bld.def(s2));
87
88
/* Build an operand from the first input and fix it to physical register 2
* (shown as s[2-3] in the expected output), then use it together with op1.
*/
//! s2: %op1_2:s[0-1], s2: %op0_2:s[2-3] = p_parallelcopy %op1:s[2-3], %op0:s[0-1]
89
//! p_unit_test %op0_2:s[2-3], %op1_2:s[0-1]
90
Operand op(inputs[0]);
91
op.setFixed(PhysReg(2));
92
bld.pseudo(aco_opcode::p_unit_test, op, op1);
93
94
/* Finish with a default-constructed policy. */
finish_ra_test(ra_test_policy());
95
END_TEST
99
100
BEGIN_TEST(regalloc.precolor.blocking_vector)
97
/* Precolored operand whose target register lies inside a live vector. Per
* the expected output, tmp0 (s2) starts in s[0-1] and tmp1 (s1) in s[2].
* Fixing tmp1 to physical register 1 is expected to produce a
* p_parallelcopy that moves tmp0 to s[2-3] and tmp1 to s[1].
*
* NOTE(review): setup_cs() is defined outside this view; a false result
* presumably means the test should not run - confirm.
*/
//>> s2: %tmp0:s[0-1], s1: %tmp1:s[2] = p_startpgm
98
if (!setup_cs("s2 s1", GFX10))
99
return;
100
101
/* Build an operand from the second input and fix it to physical register 1. */
//! s2: %tmp0_2:s[2-3], s1: %tmp1_2:s[1] = p_parallelcopy %tmp0:s[0-1], %tmp1:s[2]
102
//! p_unit_test %tmp1_2:s[1]
103
Operand op(inputs[1]);
104
op.setFixed(PhysReg(1));
105
bld.pseudo(aco_opcode::p_unit_test, op);
106
107
/* Use the first input afterwards; it is expected to be read from its new
* location s[2-3].
*/
//! p_unit_test %tmp0_2:s[2-3]
108
bld.pseudo(aco_opcode::p_unit_test, inputs[0]);
109
110
/* Finish with a default-constructed policy. */
finish_ra_test(ra_test_policy());
111
END_TEST
116
117
BEGIN_TEST(regalloc.precolor.vector.test)
113
/* Precolored s2 operand whose target registers hold two live s1 values. Per
* the expected output, tmp0 starts in s[0-1], tmp1 in s[2] and tmp2 in s[3].
* Fixing tmp0 to physical register 2 is expected to produce a
* p_parallelcopy that moves tmp0 to s[2-3] and tmp2 to s[0]. tmp1 is not
* used after that point in this test and does not appear in the expected
* copy.
*
* NOTE(review): setup_cs() is defined outside this view; a false result
* presumably means the test should not run - confirm.
*/
//>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm
114
if (!setup_cs("s2 s1 s1", GFX10))
115
return;
116
117
/* Build an operand from the first input and fix it to physical register 2. */
//! s1: %tmp2_2:s[0], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp2:s[3], %tmp0:s[0-1]
118
//! p_unit_test %tmp0_2:s[2-3]
119
Operand op(inputs[0]);
120
op.setFixed(PhysReg(2));
121
bld.pseudo(aco_opcode::p_unit_test, op);
122
123
/* Use the third input afterwards; it is expected to be read from s[0]. */
//! p_unit_test %tmp2_2:s[0]
124
bld.pseudo(aco_opcode::p_unit_test, inputs[2]);
125
126
/* Finish with a default-constructed policy. */
finish_ra_test(ra_test_policy());
127
END_TEST
133
134
BEGIN_TEST(regalloc.precolor.vector.collect)
135
/* Same starting layout as the neighbouring precolor vector test (tmp0 in
* s[0-1], tmp1 in s[2], tmp2 in s[3]), but here both s1 inputs are used
* after the precolored instruction. Fixing tmp0 to physical register 2 is
* expected to produce a p_parallelcopy that moves tmp0 to s[2-3], tmp1 to
* s[1] and tmp2 to s[0].
*
* NOTE(review): setup_cs() is defined outside this view; a false result
* presumably means the test should not run - confirm.
*/
//>> s2: %tmp0:s[0-1], s1: %tmp1:s[2], s1: %tmp2:s[3] = p_startpgm
136
if (!setup_cs("s2 s1 s1", GFX10))
137
return;
138
139
/* Build an operand from the first input and fix it to physical register 2. */
//! s1: %tmp2_2:s[0], s1: %tmp1_2:s[1], s2: %tmp0_2:s[2-3] = p_parallelcopy %tmp2:s[3], %tmp1:s[2], %tmp0:s[0-1]
140
//! p_unit_test %tmp0_2:s[2-3]
141
Operand op(inputs[0]);
142
op.setFixed(PhysReg(2));
143
bld.pseudo(aco_opcode::p_unit_test, op);
144
145
/* Use the second and third inputs afterwards; they are expected to be read
* from s[1] and s[0] respectively.
*/
//! p_unit_test %tmp1_2:s[1], %tmp2_2:s[0]
146
bld.pseudo(aco_opcode::p_unit_test, inputs[1], inputs[2]);
147
148
/* Finish with a default-constructed policy. */
finish_ra_test(ra_test_policy());
149
END_TEST
150
151