Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/amd/compiler/tests/test_optimizer_postRA.cpp
7099 views
1
/*
 * Copyright © 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
24
25
#include "helpers.h"
26
27
using namespace aco;
28
29
BEGIN_TEST(optimizer_postRA.vcmp)
30
PhysReg reg_v0(256);
31
PhysReg reg_s0(0);
32
PhysReg reg_s2(2);
33
PhysReg reg_s4(4);
34
35
//>> v1: %a:v[0] = p_startpgm
36
ASSERTED bool setup_ok = setup_cs("v1", GFX8);
37
assert(setup_ok);
38
39
auto &startpgm = bld.instructions->at(0);
40
assert(startpgm->opcode == aco_opcode::p_startpgm);
41
startpgm->definitions[0].setFixed(reg_v0);
42
43
Temp v_in = inputs[0];
44
45
{
46
/* Recognize when the result of VOPC goes to VCC, and use that for the branching then. */
47
48
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
49
//! s2: %e:s[2-3] = p_cbranch_z %b:vcc
50
//! p_unit_test 0, %e:s[2-3]
51
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
52
Operand(v_in, reg_v0));
53
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
54
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
55
writeout(0, Operand(br, reg_s2));
56
}
57
58
//; del b, e
59
60
{
61
/* When VCC is overwritten inbetween, don't optimize. */
62
63
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
64
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
65
//! s2: %f:vcc = s_mov_b64 0
66
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
67
//! p_unit_test 1, %e:s[2-3], %f:vcc
68
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
69
Operand(v_in, reg_v0));
70
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
71
auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero());
72
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
73
writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
74
}
75
76
//; del b, c, d, e, f
77
78
{
79
/* When the result of VOPC goes to an SGPR pair other than VCC, don't optimize */
80
81
//! s2: %b:s[4-5] = v_cmp_eq_u32 0, %a:v[0]
82
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:s[4-5], %x:exec
83
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
84
//! p_unit_test 2, %e:s[2-3]
85
auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(),
86
Operand(v_in, reg_v0));
87
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(vcmp, reg_s4), Operand(exec, bld.lm));
88
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
89
writeout(2, Operand(br, reg_s2));
90
}
91
92
//; del b, c, d, e
93
94
{
95
/* When the VCC isn't written by VOPC, don't optimize */
96
97
//! s2: %b:vcc, s1: %f:scc = s_or_b64 1, %0:s[4-5]
98
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
99
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
100
//! p_unit_test 2, %e:s[2-3]
101
auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc),
102
Operand::c32(1u), Operand(reg_s4, bld.lm));
103
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(salu, vcc), Operand(exec, bld.lm));
104
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
105
writeout(2, Operand(br, reg_s2));
106
}
107
108
//; del b, c, d, e, f, x
109
110
{
111
/* When EXEC is overwritten inbetween, don't optimize. */
112
113
//! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
114
//! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
115
//! s2: %f:exec = s_mov_b64 42
116
//! s2: %e:s[2-3] = p_cbranch_z %d:scc
117
//! p_unit_test 4, %e:s[2-3], %f:exec
118
auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
119
Operand(v_in, reg_v0));
120
auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
121
auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u));
122
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
123
writeout(4, Operand(br, reg_s2), Operand(ovrwr, exec));
124
}
125
126
//; del b, c, d, e, f, x
127
128
finish_optimizer_postRA_test();
129
END_TEST
130
131
BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
132
//>> s1: %a, s2: %y, s1: %z = p_startpgm
133
ASSERTED bool setup_ok = setup_cs("s1 s2 s1", GFX6);
134
assert(setup_ok);
135
136
PhysReg reg_s0{0};
137
PhysReg reg_s1{1};
138
PhysReg reg_s2{2};
139
PhysReg reg_s3{3};
140
PhysReg reg_s4{4};
141
PhysReg reg_s6{6};
142
143
Temp in_0 = inputs[0];
144
Temp in_1 = inputs[1];
145
Temp in_2 = inputs[2];
146
Operand op_in_0(in_0);
147
op_in_0.setFixed(reg_s0);
148
Operand op_in_1(in_1);
149
op_in_1.setFixed(reg_s4);
150
Operand op_in_2(in_2);
151
op_in_2.setFixed(reg_s6);
152
153
{
154
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
155
//! s2: %f:vcc = p_cbranch_nz %e:scc
156
//! p_unit_test 0, %f:vcc
157
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
158
Operand::c32(0x40018u));
159
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
160
Operand::zero());
161
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
162
writeout(0, Operand(br, vcc));
163
}
164
165
//; del d, e, f
166
167
{
168
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
169
//! s2: %f:vcc = p_cbranch_z %e:scc
170
//! p_unit_test 1, %f:vcc
171
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
172
Operand::c32(0x40018u));
173
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
174
Operand::zero());
175
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
176
writeout(1, Operand(br, vcc));
177
}
178
179
//; del d, e, f
180
181
{
182
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
183
//! s2: %f:vcc = p_cbranch_z %e:scc
184
//! p_unit_test 2, %f:vcc
185
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
186
Operand::c32(0x40018u));
187
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
188
Operand::zero());
189
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
190
writeout(2, Operand(br, vcc));
191
}
192
193
//; del d, e, f
194
195
{
196
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
197
//! s2: %f:vcc = p_cbranch_nz %e:scc
198
//! p_unit_test 3, %f:vcc
199
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
200
Operand::c32(0x40018u));
201
auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
202
Operand::zero());
203
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
204
writeout(3, Operand(br, vcc));
205
}
206
207
//; del d, e, f
208
209
{
210
//! s2: %d:s[2-3], s1: %e:scc = s_and_b64 %y:s[4-5], 0x12345
211
//! s2: %f:vcc = p_cbranch_z %e:scc
212
//! p_unit_test 4, %f:vcc
213
auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1,
214
Operand::c32(0x12345u));
215
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2),
216
Operand::zero(8));
217
auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
218
writeout(4, Operand(br, vcc));
219
}
220
221
//; del d, e, f
222
223
{
224
/* SCC is overwritten in between, don't optimize */
225
226
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
227
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
228
//! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
229
//! s2: %f:vcc = p_cbranch_z %g:scc
230
//! p_unit_test 5, %f:vcc, %h:s[3]
231
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
232
Operand::c32(0x40018u));
233
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
234
Operand::c32(1u));
235
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
236
Operand::zero());
237
auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
238
writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
239
}
240
241
//; del d, e, f, g, h, x
242
243
{
244
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
245
//! s1: %f:s[4] = s_cselect_b32 %z:s[6], %a:s[0], %e:scc
246
//! p_unit_test 6, %f:s[4]
247
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
248
Operand::c32(0x40018u));
249
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
250
Operand::zero());
251
auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp));
252
writeout(6, Operand(br, reg_s4));
253
}
254
255
//; del d, e, f
256
257
{
258
/* SCC is overwritten in between, don't optimize */
259
260
//! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
261
//! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
262
//! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
263
//! s1: %f:s[4] = s_cselect_b32 %a:s[0], %z:s[6], %g:scc
264
//! p_unit_test 7, %f:s[4], %h:s[3]
265
auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
266
Operand::c32(0x40018u));
267
auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
268
Operand::c32(1u));
269
auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
270
Operand::zero());
271
auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp));
272
writeout(7, Operand(br, reg_s4), Operand(ovrw, reg_s3));
273
}
274
275
//; del d, e, f, g, h, x
276
277
finish_optimizer_postRA_test();
278
END_TEST
279
280