/* Source: GitHub repository PojavLauncherTeam/mesa,
 * path blob/21.2-virgl/src/amd/compiler/tests/test_to_hw_instr.cpp
 */
/*
 * Copyright © 2020 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
#include "helpers.h"

/* The tests below use unqualified names such as PhysReg, Operand, Definition
 * and aco_opcode; presumably these are found through this namespace (confirm
 * in helpers.h).
 */
using namespace aco;

BEGIN_TEST(to_hw_instr.swap_subdword)
29
PhysReg v0_lo{256};
30
PhysReg v0_hi{256};
31
PhysReg v0_b1{256};
32
PhysReg v0_b3{256};
33
PhysReg v1_lo{257};
34
PhysReg v1_hi{257};
35
PhysReg v1_b1{257};
36
PhysReg v1_b3{257};
37
PhysReg v2_lo{258};
38
PhysReg v3_lo{259};
39
v0_hi.reg_b += 2;
40
v1_hi.reg_b += 2;
41
v0_b1.reg_b += 1;
42
v1_b1.reg_b += 1;
43
v0_b3.reg_b += 3;
44
v1_b3.reg_b += 3;
45
46
for (unsigned i = GFX6; i <= GFX7; i++) {
47
if (!setup_cs(NULL, (chip_class)i))
48
continue;
49
50
//~gfx[67]>> p_unit_test 0
51
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
52
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
53
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
54
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
55
bld.pseudo(aco_opcode::p_parallelcopy,
56
Definition(v0_lo, v2b), Definition(v1_lo, v2b),
57
Operand(v1_lo, v2b), Operand(v0_lo, v2b));
58
59
//~gfx[67]! p_unit_test 1
60
//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
61
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
62
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
63
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
64
bld.pseudo(aco_opcode::p_create_vector,
65
Definition(v0_lo, v1),
66
Operand(v1_lo, v2b), Operand(v0_lo, v2b));
67
68
//~gfx[67]! p_unit_test 2
69
//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
70
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
71
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
72
//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[2][0:16]
73
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
74
bld.pseudo(aco_opcode::p_create_vector,
75
Definition(v0_lo, v6b), Operand(v1_lo, v2b),
76
Operand(v0_lo, v2b), Operand(v2_lo, v2b));
77
78
//~gfx[67]! p_unit_test 3
79
//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
80
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
81
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
82
//~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[2][0:16]
83
//~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[3][0:16], %0:v[1][16:32], 2
84
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
85
bld.pseudo(aco_opcode::p_create_vector,
86
Definition(v0_lo, v2),
87
Operand(v1_lo, v2b), Operand(v0_lo, v2b),
88
Operand(v2_lo, v2b), Operand(v3_lo, v2b));
89
90
//~gfx[67]! p_unit_test 4
91
//~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[1][0:16]
92
//~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[2][0:16], %0:v[1][16:32], 2
93
//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
94
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:16], %0:v[0][16:32], 2
95
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
96
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
97
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
98
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
99
bld.pseudo(aco_opcode::p_create_vector,
100
Definition(v0_lo, v2),
101
Operand(v1_lo, v2b), Operand(v2_lo, v2b),
102
Operand(v0_lo, v2b), Operand(v3_lo, v2b));
103
104
//~gfx[67]! p_unit_test 5
105
//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
106
//~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
107
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
108
bld.pseudo(aco_opcode::p_split_vector,
109
Definition(v1_lo, v2b), Definition(v0_lo, v2b),
110
Operand(v0_lo, v1));
111
112
//~gfx[67]! p_unit_test 6
113
//~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16]
114
//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
115
//~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
116
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
117
bld.pseudo(aco_opcode::p_split_vector,
118
Definition(v1_lo, v2b), Definition(v0_lo, v2b),
119
Definition(v2_lo, v2b), Operand(v0_lo, v6b));
120
121
//~gfx[67]! p_unit_test 7
122
//~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16]
123
//~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
124
//~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
125
//~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[2][16:32]
126
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
127
bld.pseudo(aco_opcode::p_split_vector,
128
Definition(v1_lo, v2b), Definition(v0_lo, v2b),
129
Definition(v2_lo, v2b), Definition(v3_lo, v2b),
130
Operand(v0_lo, v2));
131
132
//~gfx[67]! p_unit_test 8
133
//~gfx[67]! v2b: %0:v[2][0:16] = v_lshrrev_b32 16, %0:v[0][16:32]
134
//~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
135
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
136
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
137
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
138
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
139
bld.pseudo(aco_opcode::p_split_vector,
140
Definition(v1_lo, v2b), Definition(v2_lo, v2b),
141
Definition(v0_lo, v2b), Definition(v3_lo, v2b),
142
Operand(v0_lo, v2));
143
144
//~gfx[67]! p_unit_test 9
145
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
146
//~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
147
//~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
148
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
149
bld.pseudo(aco_opcode::p_parallelcopy,
150
Definition(v0_lo, v1b), Definition(v1_lo, v1b),
151
Operand(v1_lo, v1b), Operand(v0_lo, v1b));
152
153
//~gfx[67]! p_unit_test 10
154
//~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
155
//~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
156
//~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
157
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
158
bld.pseudo(aco_opcode::p_create_vector,
159
Definition(v0_lo, v2b),
160
Operand(v1_lo, v1b), Operand(v0_lo, v1b));
161
162
//~gfx[67]! p_unit_test 11
163
//~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
164
//~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
165
//~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
166
//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
167
//~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
168
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
169
bld.pseudo(aco_opcode::p_create_vector,
170
Definition(v0_lo, v3b), Operand(v1_lo, v1b),
171
Operand(v0_lo, v1b), Operand(v2_lo, v1b));
172
173
//~gfx[67]! p_unit_test 12
174
//~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
175
//~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
176
//~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
177
//~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
178
//~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
179
//~gfx[67]! v3b: %0:v[0][8:32] = v_lshlrev_b32 8, %0:v[0][0:24]
180
//~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:8], %0:v[0][8:32], 1
181
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
182
bld.pseudo(aco_opcode::p_create_vector,
183
Definition(v0_lo, v1),
184
Operand(v1_lo, v1b), Operand(v0_lo, v1b),
185
Operand(v2_lo, v1b), Operand(v3_lo, v1b));
186
187
//~gfx[67]! p_unit_test 13
188
//~gfx[67]! v1b: %0:v[0][0:8] = v_and_b32 0xff, %0:v[0][0:8]
189
//~gfx[67]! v2b: %0:v[0][0:16] = v_mul_u32_u24 0x101, %0:v[0][0:8]
190
//~gfx[67]! v2b: %0:v[0][0:16] = v_and_b32 0xffff, %0:v[0][0:16]
191
//~gfx[67]! v3b: %0:v[0][0:24] = v_cvt_pk_u16_u32 %0:v[0][0:16], %0:v[0][0:8]
192
//~gfx[67]! v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24]
193
//~gfx[67]! s1: %0:m0 = s_mov_b32 0x1000001
194
//~gfx[67]! v1: %0:v[0] = v_mul_lo_u32 %0:m0, %0:v[0][0:8]
195
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
196
Instruction* pseudo = bld.pseudo(aco_opcode::p_create_vector,
197
Definition(v0_lo, v1),
198
Operand(v0_lo, v1b), Operand(v0_lo, v1b),
199
Operand(v0_lo, v1b), Operand(v0_lo, v1b));
200
pseudo->pseudo().scratch_sgpr = m0;
201
202
//~gfx[67]! p_unit_test 14
203
//~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
204
//~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
205
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
206
bld.pseudo(aco_opcode::p_split_vector,
207
Definition(v1_lo, v1b), Definition(v0_lo, v1b),
208
Operand(v0_lo, v2b));
209
210
//~gfx[67]! p_unit_test 15
211
//~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
212
//~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
213
//~gfx[67]! v1b: %0:v[2][0:8] = v_lshrrev_b32 16, %0:v[1][16:24]
214
//~gfx[67]! v1b: %0:v[3][0:8] = v_lshrrev_b32 24, %0:v[1][24:32]
215
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
216
bld.pseudo(aco_opcode::p_split_vector,
217
Definition(v1_lo, v1b), Definition(v0_lo, v1b),
218
Definition(v2_lo, v1b), Definition(v3_lo, v1b),
219
Operand(v0_lo, v1));
220
221
//~gfx[67]! s_endpgm
222
223
finish_to_hw_instr_test();
224
}
225
226
for (unsigned i = GFX8; i <= GFX9; i++) {
227
if (!setup_cs(NULL, (chip_class)i))
228
continue;
229
230
//~gfx[89]>> p_unit_test 0
231
//~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
232
//~gfx9! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16]
233
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
234
bld.pseudo(aco_opcode::p_parallelcopy,
235
Definition(v0_lo, v2b), Definition(v0_hi, v2b),
236
Operand(v0_hi, v2b), Operand(v0_lo, v2b));
237
238
//~gfx[89]! p_unit_test 1
239
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
240
//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
241
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
242
//~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
243
//~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_preserve
244
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
245
bld.pseudo(aco_opcode::p_parallelcopy,
246
Definition(v0_lo, v1), Definition(v1_lo, v2b),
247
Operand(v1_lo, v1), Operand(v0_lo, v2b));
248
249
//~gfx[89]! p_unit_test 2
250
//~gfx[89]! v2b: %0:v[0][16:32] = v_mov_b32 %0:v[1][16:32] dst_preserve
251
//~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][0:16] dst_preserve
252
//~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_preserve
253
//~gfx[89]! v2b: %0:v[0][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_preserve
254
//~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_preserve
255
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
256
bld.pseudo(aco_opcode::p_parallelcopy,
257
Definition(v0_lo, v1), Definition(v1_lo, v2b), Definition(v1_hi, v2b),
258
Operand(v1_lo, v1), Operand(v0_lo, v2b), Operand(v0_lo, v2b));
259
260
//~gfx[89]! p_unit_test 3
261
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
262
//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
263
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
264
//~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
265
//~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] dst_preserve
266
//~gfx[89]! v1b: %0:v[1][16:24] = v_mov_b32 %0:v[0][16:24] dst_preserve
267
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
268
bld.pseudo(aco_opcode::p_parallelcopy,
269
Definition(v0_lo, v1), Definition(v1_b3, v1b),
270
Operand(v1_lo, v1), Operand(v0_b3, v1b));
271
272
//~gfx[89]! p_unit_test 4
273
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
274
//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
275
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
276
//~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
277
//~gfx[89]! v1b: %0:v[1][8:16] = v_mov_b32 %0:v[0][8:16] dst_preserve
278
//~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_preserve
279
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
280
bld.pseudo(aco_opcode::p_parallelcopy,
281
Definition(v0_lo, v1), Definition(v1_lo, v1b),
282
Operand(v1_lo, v1), Operand(v0_lo, v1b));
283
284
//~gfx[89]! p_unit_test 5
285
//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
286
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1]
287
//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
288
//~gfx9! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
289
//~gfx[89]! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][8:16] dst_preserve
290
//~gfx[89]! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][24:32] dst_preserve
291
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
292
bld.pseudo(aco_opcode::p_parallelcopy,
293
Definition(v0_lo, v1b), Definition(v0_hi, v1b), Definition(v1_lo, v1),
294
Operand(v1_lo, v1b), Operand(v1_hi, v1b), Operand(v0_lo, v1));
295
296
//~gfx[89]! p_unit_test 6
297
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
298
//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
299
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
300
//~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
301
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
302
bld.pseudo(aco_opcode::p_parallelcopy,
303
Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
304
Operand(v1_lo, v2b), Operand(v1_hi, v2b), Operand(v0_lo, v1));
305
306
//~gfx[89]! p_unit_test 7
307
//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
308
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[0], %0:v[1]
309
//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
310
//~gfx9! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
311
//~gfx[89]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
312
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
313
bld.pseudo(aco_opcode::p_parallelcopy,
314
Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
315
Operand(v1_hi, v2b), Operand(v1_lo, v2b), Operand(v0_lo, v1));
316
317
//~gfx[89]! p_unit_test 8
318
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
319
//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
320
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
321
//~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
322
//~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_preserve
323
//~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_preserve
324
//~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_preserve
325
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
326
bld.pseudo(aco_opcode::p_parallelcopy,
327
Definition(v0_lo, v3b), Definition(v1_lo, v3b),
328
Operand(v1_lo, v3b), Operand(v0_lo, v3b));
329
330
//~gfx[89]! p_unit_test 9
331
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
332
//~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
333
//~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
334
//~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
335
//~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_preserve
336
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
337
bld.pseudo(aco_opcode::p_parallelcopy,
338
Definition(v0_lo, v3b), Definition(v1_lo, v3b), Definition(v0_b3, v1b),
339
Operand(v1_lo, v3b), Operand(v0_lo, v3b), Operand(v1_b3, v1b));
340
341
//~gfx[89]! p_unit_test 10
342
//~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_preserve
343
//~gfx[89]! v1b: %0:v[0][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_preserve
344
//~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_preserve
345
//~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_preserve
346
//~gfx[89]! v1b: %0:v[0][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_preserve
347
//~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_preserve
348
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
349
bld.pseudo(aco_opcode::p_parallelcopy,
350
Definition(v0_b1, v2b), Definition(v1_b1, v2b),
351
Operand(v1_b1, v2b), Operand(v0_b1, v2b));
352
353
//~gfx[89]! p_unit_test 11
354
//~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][16:32] dst_preserve
355
//~gfx[89]! v1: %0:v[0] = v_mov_b32 42
356
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
357
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
358
Operand::c32(42u), Operand(v0_hi, v2b));
359
360
//~gfx[89]! s_endpgm
361
362
finish_to_hw_instr_test();
363
}
364
END_TEST
365
366
BEGIN_TEST(to_hw_instr.subdword_constant)
367
PhysReg v0_lo{256};
368
PhysReg v0_hi{256};
369
PhysReg v0_b1{256};
370
PhysReg v1_hi{257};
371
v0_hi.reg_b += 2;
372
v0_b1.reg_b += 1;
373
v1_hi.reg_b += 2;
374
375
for (unsigned i = GFX9; i <= GFX10; i++) {
376
if (!setup_cs(NULL, (chip_class)i))
377
continue;
378
379
/* 16-bit pack */
380
//>> p_unit_test 0
381
//! v1: %_:v[0] = v_pack_b32_f16 0.5, hi(%_:v[1][16:32])
382
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
383
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
384
Operand::c16(0x3800), Operand(v1_hi, v2b));
385
386
//! p_unit_test 1
387
//~gfx9! v2b: %0:v[0][16:32] = v_and_b32 0xffff0000, %0:v[1][16:32]
388
//~gfx9! v1: %0:v[0] = v_or_b32 0x4205, %0:v[0]
389
//~gfx10! v1: %_:v[0] = v_pack_b32_f16 0x4205, hi(%_:v[1][16:32])
390
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
391
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
392
Operand::c16(0x4205), Operand(v1_hi, v2b));
393
394
//! p_unit_test 2
395
//~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
396
//~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
397
//~gfx10! v1: %0:v[0] = v_pack_b32_f16 0x4205, %0:v[0][0:16]
398
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
399
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
400
Operand::c16(0x4205), Operand(v0_lo, v2b));
401
402
//! p_unit_test 3
403
//! v1: %_:v[0] = v_mov_b32 0x3c003800
404
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
405
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
406
Operand::c16(0x3800), Operand::c16(0x3c00));
407
408
//! p_unit_test 4
409
//! v1: %_:v[0] = v_mov_b32 0x43064205
410
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
411
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
412
Operand::c16(0x4205), Operand::c16(0x4306));
413
414
//! p_unit_test 5
415
//! v1: %_:v[0] = v_mov_b32 0x38004205
416
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
417
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
418
Operand::c16(0x4205), Operand::c16(0x3800));
419
420
/* 16-bit copy */
421
//! p_unit_test 6
422
//! v2b: %_:v[0][0:16] = v_add_f16 0.5, 0 dst_preserve
423
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
424
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x3800));
425
426
//! p_unit_test 7
427
//~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0]
428
//~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0]
429
//~gfx10! v2b: %_:v[0][0:16] = v_pack_b32_f16 0x4205, hi(%_:v[0][16:32])
430
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
431
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x4205));
432
433
//! p_unit_test 8
434
//~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
435
//~gfx9! v1: %_:v[0] = v_or_b32 0x42050000, %_:v[0]
436
//~gfx10! v2b: %_:v[0][16:32] = v_pack_b32_f16 %_:v[0][0:16], 0x4205
437
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
438
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0x4205));
439
440
//! p_unit_test 9
441
//! v1b: %_:v[0][8:16] = v_mov_b32 0 dst_preserve
442
//! v1b: %_:v[0][16:24] = v_mov_b32 56 dst_preserve
443
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
444
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x3800));
445
446
//! p_unit_test 10
447
//! v1b: %_:v[0][8:16] = v_mov_b32 5 dst_preserve
448
//! v1b: %_:v[0][16:24] = v_mul_u32_u24 2, 33 dst_preserve
449
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
450
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x4205));
451
452
/* 8-bit copy */
453
//! p_unit_test 11
454
//! v1b: %_:v[0][0:8] = v_mul_u32_u24 2, 33 dst_preserve
455
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
456
bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0x42));
457
458
//! s_endpgm
459
460
finish_to_hw_instr_test();
461
}
462
END_TEST
463
464
BEGIN_TEST(to_hw_instr.self_intersecting_swap)
465
if (!setup_cs(NULL, GFX9))
466
return;
467
468
PhysReg reg_v1{257};
469
PhysReg reg_v2{258};
470
PhysReg reg_v3{259};
471
PhysReg reg_v7{263};
472
473
//>> p_unit_test 0
474
//! v1: %0:v[1], v1: %0:v[2] = v_swap_b32 %0:v[2], %0:v[1]
475
//! v1: %0:v[2], v1: %0:v[3] = v_swap_b32 %0:v[3], %0:v[2]
476
//! v1: %0:v[3], v1: %0:v[7] = v_swap_b32 %0:v[7], %0:v[3]
477
//! s_endpgm
478
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
479
//v[1:2] = v[2:3]
480
//v3 = v7
481
//v7 = v1
482
bld.pseudo(aco_opcode::p_parallelcopy,
483
Definition(reg_v1, v2), Definition(reg_v3, v1), Definition(reg_v7, v1),
484
Operand(reg_v2, v2), Operand(reg_v7, v1), Operand(reg_v1, v1));
485
486
finish_to_hw_instr_test();
487
END_TEST
488
489
BEGIN_TEST(to_hw_instr.extract)
490
PhysReg s0_lo{0};
491
PhysReg s1_lo{1};
492
PhysReg v0_lo{256};
493
PhysReg v1_lo{257};
494
495
for (unsigned i = GFX7; i <= GFX9; i++) {
496
for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
497
if (!setup_cs(NULL, (chip_class)i, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
498
continue;
499
500
#define EXT(idx, size) \
501
bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx), \
502
Operand::c32(size), Operand::c32(is_signed));
503
504
//; funcs['v_bfe'] = lambda _: 'v_bfe_i32' if variant.endswith('_signed') else 'v_bfe_u32'
505
//; funcs['v_shr'] = lambda _: 'v_ashrrev_i32' if variant.endswith('_signed') else 'v_lshrrev_b32'
506
//; funcs['s_bfe'] = lambda _: 's_bfe_i32' if variant.endswith('_signed') else 's_bfe_u32'
507
//; funcs['s_shr'] = lambda _: 's_ashr_i32' if variant.endswith('_signed') else 's_lshr_b32'
508
//; funcs['sel'] = lambda bits: ('sext(%%_:v[1])[%s]' if variant.endswith('_signed') else '%%_:v[1][%s]') % bits
509
510
//>> p_unit_test 0
511
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
512
//! v1: %_:v[0] = @v_bfe %_:v[1], 0, 8
513
EXT(0, 8)
514
//! v1: %_:v[0] = @v_bfe %_:v[1], 8, 8
515
EXT(1, 8)
516
//! v1: %_:v[0] = @v_bfe %_:v[1], 16, 8
517
EXT(2, 8)
518
//! v1: %_:v[0] = @v_shr 24, %_:v[1]
519
EXT(3, 8)
520
//! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16
521
EXT(0, 16)
522
//! v1: %_:v[0] = @v_shr 16, %_:v[1]
523
EXT(1, 16)
524
525
#undef EXT
526
527
#define EXT(idx, size) \
528
bld.pseudo(aco_opcode::p_extract, Definition(s0_lo, s1), Definition(scc, s1), \
529
Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size), Operand::c32(is_signed));
530
531
//>> p_unit_test 2
532
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
533
//~gfx._unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80000
534
//~gfx._signed! s1: %_:s[0] = s_sext_i32_i8 %_:s[1]
535
EXT(0, 8)
536
//! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80008
537
EXT(1, 8)
538
//! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80010
539
EXT(2, 8)
540
//! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 24
541
EXT(3, 8)
542
//~gfx._unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x100000
543
//~gfx._signed! s1: %_:s[0] = s_sext_i32_i16 %_:s[1]
544
EXT(0, 16)
545
//! s1: %_:s[0], s1: %_:scc = @s_shr %_:s[1], 16
546
EXT(1, 16)
547
548
#undef EXT
549
550
#define EXT(idx, src_b) \
551
bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v2b), Operand(v1_lo.advance(src_b), v2b), \
552
Operand::c32(idx), Operand::c32(8u), Operand::c32(is_signed));
553
554
//>> p_unit_test 4
555
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
556
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8
557
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(0:7)
558
EXT(0, 0)
559
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(16:23)
560
if (i != GFX7)
561
EXT(0, 2)
562
//~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8
563
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(8:15)
564
EXT(1, 0)
565
//~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(24:31)
566
if (i != GFX7)
567
EXT(1, 2)
568
569
#undef EXT
570
571
finish_to_hw_instr_test();
572
573
//! s_endpgm
574
}
575
}
576
END_TEST
577
578
BEGIN_TEST(to_hw_instr.insert)
579
PhysReg s0_lo{0};
580
PhysReg s1_lo{1};
581
PhysReg v0_lo{256};
582
PhysReg v1_lo{257};
583
584
for (unsigned i = GFX7; i <= GFX9; i++) {
585
if (!setup_cs(NULL, (chip_class)i))
586
continue;
587
588
#define INS(idx, size) \
589
bld.pseudo(aco_opcode::p_insert, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx), \
590
Operand::c32(size));
591
592
//>> p_unit_test 0
593
bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
594
//! v1: %_:v[0] = v_bfe_u32 %_:v[1], 0, 8
595
INS(0, 8)
596
//~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8
597
//~gfx7! v1: %0:v[0] = v_lshlrev_b32 8, %0:v[0]
598
//~gfx[^7]! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte1
599
INS(1, 8)
600
//~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8
601
//~gfx7! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[0]
602
//~gfx[^7]! v1: %0:v[0] = v_mov_b32 %0:v[1] dst_sel:ubyte2
603
INS(2, 8)
604
//! v1: %0:v[0] = v_lshlrev_b32 24, %0:v[1]
605
INS(3, 8)
606
//! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 16
607
INS(0, 16)
608
//! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[1]
609
INS(1, 16)
610
611
#undef INS
612
613
#define INS(idx, size) \
614
bld.pseudo(aco_opcode::p_insert, Definition(s0_lo, s1), Definition(scc, s1), \
615
Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size));
616
617
//>> p_unit_test 1
618
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
619
//! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
620
INS(0, 8)
621
//! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
622
//! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[0], 8
623
INS(1, 8)
624
//! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000
625
//! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[0], 16
626
INS(2, 8)
627
//! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[1], 24
628
INS(3, 8)
629
//! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x100000
630
INS(0, 16)
631
//! s1: %_:s[0], s1: %_:scc = s_lshl_b32 %_:s[1], 16
632
INS(1, 16)
633
634
#undef INS
635
636
#define INS(idx, def_b) \
637
bld.pseudo(aco_opcode::p_insert, Definition(v0_lo.advance(def_b), v2b), Operand(v1_lo, v2b), \
638
Operand::c32(idx), Operand::c32(8u));
639
640
//>> p_unit_test 2
641
bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
642
//~gfx7! v2b: %_:v[0][0:16] = v_bfe_u32 %_:v[1][0:16], 0, 8
643
//~gfx[^7]! v1: %_:v[0] = v_mov_b32 %_:v[1][0:16] dst_sel:ubyte0 dst_preserve
644
INS(0, 0)
645
//~gfx[^7]! v1: %_:v[0] = v_mov_b32 %_:v[1][0:16] dst_sel:ubyte2 dst_preserve
646
if (i != GFX7)
647
INS(0, 2)
648
//~gfx7! v2b: %_:v[0][0:16] = v_lshlrev_b32 8, %_:v[1][0:16]
649
//~gfx[^7]! v1: %_:v[0] = v_mov_b32 %_:v[1][0:16] dst_sel:ubyte1 dst_preserve
650
INS(1, 0)
651
//~gfx[^7]! v1: %_:v[0] = v_mov_b32 %_:v[1][0:16] dst_sel:ubyte3 dst_preserve
652
if (i != GFX7)
653
INS(1, 2)
654
655
#undef INS
656
657
finish_to_hw_instr_test();
658
659
//! s_endpgm
660
}
661
END_TEST
662
663