Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/amd/compiler/aco_validate.cpp
4550 views
1
/*
2
* Copyright © 2018 Valve Corporation
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*
23
*/
24
25
#include "aco_ir.h"
26
27
#include "util/memstream.h"
28
29
#include <array>
30
#include <map>
31
#include <set>
32
#include <vector>
33
34
namespace aco {
35
36
static void
37
aco_log(Program* program, enum radv_compiler_debug_level level, const char* prefix,
38
const char* file, unsigned line, const char* fmt, va_list args)
39
{
40
char* msg;
41
42
if (program->debug.shorten_messages) {
43
msg = ralloc_vasprintf(NULL, fmt, args);
44
} else {
45
msg = ralloc_strdup(NULL, prefix);
46
ralloc_asprintf_append(&msg, " In file %s:%u\n", file, line);
47
ralloc_asprintf_append(&msg, " ");
48
ralloc_vasprintf_append(&msg, fmt, args);
49
}
50
51
if (program->debug.func)
52
program->debug.func(program->debug.private_data, level, msg);
53
54
fprintf(program->debug.output, "%s\n", msg);
55
56
ralloc_free(msg);
57
}
58
59
void
60
_aco_perfwarn(Program* program, const char* file, unsigned line, const char* fmt, ...)
61
{
62
va_list args;
63
64
va_start(args, fmt);
65
aco_log(program, RADV_COMPILER_DEBUG_LEVEL_PERFWARN, "ACO PERFWARN:\n", file, line, fmt, args);
66
va_end(args);
67
}
68
69
void
70
_aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...)
71
{
72
va_list args;
73
74
va_start(args, fmt);
75
aco_log(program, RADV_COMPILER_DEBUG_LEVEL_ERROR, "ACO ERROR:\n", file, line, fmt, args);
76
va_end(args);
77
}
78
79
bool
80
validate_ir(Program* program)
81
{
82
bool is_valid = true;
83
auto check = [&program, &is_valid](bool success, const char* msg,
84
aco::Instruction* instr) -> void
85
{
86
if (!success) {
87
char* out;
88
size_t outsize;
89
struct u_memstream mem;
90
u_memstream_open(&mem, &out, &outsize);
91
FILE* const memf = u_memstream_get(&mem);
92
93
fprintf(memf, "%s: ", msg);
94
aco_print_instr(instr, memf);
95
u_memstream_close(&mem);
96
97
aco_err(program, "%s", out);
98
free(out);
99
100
is_valid = false;
101
}
102
};
103
104
auto check_block = [&program, &is_valid](bool success, const char* msg,
105
aco::Block* block) -> void
106
{
107
if (!success) {
108
aco_err(program, "%s: BB%u", msg, block->index);
109
is_valid = false;
110
}
111
};
112
113
for (Block& block : program->blocks) {
114
for (aco_ptr<Instruction>& instr : block.instructions) {
115
116
/* check base format */
117
Format base_format = instr->format;
118
base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::SDWA);
119
base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP);
120
if ((uint32_t)base_format & (uint32_t)Format::VOP1)
121
base_format = Format::VOP1;
122
else if ((uint32_t)base_format & (uint32_t)Format::VOP2)
123
base_format = Format::VOP2;
124
else if ((uint32_t)base_format & (uint32_t)Format::VOPC)
125
base_format = Format::VOPC;
126
else if ((uint32_t)base_format & (uint32_t)Format::VINTRP) {
127
if (instr->opcode == aco_opcode::v_interp_p1ll_f16 ||
128
instr->opcode == aco_opcode::v_interp_p1lv_f16 ||
129
instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||
130
instr->opcode == aco_opcode::v_interp_p2_f16) {
131
/* v_interp_*_fp16 are considered VINTRP by the compiler but
132
* they are emitted as VOP3.
133
*/
134
base_format = Format::VOP3;
135
} else {
136
base_format = Format::VINTRP;
137
}
138
}
139
check(base_format == instr_info.format[(int)instr->opcode],
140
"Wrong base format for instruction", instr.get());
141
142
/* check VOP3 modifiers */
143
if (instr->isVOP3() && instr->format != Format::VOP3) {
144
check(base_format == Format::VOP2 || base_format == Format::VOP1 ||
145
base_format == Format::VOPC || base_format == Format::VINTRP,
146
"Format cannot have VOP3/VOP3B applied", instr.get());
147
}
148
149
/* check SDWA */
150
if (instr->isSDWA()) {
151
check(base_format == Format::VOP2 || base_format == Format::VOP1 ||
152
base_format == Format::VOPC,
153
"Format cannot have SDWA applied", instr.get());
154
155
check(program->chip_class >= GFX8, "SDWA is GFX8+ only", instr.get());
156
157
SDWA_instruction& sdwa = instr->sdwa();
158
check(sdwa.omod == 0 || program->chip_class >= GFX9,
159
"SDWA omod only supported on GFX9+", instr.get());
160
if (base_format == Format::VOPC) {
161
check(sdwa.clamp == false || program->chip_class == GFX8,
162
"SDWA VOPC clamp only supported on GFX8", instr.get());
163
check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||
164
program->chip_class >= GFX9,
165
"SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());
166
}
167
168
if (instr->operands.size() >= 3) {
169
check(instr->operands[2].isFixed() && instr->operands[2].physReg() == vcc,
170
"3rd operand must be fixed to vcc with SDWA", instr.get());
171
}
172
if (instr->definitions.size() >= 2) {
173
check(instr->definitions[1].isFixed() && instr->definitions[1].physReg() == vcc,
174
"2nd definition must be fixed to vcc with SDWA", instr.get());
175
}
176
177
const bool sdwa_opcodes =
178
instr->opcode != aco_opcode::v_fmac_f32 && instr->opcode != aco_opcode::v_fmac_f16 &&
179
instr->opcode != aco_opcode::v_fmamk_f32 &&
180
instr->opcode != aco_opcode::v_fmaak_f32 &&
181
instr->opcode != aco_opcode::v_fmamk_f16 &&
182
instr->opcode != aco_opcode::v_fmaak_f16 &&
183
instr->opcode != aco_opcode::v_madmk_f32 &&
184
instr->opcode != aco_opcode::v_madak_f32 &&
185
instr->opcode != aco_opcode::v_madmk_f16 &&
186
instr->opcode != aco_opcode::v_madak_f16 &&
187
instr->opcode != aco_opcode::v_readfirstlane_b32 &&
188
instr->opcode != aco_opcode::v_clrexcp && instr->opcode != aco_opcode::v_swap_b32;
189
190
const bool feature_mac =
191
program->chip_class == GFX8 &&
192
(instr->opcode == aco_opcode::v_mac_f32 && instr->opcode == aco_opcode::v_mac_f16);
193
194
check(sdwa_opcodes || feature_mac, "SDWA can't be used with this opcode", instr.get());
195
196
if (instr->definitions[0].regClass().is_subdword())
197
check((sdwa.dst_sel & sdwa_asuint) == (sdwa_isra | instr->definitions[0].bytes()),
198
"Unexpected SDWA sel for sub-dword definition", instr.get());
199
}
200
201
/* check opsel */
202
if (instr->isVOP3()) {
203
VOP3_instruction& vop3 = instr->vop3();
204
check(vop3.opsel == 0 || program->chip_class >= GFX9,
205
"Opsel is only supported on GFX9+", instr.get());
206
207
for (unsigned i = 0; i < 3; i++) {
208
if (i >= instr->operands.size() ||
209
(instr->operands[i].hasRegClass() &&
210
instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed()))
211
check((vop3.opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get());
212
}
213
if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed())
214
check((vop3.opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition",
215
instr.get());
216
}
217
218
/* check for undefs */
219
for (unsigned i = 0; i < instr->operands.size(); i++) {
220
if (instr->operands[i].isUndefined()) {
221
bool flat = instr->isFlatLike();
222
bool can_be_undef = is_phi(instr) || instr->isEXP() || instr->isReduction() ||
223
instr->opcode == aco_opcode::p_create_vector ||
224
(flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
225
((instr->isMUBUF() || instr->isMTBUF()) && i == 1);
226
check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
227
} else {
228
check(instr->operands[i].isFixed() || instr->operands[i].isTemp() ||
229
instr->operands[i].isConstant(),
230
"Uninitialized Operand", instr.get());
231
}
232
}
233
234
/* check subdword definitions */
235
for (unsigned i = 0; i < instr->definitions.size(); i++) {
236
if (instr->definitions[i].regClass().is_subdword())
237
check(instr->isPseudo() || instr->definitions[i].bytes() <= 4,
238
"Only Pseudo instructions can write subdword registers larger than 4 bytes",
239
instr.get());
240
}
241
242
if (instr->isSALU() || instr->isVALU()) {
243
/* check literals */
244
Operand literal(s1);
245
for (unsigned i = 0; i < instr->operands.size(); i++) {
246
Operand op = instr->operands[i];
247
if (!op.isLiteral())
248
continue;
249
250
check(!instr->isDPP() && !instr->isSDWA() &&
251
(!instr->isVOP3() || program->chip_class >= GFX10) &&
252
(!instr->isVOP3P() || program->chip_class >= GFX10),
253
"Literal applied on wrong instruction format", instr.get());
254
255
check(literal.isUndefined() || (literal.size() == op.size() &&
256
literal.constantValue() == op.constantValue()),
257
"Only 1 Literal allowed", instr.get());
258
literal = op;
259
check(instr->isSALU() || instr->isVOP3() || instr->isVOP3P() || i == 0 || i == 2,
260
"Wrong source position for Literal argument", instr.get());
261
}
262
263
/* check num sgprs for VALU */
264
if (instr->isVALU()) {
265
bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64 ||
266
instr->opcode == aco_opcode::v_lshrrev_b64 ||
267
instr->opcode == aco_opcode::v_ashrrev_i64;
268
unsigned const_bus_limit = 1;
269
if (program->chip_class >= GFX10 && !is_shift64)
270
const_bus_limit = 2;
271
272
uint32_t scalar_mask = instr->isVOP3() || instr->isVOP3P() ? 0x7 : 0x5;
273
if (instr->isSDWA())
274
scalar_mask = program->chip_class >= GFX9 ? 0x7 : 0x4;
275
else if (instr->isDPP())
276
scalar_mask = 0x0;
277
278
if (instr->isVOPC() || instr->opcode == aco_opcode::v_readfirstlane_b32 ||
279
instr->opcode == aco_opcode::v_readlane_b32 ||
280
instr->opcode == aco_opcode::v_readlane_b32_e64) {
281
check(instr->definitions[0].getTemp().type() == RegType::sgpr,
282
"Wrong Definition type for VALU instruction", instr.get());
283
} else {
284
check(instr->definitions[0].getTemp().type() == RegType::vgpr,
285
"Wrong Definition type for VALU instruction", instr.get());
286
}
287
288
unsigned num_sgprs = 0;
289
unsigned sgpr[] = {0, 0};
290
for (unsigned i = 0; i < instr->operands.size(); i++) {
291
Operand op = instr->operands[i];
292
if (instr->opcode == aco_opcode::v_readfirstlane_b32 ||
293
instr->opcode == aco_opcode::v_readlane_b32 ||
294
instr->opcode == aco_opcode::v_readlane_b32_e64) {
295
check(i != 1 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||
296
op.isConstant(),
297
"Must be a SGPR or a constant", instr.get());
298
check(i == 1 || (op.isTemp() && op.regClass().type() == RegType::vgpr &&
299
op.bytes() <= 4),
300
"Wrong Operand type for VALU instruction", instr.get());
301
continue;
302
}
303
if (instr->opcode == aco_opcode::v_permlane16_b32 ||
304
instr->opcode == aco_opcode::v_permlanex16_b32) {
305
check(i != 0 || (op.isTemp() && op.regClass().type() == RegType::vgpr),
306
"Operand 0 of v_permlane must be VGPR", instr.get());
307
check(i == 0 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||
308
op.isConstant(),
309
"Lane select operands of v_permlane must be SGPR or constant",
310
instr.get());
311
}
312
313
if (instr->opcode == aco_opcode::v_writelane_b32 ||
314
instr->opcode == aco_opcode::v_writelane_b32_e64) {
315
check(i != 2 || (op.isTemp() && op.regClass().type() == RegType::vgpr &&
316
op.bytes() <= 4),
317
"Wrong Operand type for VALU instruction", instr.get());
318
check(i == 2 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||
319
op.isConstant(),
320
"Must be a SGPR or a constant", instr.get());
321
continue;
322
}
323
if (op.isTemp() && instr->operands[i].regClass().type() == RegType::sgpr) {
324
check(scalar_mask & (1 << i), "Wrong source position for SGPR argument",
325
instr.get());
326
327
if (op.tempId() != sgpr[0] && op.tempId() != sgpr[1]) {
328
if (num_sgprs < 2)
329
sgpr[num_sgprs++] = op.tempId();
330
}
331
}
332
333
if (op.isConstant() && !op.isLiteral())
334
check(scalar_mask & (1 << i), "Wrong source position for constant argument",
335
instr.get());
336
}
337
check(num_sgprs + (literal.isUndefined() ? 0 : 1) <= const_bus_limit,
338
"Too many SGPRs/literals", instr.get());
339
}
340
341
if (instr->isSOP1() || instr->isSOP2()) {
342
check(instr->definitions[0].getTemp().type() == RegType::sgpr,
343
"Wrong Definition type for SALU instruction", instr.get());
344
for (const Operand& op : instr->operands) {
345
check(op.isConstant() || op.regClass().type() <= RegType::sgpr,
346
"Wrong Operand type for SALU instruction", instr.get());
347
}
348
}
349
}
350
351
switch (instr->format) {
352
case Format::PSEUDO: {
353
if (instr->opcode == aco_opcode::p_create_vector) {
354
unsigned size = 0;
355
for (const Operand& op : instr->operands) {
356
check(op.bytes() < 4 || size % 4 == 0, "Operand is not aligned", instr.get());
357
size += op.bytes();
358
}
359
check(size == instr->definitions[0].bytes(),
360
"Definition size does not match operand sizes", instr.get());
361
if (instr->definitions[0].getTemp().type() == RegType::sgpr) {
362
for (const Operand& op : instr->operands) {
363
check(op.isConstant() || op.regClass().type() == RegType::sgpr,
364
"Wrong Operand type for scalar vector", instr.get());
365
}
366
}
367
} else if (instr->opcode == aco_opcode::p_extract_vector) {
368
check((instr->operands[0].isTemp()) && instr->operands[1].isConstant(),
369
"Wrong Operand types", instr.get());
370
check((instr->operands[1].constantValue() + 1) * instr->definitions[0].bytes() <=
371
instr->operands[0].bytes(),
372
"Index out of range", instr.get());
373
check(instr->definitions[0].getTemp().type() == RegType::vgpr ||
374
instr->operands[0].regClass().type() == RegType::sgpr,
375
"Cannot extract SGPR value from VGPR vector", instr.get());
376
check(program->chip_class >= GFX9 ||
377
!instr->definitions[0].regClass().is_subdword() ||
378
instr->operands[0].regClass().type() == RegType::vgpr,
379
"Cannot extract subdword from SGPR before GFX9+", instr.get());
380
} else if (instr->opcode == aco_opcode::p_split_vector) {
381
check(instr->operands[0].isTemp(), "Operand must be a temporary", instr.get());
382
unsigned size = 0;
383
for (const Definition& def : instr->definitions) {
384
size += def.bytes();
385
}
386
check(size == instr->operands[0].bytes(),
387
"Operand size does not match definition sizes", instr.get());
388
if (instr->operands[0].getTemp().type() == RegType::vgpr) {
389
for (const Definition& def : instr->definitions)
390
check(def.regClass().type() == RegType::vgpr,
391
"Wrong Definition type for VGPR split_vector", instr.get());
392
} else {
393
for (const Definition& def : instr->definitions)
394
check(program->chip_class >= GFX9 || !def.regClass().is_subdword(),
395
"Cannot split SGPR into subdword VGPRs before GFX9+", instr.get());
396
}
397
} else if (instr->opcode == aco_opcode::p_parallelcopy) {
398
check(instr->definitions.size() == instr->operands.size(),
399
"Number of Operands does not match number of Definitions", instr.get());
400
for (unsigned i = 0; i < instr->operands.size(); i++) {
401
check(instr->definitions[i].bytes() == instr->operands[i].bytes(),
402
"Operand and Definition size must match", instr.get());
403
if (instr->operands[i].isTemp())
404
check((instr->definitions[i].getTemp().type() ==
405
instr->operands[i].regClass().type()) ||
406
(instr->definitions[i].getTemp().type() == RegType::vgpr &&
407
instr->operands[i].regClass().type() == RegType::sgpr),
408
"Operand and Definition types do not match", instr.get());
409
}
410
} else if (instr->opcode == aco_opcode::p_phi) {
411
check(instr->operands.size() == block.logical_preds.size(),
412
"Number of Operands does not match number of predecessors", instr.get());
413
check(instr->definitions[0].getTemp().type() == RegType::vgpr,
414
"Logical Phi Definition must be vgpr", instr.get());
415
for (const Operand& op : instr->operands)
416
check(instr->definitions[0].size() == op.size(),
417
"Operand sizes must match Definition size", instr.get());
418
} else if (instr->opcode == aco_opcode::p_linear_phi) {
419
for (const Operand& op : instr->operands) {
420
check(!op.isTemp() || op.getTemp().is_linear(), "Wrong Operand type",
421
instr.get());
422
check(instr->definitions[0].size() == op.size(),
423
"Operand sizes must match Definition size", instr.get());
424
}
425
check(instr->operands.size() == block.linear_preds.size(),
426
"Number of Operands does not match number of predecessors", instr.get());
427
} else if (instr->opcode == aco_opcode::p_extract ||
428
instr->opcode == aco_opcode::p_insert) {
429
check(instr->operands[0].isTemp(), "Data operand must be temporary", instr.get());
430
check(instr->operands[1].isConstant(), "Index must be constant", instr.get());
431
if (instr->opcode == aco_opcode::p_extract)
432
check(instr->operands[3].isConstant(), "Sign-extend flag must be constant",
433
instr.get());
434
435
check(instr->definitions[0].getTemp().type() != RegType::sgpr ||
436
instr->operands[0].getTemp().type() == RegType::sgpr,
437
"Can't extract/insert VGPR to SGPR", instr.get());
438
439
if (instr->operands[0].getTemp().type() == RegType::vgpr)
440
check(instr->operands[0].bytes() == instr->definitions[0].bytes(),
441
"Sizes of operand and definition must match", instr.get());
442
443
if (instr->definitions[0].getTemp().type() == RegType::sgpr)
444
check(instr->definitions.size() >= 2 && instr->definitions[1].isFixed() &&
445
instr->definitions[1].physReg() == scc,
446
"SGPR extract/insert needs a SCC definition", instr.get());
447
448
check(instr->operands[2].constantEquals(8) || instr->operands[2].constantEquals(16),
449
"Size must be 8 or 16", instr.get());
450
check(instr->operands[2].constantValue() < instr->operands[0].getTemp().bytes() * 8u,
451
"Size must be smaller than source", instr.get());
452
453
unsigned comp =
454
instr->operands[0].bytes() * 8u / MAX2(instr->operands[2].constantValue(), 1);
455
check(instr->operands[1].constantValue() < comp, "Index must be in-bounds",
456
instr.get());
457
}
458
break;
459
}
460
case Format::PSEUDO_REDUCTION: {
461
for (const Operand& op : instr->operands)
462
check(op.regClass().type() == RegType::vgpr,
463
"All operands of PSEUDO_REDUCTION instructions must be in VGPRs.",
464
instr.get());
465
466
if (instr->opcode == aco_opcode::p_reduce &&
467
instr->reduction().cluster_size == program->wave_size)
468
check(instr->definitions[0].regClass().type() == RegType::sgpr ||
469
program->wave_size == 32,
470
"The result of unclustered reductions must go into an SGPR.", instr.get());
471
else
472
check(instr->definitions[0].regClass().type() == RegType::vgpr,
473
"The result of scans and clustered reductions must go into a VGPR.",
474
instr.get());
475
476
break;
477
}
478
case Format::SMEM: {
479
if (instr->operands.size() >= 1)
480
check((instr->operands[0].isFixed() && !instr->operands[0].isConstant()) ||
481
(instr->operands[0].isTemp() &&
482
instr->operands[0].regClass().type() == RegType::sgpr),
483
"SMEM operands must be sgpr", instr.get());
484
if (instr->operands.size() >= 2)
485
check(instr->operands[1].isConstant() ||
486
(instr->operands[1].isTemp() &&
487
instr->operands[1].regClass().type() == RegType::sgpr),
488
"SMEM offset must be constant or sgpr", instr.get());
489
if (!instr->definitions.empty())
490
check(instr->definitions[0].getTemp().type() == RegType::sgpr,
491
"SMEM result must be sgpr", instr.get());
492
break;
493
}
494
case Format::MTBUF:
495
case Format::MUBUF: {
496
check(instr->operands.size() > 1, "VMEM instructions must have at least one operand",
497
instr.get());
498
check(instr->operands[1].hasRegClass() &&
499
instr->operands[1].regClass().type() == RegType::vgpr,
500
"VADDR must be in vgpr for VMEM instructions", instr.get());
501
check(
502
instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::sgpr,
503
"VMEM resource constant must be sgpr", instr.get());
504
check(instr->operands.size() < 4 ||
505
(instr->operands[3].isTemp() &&
506
instr->operands[3].regClass().type() == RegType::vgpr),
507
"VMEM write data must be vgpr", instr.get());
508
break;
509
}
510
case Format::MIMG: {
511
check(instr->operands.size() >= 4, "MIMG instructions must have at least 4 operands",
512
instr.get());
513
check(instr->operands[0].hasRegClass() &&
514
(instr->operands[0].regClass() == s4 || instr->operands[0].regClass() == s8),
515
"MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr.get());
516
if (instr->operands[1].hasRegClass())
517
check(instr->operands[1].regClass() == s4,
518
"MIMG operands[1] (sampler constant) must be 4 SGPRs", instr.get());
519
if (!instr->operands[2].isUndefined()) {
520
bool is_cmpswap = instr->opcode == aco_opcode::image_atomic_cmpswap ||
521
instr->opcode == aco_opcode::image_atomic_fcmpswap;
522
check(instr->definitions.empty() ||
523
(instr->definitions[0].regClass() == instr->operands[2].regClass() ||
524
is_cmpswap),
525
"MIMG operands[2] (VDATA) must be the same as definitions[0] for atomics and "
526
"TFE/LWE loads",
527
instr.get());
528
}
529
check(instr->operands.size() == 4 || program->chip_class >= GFX10,
530
"NSA is only supported on GFX10+", instr.get());
531
for (unsigned i = 3; i < instr->operands.size(); i++) {
532
if (instr->operands.size() == 4) {
533
check(instr->operands[i].hasRegClass() &&
534
instr->operands[i].regClass().type() == RegType::vgpr,
535
"MIMG operands[3] (VADDR) must be VGPR", instr.get());
536
} else {
537
check(instr->operands[i].regClass() == v1, "MIMG VADDR must be v1 if NSA is used",
538
instr.get());
539
}
540
}
541
check(instr->definitions.empty() ||
542
(instr->definitions[0].isTemp() &&
543
instr->definitions[0].regClass().type() == RegType::vgpr),
544
"MIMG definitions[0] (VDATA) must be VGPR", instr.get());
545
break;
546
}
547
case Format::DS: {
548
for (const Operand& op : instr->operands) {
549
check((op.isTemp() && op.regClass().type() == RegType::vgpr) || op.physReg() == m0,
550
"Only VGPRs are valid DS instruction operands", instr.get());
551
}
552
if (!instr->definitions.empty())
553
check(instr->definitions[0].getTemp().type() == RegType::vgpr,
554
"DS instruction must return VGPR", instr.get());
555
break;
556
}
557
case Format::EXP: {
558
for (unsigned i = 0; i < 4; i++)
559
check(instr->operands[i].hasRegClass() &&
560
instr->operands[i].regClass().type() == RegType::vgpr,
561
"Only VGPRs are valid Export arguments", instr.get());
562
break;
563
}
564
case Format::FLAT:
565
check(instr->operands[1].isUndefined(), "Flat instructions don't support SADDR",
566
instr.get());
567
FALLTHROUGH;
568
case Format::GLOBAL:
569
case Format::SCRATCH: {
570
check(
571
instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::vgpr,
572
"FLAT/GLOBAL/SCRATCH address must be vgpr", instr.get());
573
check(instr->operands[1].hasRegClass() &&
574
instr->operands[1].regClass().type() == RegType::sgpr,
575
"FLAT/GLOBAL/SCRATCH sgpr address must be undefined or sgpr", instr.get());
576
if (!instr->definitions.empty())
577
check(instr->definitions[0].getTemp().type() == RegType::vgpr,
578
"FLAT/GLOBAL/SCRATCH result must be vgpr", instr.get());
579
else
580
check(instr->operands[2].regClass().type() == RegType::vgpr,
581
"FLAT/GLOBAL/SCRATCH data must be vgpr", instr.get());
582
break;
583
}
584
default: break;
585
}
586
}
587
}
588
589
/* validate CFG */
590
for (unsigned i = 0; i < program->blocks.size(); i++) {
591
Block& block = program->blocks[i];
592
check_block(block.index == i, "block.index must match actual index", &block);
593
594
/* predecessors/successors should be sorted */
595
for (unsigned j = 0; j + 1 < block.linear_preds.size(); j++)
596
check_block(block.linear_preds[j] < block.linear_preds[j + 1],
597
"linear predecessors must be sorted", &block);
598
for (unsigned j = 0; j + 1 < block.logical_preds.size(); j++)
599
check_block(block.logical_preds[j] < block.logical_preds[j + 1],
600
"logical predecessors must be sorted", &block);
601
for (unsigned j = 0; j + 1 < block.linear_succs.size(); j++)
602
check_block(block.linear_succs[j] < block.linear_succs[j + 1],
603
"linear successors must be sorted", &block);
604
for (unsigned j = 0; j + 1 < block.logical_succs.size(); j++)
605
check_block(block.logical_succs[j] < block.logical_succs[j + 1],
606
"logical successors must be sorted", &block);
607
608
/* critical edges are not allowed */
609
if (block.linear_preds.size() > 1) {
610
for (unsigned pred : block.linear_preds)
611
check_block(program->blocks[pred].linear_succs.size() == 1,
612
"linear critical edges are not allowed", &program->blocks[pred]);
613
for (unsigned pred : block.logical_preds)
614
check_block(program->blocks[pred].logical_succs.size() == 1,
615
"logical critical edges are not allowed", &program->blocks[pred]);
616
}
617
}
618
619
return is_valid;
620
}
621
622
/* RA validation */
623
namespace {
624
625
struct Location {
626
Location() : block(NULL), instr(NULL) {}
627
628
Block* block;
629
Instruction* instr; // NULL if it's the block's live-in
630
};
631
632
struct Assignment {
633
Location defloc;
634
Location firstloc;
635
PhysReg reg;
636
};
637
638
bool
639
ra_fail(Program* program, Location loc, Location loc2, const char* fmt, ...)
640
{
641
va_list args;
642
va_start(args, fmt);
643
char msg[1024];
644
vsprintf(msg, fmt, args);
645
va_end(args);
646
647
char* out;
648
size_t outsize;
649
struct u_memstream mem;
650
u_memstream_open(&mem, &out, &outsize);
651
FILE* const memf = u_memstream_get(&mem);
652
653
fprintf(memf, "RA error found at instruction in BB%d:\n", loc.block->index);
654
if (loc.instr) {
655
aco_print_instr(loc.instr, memf);
656
fprintf(memf, "\n%s", msg);
657
} else {
658
fprintf(memf, "%s", msg);
659
}
660
if (loc2.block) {
661
fprintf(memf, " in BB%d:\n", loc2.block->index);
662
aco_print_instr(loc2.instr, memf);
663
}
664
fprintf(memf, "\n\n");
665
u_memstream_close(&mem);
666
667
aco_err(program, "%s", out);
668
free(out);
669
670
return true;
671
}
672
673
bool
674
validate_subdword_operand(chip_class chip, const aco_ptr<Instruction>& instr, unsigned index)
675
{
676
Operand op = instr->operands[index];
677
unsigned byte = op.physReg().byte();
678
679
if (instr->opcode == aco_opcode::p_as_uniform)
680
return byte == 0;
681
if (instr->isPseudo() && chip >= GFX8)
682
return true;
683
if (instr->isSDWA()) {
684
unsigned sel = instr->sdwa().sel[index] & sdwa_asuint;
685
return (sel & sdwa_isra) && (sel & sdwa_rasize) <= op.bytes();
686
}
687
if (byte == 2 && can_use_opsel(chip, instr->opcode, index, 1))
688
return true;
689
690
switch (instr->opcode) {
691
case aco_opcode::v_cvt_f32_ubyte1:
692
if (byte == 1)
693
return true;
694
break;
695
case aco_opcode::v_cvt_f32_ubyte2:
696
if (byte == 2)
697
return true;
698
break;
699
case aco_opcode::v_cvt_f32_ubyte3:
700
if (byte == 3)
701
return true;
702
break;
703
case aco_opcode::ds_write_b8_d16_hi:
704
case aco_opcode::ds_write_b16_d16_hi:
705
if (byte == 2 && index == 1)
706
return true;
707
break;
708
case aco_opcode::buffer_store_byte_d16_hi:
709
case aco_opcode::buffer_store_short_d16_hi:
710
if (byte == 2 && index == 3)
711
return true;
712
break;
713
case aco_opcode::flat_store_byte_d16_hi:
714
case aco_opcode::flat_store_short_d16_hi:
715
case aco_opcode::scratch_store_byte_d16_hi:
716
case aco_opcode::scratch_store_short_d16_hi:
717
case aco_opcode::global_store_byte_d16_hi:
718
case aco_opcode::global_store_short_d16_hi:
719
if (byte == 2 && index == 2)
720
return true;
721
break;
722
default: break;
723
}
724
725
return byte == 0;
726
}
727
728
bool
729
validate_subdword_definition(chip_class chip, const aco_ptr<Instruction>& instr)
730
{
731
Definition def = instr->definitions[0];
732
unsigned byte = def.physReg().byte();
733
734
if (instr->isPseudo() && chip >= GFX8)
735
return true;
736
if (instr->isSDWA() && instr->sdwa().dst_sel == (sdwa_isra | def.bytes()))
737
return true;
738
if (byte == 2 && can_use_opsel(chip, instr->opcode, -1, 1))
739
return true;
740
741
switch (instr->opcode) {
742
case aco_opcode::buffer_load_ubyte_d16_hi:
743
case aco_opcode::buffer_load_short_d16_hi:
744
case aco_opcode::flat_load_ubyte_d16_hi:
745
case aco_opcode::flat_load_short_d16_hi:
746
case aco_opcode::scratch_load_ubyte_d16_hi:
747
case aco_opcode::scratch_load_short_d16_hi:
748
case aco_opcode::global_load_ubyte_d16_hi:
749
case aco_opcode::global_load_short_d16_hi:
750
case aco_opcode::ds_read_u8_d16_hi:
751
case aco_opcode::ds_read_u16_d16_hi: return byte == 2;
752
default: break;
753
}
754
755
return byte == 0;
756
}
757
758
unsigned
759
get_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr, unsigned index)
760
{
761
chip_class chip = program->chip_class;
762
Definition def = instr->definitions[index];
763
764
if (instr->isPseudo())
765
return chip >= GFX8 ? def.bytes() : def.size() * 4u;
766
if (instr->isSDWA() && instr->sdwa().dst_sel == (sdwa_isra | def.bytes()))
767
return def.bytes();
768
769
switch (instr->opcode) {
770
case aco_opcode::buffer_load_ubyte_d16:
771
case aco_opcode::buffer_load_short_d16:
772
case aco_opcode::flat_load_ubyte_d16:
773
case aco_opcode::flat_load_short_d16:
774
case aco_opcode::scratch_load_ubyte_d16:
775
case aco_opcode::scratch_load_short_d16:
776
case aco_opcode::global_load_ubyte_d16:
777
case aco_opcode::global_load_short_d16:
778
case aco_opcode::ds_read_u8_d16:
779
case aco_opcode::ds_read_u16_d16:
780
case aco_opcode::buffer_load_ubyte_d16_hi:
781
case aco_opcode::buffer_load_short_d16_hi:
782
case aco_opcode::flat_load_ubyte_d16_hi:
783
case aco_opcode::flat_load_short_d16_hi:
784
case aco_opcode::scratch_load_ubyte_d16_hi:
785
case aco_opcode::scratch_load_short_d16_hi:
786
case aco_opcode::global_load_ubyte_d16_hi:
787
case aco_opcode::global_load_short_d16_hi:
788
case aco_opcode::ds_read_u8_d16_hi:
789
case aco_opcode::ds_read_u16_d16_hi: return program->dev.sram_ecc_enabled ? 4 : 2;
790
case aco_opcode::v_mad_f16:
791
case aco_opcode::v_mad_u16:
792
case aco_opcode::v_mad_i16:
793
case aco_opcode::v_fma_f16:
794
case aco_opcode::v_div_fixup_f16:
795
case aco_opcode::v_interp_p2_f16:
796
if (chip >= GFX9)
797
return 2;
798
break;
799
default: break;
800
}
801
802
return MAX2(chip >= GFX10 ? def.bytes() : 4,
803
instr_info.definition_size[(int)instr->opcode] / 8u);
804
}
805
806
} /* end namespace */
807
808
bool
809
validate_ra(Program* program)
810
{
811
if (!(debug_flags & DEBUG_VALIDATE_RA))
812
return false;
813
814
bool err = false;
815
aco::live live_vars = aco::live_var_analysis(program);
816
std::vector<std::vector<Temp>> phi_sgpr_ops(program->blocks.size());
817
uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->num_waves);
818
819
std::map<unsigned, Assignment> assignments;
820
for (Block& block : program->blocks) {
821
Location loc;
822
loc.block = &block;
823
for (aco_ptr<Instruction>& instr : block.instructions) {
824
if (instr->opcode == aco_opcode::p_phi) {
825
for (unsigned i = 0; i < instr->operands.size(); i++) {
826
if (instr->operands[i].isTemp() &&
827
instr->operands[i].getTemp().type() == RegType::sgpr &&
828
instr->operands[i].isFirstKill())
829
phi_sgpr_ops[block.logical_preds[i]].emplace_back(instr->operands[i].getTemp());
830
}
831
}
832
833
loc.instr = instr.get();
834
for (unsigned i = 0; i < instr->operands.size(); i++) {
835
Operand& op = instr->operands[i];
836
if (!op.isTemp())
837
continue;
838
if (!op.isFixed())
839
err |= ra_fail(program, loc, Location(), "Operand %d is not assigned a register", i);
840
if (assignments.count(op.tempId()) && assignments[op.tempId()].reg != op.physReg())
841
err |=
842
ra_fail(program, loc, assignments.at(op.tempId()).firstloc,
843
"Operand %d has an inconsistent register assignment with instruction", i);
844
if ((op.getTemp().type() == RegType::vgpr &&
845
op.physReg().reg_b + op.bytes() > (256 + program->config->num_vgprs) * 4) ||
846
(op.getTemp().type() == RegType::sgpr &&
847
op.physReg() + op.size() > program->config->num_sgprs &&
848
op.physReg() < sgpr_limit))
849
err |= ra_fail(program, loc, assignments.at(op.tempId()).firstloc,
850
"Operand %d has an out-of-bounds register assignment", i);
851
if (op.physReg() == vcc && !program->needs_vcc)
852
err |= ra_fail(program, loc, Location(),
853
"Operand %d fixed to vcc but needs_vcc=false", i);
854
if (op.regClass().is_subdword() &&
855
!validate_subdword_operand(program->chip_class, instr, i))
856
err |= ra_fail(program, loc, Location(), "Operand %d not aligned correctly", i);
857
if (!assignments[op.tempId()].firstloc.block)
858
assignments[op.tempId()].firstloc = loc;
859
if (!assignments[op.tempId()].defloc.block)
860
assignments[op.tempId()].reg = op.physReg();
861
}
862
863
for (unsigned i = 0; i < instr->definitions.size(); i++) {
864
Definition& def = instr->definitions[i];
865
if (!def.isTemp())
866
continue;
867
if (!def.isFixed())
868
err |=
869
ra_fail(program, loc, Location(), "Definition %d is not assigned a register", i);
870
if (assignments[def.tempId()].defloc.block)
871
err |= ra_fail(program, loc, assignments.at(def.tempId()).defloc,
872
"Temporary %%%d also defined by instruction", def.tempId());
873
if ((def.getTemp().type() == RegType::vgpr &&
874
def.physReg().reg_b + def.bytes() > (256 + program->config->num_vgprs) * 4) ||
875
(def.getTemp().type() == RegType::sgpr &&
876
def.physReg() + def.size() > program->config->num_sgprs &&
877
def.physReg() < sgpr_limit))
878
err |= ra_fail(program, loc, assignments.at(def.tempId()).firstloc,
879
"Definition %d has an out-of-bounds register assignment", i);
880
if (def.physReg() == vcc && !program->needs_vcc)
881
err |= ra_fail(program, loc, Location(),
882
"Definition %d fixed to vcc but needs_vcc=false", i);
883
if (def.regClass().is_subdword() &&
884
!validate_subdword_definition(program->chip_class, instr))
885
err |= ra_fail(program, loc, Location(), "Definition %d not aligned correctly", i);
886
if (!assignments[def.tempId()].firstloc.block)
887
assignments[def.tempId()].firstloc = loc;
888
assignments[def.tempId()].defloc = loc;
889
assignments[def.tempId()].reg = def.physReg();
890
}
891
}
892
}
893
894
for (Block& block : program->blocks) {
895
Location loc;
896
loc.block = &block;
897
898
std::array<unsigned, 2048> regs; /* register file in bytes */
899
regs.fill(0);
900
901
std::set<Temp> live;
902
for (unsigned id : live_vars.live_out[block.index])
903
live.insert(Temp(id, program->temp_rc[id]));
904
/* remove killed p_phi sgpr operands */
905
for (Temp tmp : phi_sgpr_ops[block.index])
906
live.erase(tmp);
907
908
/* check live out */
909
for (Temp tmp : live) {
910
PhysReg reg = assignments.at(tmp.id()).reg;
911
for (unsigned i = 0; i < tmp.bytes(); i++) {
912
if (regs[reg.reg_b + i]) {
913
err |= ra_fail(program, loc, Location(),
914
"Assignment of element %d of %%%d already taken by %%%d in live-out",
915
i, tmp.id(), regs[reg.reg_b + i]);
916
}
917
regs[reg.reg_b + i] = tmp.id();
918
}
919
}
920
regs.fill(0);
921
922
for (auto it = block.instructions.rbegin(); it != block.instructions.rend(); ++it) {
923
aco_ptr<Instruction>& instr = *it;
924
925
/* check killed p_phi sgpr operands */
926
if (instr->opcode == aco_opcode::p_logical_end) {
927
for (Temp tmp : phi_sgpr_ops[block.index]) {
928
PhysReg reg = assignments.at(tmp.id()).reg;
929
for (unsigned i = 0; i < tmp.bytes(); i++) {
930
if (regs[reg.reg_b + i])
931
err |= ra_fail(
932
program, loc, Location(),
933
"Assignment of element %d of %%%d already taken by %%%d in live-out", i,
934
tmp.id(), regs[reg.reg_b + i]);
935
}
936
live.emplace(tmp);
937
}
938
}
939
940
for (const Definition& def : instr->definitions) {
941
if (!def.isTemp())
942
continue;
943
live.erase(def.getTemp());
944
}
945
946
/* don't count phi operands as live-in, since they are actually
947
* killed when they are copied at the predecessor */
948
if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
949
for (const Operand& op : instr->operands) {
950
if (!op.isTemp())
951
continue;
952
live.insert(op.getTemp());
953
}
954
}
955
}
956
957
for (Temp tmp : live) {
958
PhysReg reg = assignments.at(tmp.id()).reg;
959
for (unsigned i = 0; i < tmp.bytes(); i++)
960
regs[reg.reg_b + i] = tmp.id();
961
}
962
963
for (aco_ptr<Instruction>& instr : block.instructions) {
964
loc.instr = instr.get();
965
966
/* remove killed p_phi operands from regs */
967
if (instr->opcode == aco_opcode::p_logical_end) {
968
for (Temp tmp : phi_sgpr_ops[block.index]) {
969
PhysReg reg = assignments.at(tmp.id()).reg;
970
for (unsigned i = 0; i < tmp.bytes(); i++)
971
regs[reg.reg_b + i] = 0;
972
}
973
}
974
975
if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
976
for (const Operand& op : instr->operands) {
977
if (!op.isTemp())
978
continue;
979
if (op.isFirstKillBeforeDef()) {
980
for (unsigned j = 0; j < op.getTemp().bytes(); j++)
981
regs[op.physReg().reg_b + j] = 0;
982
}
983
}
984
}
985
986
for (unsigned i = 0; i < instr->definitions.size(); i++) {
987
Definition& def = instr->definitions[i];
988
if (!def.isTemp())
989
continue;
990
Temp tmp = def.getTemp();
991
PhysReg reg = assignments.at(tmp.id()).reg;
992
for (unsigned j = 0; j < tmp.bytes(); j++) {
993
if (regs[reg.reg_b + j])
994
err |= ra_fail(
995
program, loc, assignments.at(regs[reg.reg_b + j]).defloc,
996
"Assignment of element %d of %%%d already taken by %%%d from instruction", i,
997
tmp.id(), regs[reg.reg_b + j]);
998
regs[reg.reg_b + j] = tmp.id();
999
}
1000
if (def.regClass().is_subdword() && def.bytes() < 4) {
1001
unsigned written = get_subdword_bytes_written(program, instr, i);
1002
/* If written=4, the instruction still might write the upper half. In that case, it's
1003
* the lower half that isn't preserved */
1004
for (unsigned j = reg.byte() & ~(written - 1); j < written; j++) {
1005
unsigned written_reg = reg.reg() * 4u + j;
1006
if (regs[written_reg] && regs[written_reg] != def.tempId())
1007
err |= ra_fail(program, loc, assignments.at(regs[written_reg]).defloc,
1008
"Assignment of element %d of %%%d overwrites the full register "
1009
"taken by %%%d from instruction",
1010
i, tmp.id(), regs[written_reg]);
1011
}
1012
}
1013
}
1014
1015
for (const Definition& def : instr->definitions) {
1016
if (!def.isTemp())
1017
continue;
1018
if (def.isKill()) {
1019
for (unsigned j = 0; j < def.getTemp().bytes(); j++)
1020
regs[def.physReg().reg_b + j] = 0;
1021
}
1022
}
1023
1024
if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
1025
for (const Operand& op : instr->operands) {
1026
if (!op.isTemp())
1027
continue;
1028
if (op.isLateKill() && op.isFirstKill()) {
1029
for (unsigned j = 0; j < op.getTemp().bytes(); j++)
1030
regs[op.physReg().reg_b + j] = 0;
1031
}
1032
}
1033
}
1034
}
1035
}
1036
1037
return err;
1038
}
1039
} // namespace aco
1040
1041