GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/amd/compiler/aco_ir.h
/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#ifndef ACO_IR_H
#define ACO_IR_H

#include "aco_opcodes.h"
#include "aco_util.h"

#include "vulkan/radv_shader.h"

#include "nir.h"

#include <bitset>
#include <memory>
#include <vector>

struct radv_shader_args;
struct radv_shader_info;

namespace aco {

extern uint64_t debug_flags;

enum {
   DEBUG_VALIDATE_IR = 0x1,
   DEBUG_VALIDATE_RA = 0x2,
   DEBUG_PERFWARN = 0x4,
   DEBUG_FORCE_WAITCNT = 0x8,
   DEBUG_NO_VN = 0x10,
   DEBUG_NO_OPT = 0x20,
   DEBUG_NO_SCHED = 0x40,
   DEBUG_PERF_INFO = 0x80,
   DEBUG_LIVE_INFO = 0x100,
};

/**
 * Representation of the instruction's microcode encoding format
 * Note: Some Vector ALU Formats can be combined, such that:
 * - VOP2* | VOP3 represents a VOP2 instruction in VOP3 encoding
 * - VOP2* | DPP represents a VOP2 instruction with data parallel primitive.
 * - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing.
 *
 * (*) The same is applicable for VOP1 and VOPC instructions.
 */
enum class Format : std::uint16_t {
   /* Pseudo Instruction Format */
   PSEUDO = 0,
   /* Scalar ALU & Control Formats */
   SOP1 = 1,
   SOP2 = 2,
   SOPK = 3,
   SOPP = 4,
   SOPC = 5,
   /* Scalar Memory Format */
   SMEM = 6,
   /* LDS/GDS Format */
   DS = 8,
   /* Vector Memory Buffer Formats */
   MTBUF = 9,
   MUBUF = 10,
   /* Vector Memory Image Format */
   MIMG = 11,
   /* Export Format */
   EXP = 12,
   /* Flat Formats */
   FLAT = 13,
   GLOBAL = 14,
   SCRATCH = 15,

   PSEUDO_BRANCH = 16,
   PSEUDO_BARRIER = 17,
   PSEUDO_REDUCTION = 18,

   /* Vector ALU Formats */
   VOP3P = 19,
   VOP1 = 1 << 8,
   VOP2 = 1 << 9,
   VOPC = 1 << 10,
   VOP3 = 1 << 11,
   /* Vector Parameter Interpolation Format */
   VINTRP = 1 << 12,
   DPP = 1 << 13,
   SDWA = 1 << 14,
};
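
/* Illustrative example (not part of the original header): the Vector ALU
 * formats occupy distinct bits, so combined encodings are formed bitwise;
 * the asVOP3() helper further below performs exactly this combination:
 *   Format f = (Format)((uint16_t)Format::VOP2 | (uint16_t)Format::VOP3);
 *   assert((uint16_t)f & (uint16_t)Format::VOP2); // still identifiable as VOP2
 */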

enum class instr_class : uint8_t {
   valu32 = 0,
   valu_convert32 = 1,
   valu64 = 2,
   valu_quarter_rate32 = 3,
   valu_fma = 4,
   valu_transcendental32 = 5,
   valu_double = 6,
   valu_double_add = 7,
   valu_double_convert = 8,
   valu_double_transcendental = 9,
   salu = 10,
   smem = 11,
   barrier = 12,
   branch = 13,
   sendmsg = 14,
   ds = 15,
   exp = 16,
   vmem = 17,
   waitcnt = 18,
   other = 19,
   count,
};

enum storage_class : uint8_t {
   storage_none = 0x0, /* no synchronization and can be reordered around aliasing stores */
   storage_buffer = 0x1, /* SSBOs and global memory */
   storage_atomic_counter = 0x2, /* not used for Vulkan */
   storage_image = 0x4,
   storage_shared = 0x8, /* or TCS output */
   storage_vmem_output = 0x10, /* GS or TCS output stores using VMEM */
   storage_scratch = 0x20,
   storage_vgpr_spill = 0x40,
   storage_count = 8,
};

enum memory_semantics : uint8_t {
   semantic_none = 0x0,
   /* for loads: don't move any access after this load to before this load (even other loads)
    * for barriers: don't move any access after the barrier to before any
    * atomics/control_barriers/sendmsg_gs_done before the barrier */
   semantic_acquire = 0x1,
   /* for stores: don't move any access before this store to after this store
    * for barriers: don't move any access before the barrier to after any
    * atomics/control_barriers/sendmsg_gs_done after the barrier */
   semantic_release = 0x2,

   /* the rest are for loads/stores/atomics only */
   /* cannot be DCE'd or CSE'd */
   semantic_volatile = 0x4,
   /* does not interact with barriers and assumes this lane is the only lane
    * accessing this memory */
   semantic_private = 0x8,
   /* this operation can be reordered around operations of the same storage.
    * says nothing about barriers */
   semantic_can_reorder = 0x10,
   /* this is an atomic instruction (may only read or write memory) */
   semantic_atomic = 0x20,
   /* this instruction both reads and writes memory */
   semantic_rmw = 0x40,

   semantic_acqrel = semantic_acquire | semantic_release,
   semantic_atomicrmw = semantic_volatile | semantic_atomic | semantic_rmw,
};

enum sync_scope : uint8_t {
   scope_invocation = 0,
   scope_subgroup = 1,
   scope_workgroup = 2,
   scope_queuefamily = 3,
   scope_device = 4,
};

struct memory_sync_info {
   memory_sync_info() : storage(storage_none), semantics(semantic_none), scope(scope_invocation) {}
   memory_sync_info(int storage_, int semantics_ = 0, sync_scope scope_ = scope_invocation)
       : storage((storage_class)storage_), semantics((memory_semantics)semantics_), scope(scope_)
   {}

   storage_class storage : 8;
   memory_semantics semantics : 8;
   sync_scope scope : 8;

   bool operator==(const memory_sync_info& rhs) const
   {
      return storage == rhs.storage && semantics == rhs.semantics && scope == rhs.scope;
   }

   bool can_reorder() const
   {
      if (semantics & semantic_acqrel)
         return false;
      /* Also check storage so that zero-initialized memory_sync_info can be
       * reordered. */
      return (!storage || (semantics & semantic_can_reorder)) && !(semantics & semantic_volatile);
   }
};
static_assert(sizeof(memory_sync_info) == 3, "Unexpected padding");
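
/* Illustrative example (not part of the original header): sync info for a
 * release store to an SSBO at workgroup scope; acquire/release semantics
 * forbid reordering:
 *   memory_sync_info sync(storage_buffer, semantic_release, scope_workgroup);
 *   assert(!sync.can_reorder());
 */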

enum fp_round {
   fp_round_ne = 0,
   fp_round_pi = 1,
   fp_round_ni = 2,
   fp_round_tz = 3,
};

enum fp_denorm {
   /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and
    * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */
   fp_denorm_flush = 0x0,
   fp_denorm_keep_in = 0x1,
   fp_denorm_keep_out = 0x2,
   fp_denorm_keep = 0x3,
};

struct float_mode {
   /* matches encoding of the MODE register */
   union {
      struct {
         fp_round round32 : 2;
         fp_round round16_64 : 2;
         unsigned denorm32 : 2;
         unsigned denorm16_64 : 2;
      };
      struct {
         uint8_t round : 4;
         uint8_t denorm : 4;
      };
      uint8_t val = 0;
   };
   /* if false, optimizations which may remove infs/nan/-0.0 can be done */
   bool preserve_signed_zero_inf_nan32 : 1;
   bool preserve_signed_zero_inf_nan16_64 : 1;
   /* if false, optimizations which may remove denormal flushing can be done */
   bool must_flush_denorms32 : 1;
   bool must_flush_denorms16_64 : 1;
   bool care_about_round32 : 1;
   bool care_about_round16_64 : 1;

   /* Returns true if instructions using the mode "other" can safely use the
    * current one instead. */
   bool canReplace(float_mode other) const noexcept
   {
      return val == other.val &&
             (preserve_signed_zero_inf_nan32 || !other.preserve_signed_zero_inf_nan32) &&
             (preserve_signed_zero_inf_nan16_64 || !other.preserve_signed_zero_inf_nan16_64) &&
             (must_flush_denorms32 || !other.must_flush_denorms32) &&
             (must_flush_denorms16_64 || !other.must_flush_denorms16_64) &&
             (care_about_round32 || !other.care_about_round32) &&
             (care_about_round16_64 || !other.care_about_round16_64);
   }
};
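
/* Illustrative sketch (not part of the original header): a stricter mode can
 * stand in for a relaxed one, but not the reverse:
 *   float_mode strict = {}, relaxed = {};
 *   strict.preserve_signed_zero_inf_nan32 = true;
 *   assert(strict.canReplace(relaxed));
 *   assert(!relaxed.canReplace(strict));
 */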

struct wait_imm {
   static const uint8_t unset_counter = 0xff;

   uint8_t vm;
   uint8_t exp;
   uint8_t lgkm;
   uint8_t vs;

   wait_imm();
   wait_imm(uint16_t vm_, uint16_t exp_, uint16_t lgkm_, uint16_t vs_);
   wait_imm(enum chip_class chip, uint16_t packed);

   uint16_t pack(enum chip_class chip) const;

   bool combine(const wait_imm& other);

   bool empty() const;
};

constexpr Format
asVOP3(Format format)
{
   return (Format)((uint32_t)Format::VOP3 | (uint32_t)format);
}

constexpr Format
asSDWA(Format format)
{
   assert(format == Format::VOP1 || format == Format::VOP2 || format == Format::VOPC);
   return (Format)((uint32_t)Format::SDWA | (uint32_t)format);
}

enum class RegType {
   none = 0,
   sgpr,
   vgpr,
   linear_vgpr,
};

struct RegClass {

   enum RC : uint8_t {
      s1 = 1,
      s2 = 2,
      s3 = 3,
      s4 = 4,
      s6 = 6,
      s8 = 8,
      s16 = 16,
      v1 = s1 | (1 << 5),
      v2 = s2 | (1 << 5),
      v3 = s3 | (1 << 5),
      v4 = s4 | (1 << 5),
      v5 = 5 | (1 << 5),
      v6 = 6 | (1 << 5),
      v7 = 7 | (1 << 5),
      v8 = 8 | (1 << 5),
      /* byte-sized register class */
      v1b = v1 | (1 << 7),
      v2b = v2 | (1 << 7),
      v3b = v3 | (1 << 7),
      v4b = v4 | (1 << 7),
      v6b = v6 | (1 << 7),
      v8b = v8 | (1 << 7),
      /* these are used for WWM and spills to vgpr */
      v1_linear = v1 | (1 << 6),
      v2_linear = v2 | (1 << 6),
   };

   RegClass() = default;
   constexpr RegClass(RC rc_) : rc(rc_) {}
   constexpr RegClass(RegType type, unsigned size)
       : rc((RC)((type == RegType::vgpr ? 1 << 5 : 0) | size))
   {}

   constexpr operator RC() const { return rc; }
   explicit operator bool() = delete;

   constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
   constexpr bool is_subdword() const { return rc & (1 << 7); }
   constexpr unsigned bytes() const { return ((unsigned)rc & 0x1F) * (is_subdword() ? 1 : 4); }
   // TODO: use size() less in favor of bytes()
   constexpr unsigned size() const { return (bytes() + 3) >> 2; }
   constexpr bool is_linear() const { return rc <= RC::s16 || rc & (1 << 6); }
   constexpr RegClass as_linear() const { return RegClass((RC)(rc | (1 << 6))); }
   constexpr RegClass as_subdword() const { return RegClass((RC)(rc | 1 << 7)); }

   static constexpr RegClass get(RegType type, unsigned bytes)
   {
      if (type == RegType::sgpr) {
         return RegClass(type, DIV_ROUND_UP(bytes, 4u));
      } else {
         return bytes % 4u ? RegClass(type, bytes).as_subdword() : RegClass(type, bytes / 4u);
      }
   }

private:
   RC rc;
};

/* transitional helper expressions */
static constexpr RegClass s1{RegClass::s1};
static constexpr RegClass s2{RegClass::s2};
static constexpr RegClass s3{RegClass::s3};
static constexpr RegClass s4{RegClass::s4};
static constexpr RegClass s8{RegClass::s8};
static constexpr RegClass s16{RegClass::s16};
static constexpr RegClass v1{RegClass::v1};
static constexpr RegClass v2{RegClass::v2};
static constexpr RegClass v3{RegClass::v3};
static constexpr RegClass v4{RegClass::v4};
static constexpr RegClass v5{RegClass::v5};
static constexpr RegClass v6{RegClass::v6};
static constexpr RegClass v7{RegClass::v7};
static constexpr RegClass v8{RegClass::v8};
static constexpr RegClass v1b{RegClass::v1b};
static constexpr RegClass v2b{RegClass::v2b};
static constexpr RegClass v3b{RegClass::v3b};
static constexpr RegClass v4b{RegClass::v4b};
static constexpr RegClass v6b{RegClass::v6b};
static constexpr RegClass v8b{RegClass::v8b};
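
/* Illustrative example (not part of the original header): RegClass::get()
 * picks a sub-dword class when the byte size is not dword-aligned, and
 * rounds SGPRs up to whole dwords:
 *   static_assert(RegClass::get(RegType::vgpr, 6) == v6b, "");
 *   static_assert(RegClass::get(RegType::vgpr, 8) == v2, "");
 *   static_assert(RegClass::get(RegType::sgpr, 6) == s2, "");
 */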

/**
 * Temp Class
 * Each temporary virtual register has a
 * register class (i.e. size and type)
 * and SSA id.
 */
struct Temp {
   Temp() noexcept : id_(0), reg_class(0) {}
   constexpr Temp(uint32_t id, RegClass cls) noexcept : id_(id), reg_class(uint8_t(cls)) {}

   constexpr uint32_t id() const noexcept { return id_; }
   constexpr RegClass regClass() const noexcept { return (RegClass::RC)reg_class; }

   constexpr unsigned bytes() const noexcept { return regClass().bytes(); }
   constexpr unsigned size() const noexcept { return regClass().size(); }
   constexpr RegType type() const noexcept { return regClass().type(); }
   constexpr bool is_linear() const noexcept { return regClass().is_linear(); }

   constexpr bool operator<(Temp other) const noexcept { return id() < other.id(); }
   constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
   constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }

private:
   uint32_t id_ : 24;
   uint32_t reg_class : 8;
};
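
/* Illustrative example (not part of the original header): a 64-bit SGPR
 * temporary with SSA id 42:
 *   constexpr Temp tmp(42, s2);
 *   static_assert(tmp.bytes() == 8, "");
 *   static_assert(tmp.type() == RegType::sgpr, "");
 */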

/**
 * PhysReg
 * Represents the physical register for each
 * Operand and Definition.
 */
struct PhysReg {
   constexpr PhysReg() = default;
   explicit constexpr PhysReg(unsigned r) : reg_b(r << 2) {}
   constexpr unsigned reg() const { return reg_b >> 2; }
   constexpr unsigned byte() const { return reg_b & 0x3; }
   constexpr operator unsigned() const { return reg(); }
   constexpr bool operator==(PhysReg other) const { return reg_b == other.reg_b; }
   constexpr bool operator!=(PhysReg other) const { return reg_b != other.reg_b; }
   constexpr bool operator<(PhysReg other) const { return reg_b < other.reg_b; }
   constexpr PhysReg advance(int bytes) const
   {
      PhysReg res = *this;
      res.reg_b += bytes;
      return res;
   }

   uint16_t reg_b = 0;
};
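
/* Illustrative example (not part of the original header): reg_b addresses
 * bytes, so advancing a register by two bytes selects the high half of its
 * dword:
 *   PhysReg r{256}; // v0 (VGPRs start at encoding 256)
 *   PhysReg hi = r.advance(2);
 *   assert(hi.reg() == 256 && hi.byte() == 2);
 */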

/* helper expressions for special registers */
static constexpr PhysReg m0{124};
static constexpr PhysReg vcc{106};
static constexpr PhysReg vcc_hi{107};
static constexpr PhysReg tba{108}; /* GFX6-GFX8 */
static constexpr PhysReg tma{110}; /* GFX6-GFX8 */
static constexpr PhysReg ttmp0{112};
static constexpr PhysReg ttmp1{113};
static constexpr PhysReg ttmp2{114};
static constexpr PhysReg ttmp3{115};
static constexpr PhysReg ttmp4{116};
static constexpr PhysReg ttmp5{117};
static constexpr PhysReg ttmp6{118};
static constexpr PhysReg ttmp7{119};
static constexpr PhysReg ttmp8{120};
static constexpr PhysReg ttmp9{121};
static constexpr PhysReg ttmp10{122};
static constexpr PhysReg ttmp11{123};
static constexpr PhysReg sgpr_null{125}; /* GFX10+ */
static constexpr PhysReg exec{126};
static constexpr PhysReg exec_lo{126};
static constexpr PhysReg exec_hi{127};
static constexpr PhysReg vccz{251};
static constexpr PhysReg execz{252};
static constexpr PhysReg scc{253};

/**
 * Operand Class
 * Initially, each Operand refers to either
 * a temporary virtual register
 * or to a constant value.
 * Temporary registers get mapped to physical registers during RA;
 * constant values are inlined into the instruction sequence.
 */
class Operand final {
public:
   constexpr Operand()
       : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false), isKill_(false),
         isUndef_(true), isFirstKill_(false), constSize(0), isLateKill_(false), is16bit_(false),
         is24bit_(false), signext(false)
   {}

   explicit Operand(Temp r) noexcept
   {
      data_.temp = r;
      if (r.id()) {
         isTemp_ = true;
      } else {
         isUndef_ = true;
         setFixed(PhysReg{128});
      }
   }
   explicit Operand(Temp r, PhysReg reg) noexcept
   {
      assert(r.id()); /* Don't allow fixing an undef to a register */
      data_.temp = r;
      isTemp_ = true;
      setFixed(reg);
   }

   /* 8-bit constant */
   static Operand c8(uint8_t v) noexcept
   {
      /* 8-bit constants are only used for copies, and copies from any 8-bit
       * constant can be implemented with an SDWA v_mul_u32_u24. So consider all
       * to be inline constants. */
      Operand op;
      op.control_ = 0;
      op.data_.i = v;
      op.isConstant_ = true;
      op.constSize = 0;
      op.setFixed(PhysReg{0u});
      return op;
   }

   /* 16-bit constant */
   static Operand c16(uint16_t v) noexcept
   {
      Operand op;
      op.control_ = 0;
      op.data_.i = v;
      op.isConstant_ = true;
      op.constSize = 1;
      if (v <= 64)
         op.setFixed(PhysReg{128u + v});
      else if (v >= 0xFFF0) /* [-16 .. -1] */
         op.setFixed(PhysReg{(unsigned)(192 - (int16_t)v)});
      else if (v == 0x3800) /* 0.5 */
         op.setFixed(PhysReg{240});
      else if (v == 0xB800) /* -0.5 */
         op.setFixed(PhysReg{241});
      else if (v == 0x3C00) /* 1.0 */
         op.setFixed(PhysReg{242});
      else if (v == 0xBC00) /* -1.0 */
         op.setFixed(PhysReg{243});
      else if (v == 0x4000) /* 2.0 */
         op.setFixed(PhysReg{244});
      else if (v == 0xC000) /* -2.0 */
         op.setFixed(PhysReg{245});
      else if (v == 0x4400) /* 4.0 */
         op.setFixed(PhysReg{246});
      else if (v == 0xC400) /* -4.0 */
         op.setFixed(PhysReg{247});
      else if (v == 0x3118) /* 1/2 PI */
         op.setFixed(PhysReg{248});
      else /* Literal Constant */
         op.setFixed(PhysReg{255});
      return op;
   }

   /* 32-bit constant */
   static Operand c32(uint32_t v) noexcept { return c32_or_c64(v, false); }

   /* 64-bit constant */
   static Operand c64(uint64_t v) noexcept
   {
      Operand op;
      op.control_ = 0;
      op.isConstant_ = true;
      op.constSize = 3;
      if (v <= 64) {
         op.data_.i = (uint32_t)v;
         op.setFixed(PhysReg{128 + (uint32_t)v});
      } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
         op.data_.i = (uint32_t)v;
         op.setFixed(PhysReg{192 - (uint32_t)v});
      } else if (v == 0x3FE0000000000000) { /* 0.5 */
         op.data_.i = 0x3f000000;
         op.setFixed(PhysReg{240});
      } else if (v == 0xBFE0000000000000) { /* -0.5 */
         op.data_.i = 0xbf000000;
         op.setFixed(PhysReg{241});
      } else if (v == 0x3FF0000000000000) { /* 1.0 */
         op.data_.i = 0x3f800000;
         op.setFixed(PhysReg{242});
      } else if (v == 0xBFF0000000000000) { /* -1.0 */
         op.data_.i = 0xbf800000;
         op.setFixed(PhysReg{243});
      } else if (v == 0x4000000000000000) { /* 2.0 */
         op.data_.i = 0x40000000;
         op.setFixed(PhysReg{244});
      } else if (v == 0xC000000000000000) { /* -2.0 */
         op.data_.i = 0xc0000000;
         op.setFixed(PhysReg{245});
      } else if (v == 0x4010000000000000) { /* 4.0 */
         op.data_.i = 0x40800000;
         op.setFixed(PhysReg{246});
      } else if (v == 0xC010000000000000) { /* -4.0 */
         op.data_.i = 0xc0800000;
         op.setFixed(PhysReg{247});
      } else { /* Literal Constant: we don't know if it is a long or double. */
         op.signext = v >> 63;
         op.data_.i = v & 0xffffffffu;
         op.setFixed(PhysReg{255});
         assert(op.constantValue64() == v &&
                "attempt to create an unrepresentable 64-bit literal constant");
      }
      return op;
   }

   /* 32-bit constant stored as a 32-bit or 64-bit operand */
   static Operand c32_or_c64(uint32_t v, bool is64bit) noexcept
   {
      Operand op;
      op.control_ = 0;
      op.data_.i = v;
      op.isConstant_ = true;
      op.constSize = is64bit ? 3 : 2;
      if (v <= 64)
         op.setFixed(PhysReg{128 + v});
      else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
         op.setFixed(PhysReg{192 - v});
      else if (v == 0x3f000000) /* 0.5 */
         op.setFixed(PhysReg{240});
      else if (v == 0xbf000000) /* -0.5 */
         op.setFixed(PhysReg{241});
      else if (v == 0x3f800000) /* 1.0 */
         op.setFixed(PhysReg{242});
      else if (v == 0xbf800000) /* -1.0 */
         op.setFixed(PhysReg{243});
      else if (v == 0x40000000) /* 2.0 */
         op.setFixed(PhysReg{244});
      else if (v == 0xc0000000) /* -2.0 */
         op.setFixed(PhysReg{245});
      else if (v == 0x40800000) /* 4.0 */
         op.setFixed(PhysReg{246});
      else if (v == 0xc0800000) /* -4.0 */
         op.setFixed(PhysReg{247});
      else { /* Literal Constant */
         assert(!is64bit && "attempt to create a 64-bit literal constant");
         op.setFixed(PhysReg{255});
      }
      return op;
   }
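
   /* Illustrative example (not part of the original header): inline constants
    * land in their dedicated encoding slot; everything else becomes a literal
    * (PhysReg 255):
    *   Operand a = Operand::c32(0x3f800000u); // 1.0f
    *   assert(!a.isLiteral() && a.physReg() == PhysReg{242});
    *   Operand b = Operand::c32(0x12345678u); // arbitrary value
    *   assert(b.isLiteral());
    */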

   explicit Operand(RegClass type) noexcept
   {
      isUndef_ = true;
      data_.temp = Temp(0, type);
      setFixed(PhysReg{128});
   }
   explicit Operand(PhysReg reg, RegClass type) noexcept
   {
      data_.temp = Temp(0, type);
      setFixed(reg);
   }

   static Operand zero(unsigned bytes = 4) noexcept
   {
      if (bytes == 8)
         return Operand::c64(0);
      else if (bytes == 4)
         return Operand::c32(0);
      else if (bytes == 2)
         return Operand::c16(0);
      assert(bytes == 1);
      return Operand::c8(0);
   }

   /* Use this instead of the constructors when the operand size is only known
    * at runtime, or when a chip class is needed to decide whether 1/2 PI is an
    * inline constant.
    */
   static Operand get_const(enum chip_class chip, uint64_t val, unsigned bytes)
   {
      if (val == 0x3e22f983 && bytes == 4 && chip >= GFX8) {
         /* 1/2 PI can be an inline constant on GFX8+ */
         Operand op = Operand::c32(val);
         op.setFixed(PhysReg{248});
         return op;
      }

      if (bytes == 8)
         return Operand::c64(val);
      else if (bytes == 4)
         return Operand::c32(val);
      else if (bytes == 2)
         return Operand::c16(val);
      assert(bytes == 1);
      return Operand::c8(val);
   }

   static bool is_constant_representable(uint64_t val, unsigned bytes, bool zext = false,
                                         bool sext = false)
   {
      if (bytes <= 4)
         return true;

      if (zext && (val & 0xFFFFFFFF00000000) == 0x0000000000000000)
         return true;
      uint64_t upper33 = val & 0xFFFFFFFF80000000;
      if (sext && (upper33 == 0xFFFFFFFF80000000 || upper33 == 0))
         return true;

      return val >= 0xFFFFFFFFFFFFFFF0 || val <= 64 || /* [-16 .. 64] */
             val == 0x3FE0000000000000 ||              /* 0.5 */
             val == 0xBFE0000000000000 ||              /* -0.5 */
             val == 0x3FF0000000000000 ||              /* 1.0 */
             val == 0xBFF0000000000000 ||              /* -1.0 */
             val == 0x4000000000000000 ||              /* 2.0 */
             val == 0xC000000000000000 ||              /* -2.0 */
             val == 0x4010000000000000 ||              /* 4.0 */
             val == 0xC010000000000000;                /* -4.0 */
   }
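
   /* Illustrative example (not part of the original header): 64-bit values are
    * representable if they match the inline-constant table or, when allowed,
    * zero-/sign-extend from 32 bits:
    *   assert(Operand::is_constant_representable(0x3FF0000000000000, 8)); // 1.0
    *   assert(Operand::is_constant_representable(0xFFFFFFFF80000000, 8, false, true));
    *   assert(!Operand::is_constant_representable(0x123456789ABCDEF0, 8, true, true));
    */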

   constexpr bool isTemp() const noexcept { return isTemp_; }

   constexpr void setTemp(Temp t) noexcept
   {
      assert(!isConstant_);
      isTemp_ = true;
      data_.temp = t;
   }

   constexpr Temp getTemp() const noexcept { return data_.temp; }

   constexpr uint32_t tempId() const noexcept { return data_.temp.id(); }

   constexpr bool hasRegClass() const noexcept { return isTemp() || isUndefined(); }

   constexpr RegClass regClass() const noexcept { return data_.temp.regClass(); }

   constexpr unsigned bytes() const noexcept
   {
      if (isConstant())
         return 1 << constSize;
      else
         return data_.temp.bytes();
   }

   constexpr unsigned size() const noexcept
   {
      if (isConstant())
         return constSize > 2 ? 2 : 1;
      else
         return data_.temp.size();
   }

   constexpr bool isFixed() const noexcept { return isFixed_; }

   constexpr PhysReg physReg() const noexcept { return reg_; }

   constexpr void setFixed(PhysReg reg) noexcept
   {
      isFixed_ = reg != unsigned(-1);
      reg_ = reg;
   }

   constexpr bool isConstant() const noexcept { return isConstant_; }

   constexpr bool isLiteral() const noexcept { return isConstant() && reg_ == 255; }

   constexpr bool isUndefined() const noexcept { return isUndef_; }

   constexpr uint32_t constantValue() const noexcept { return data_.i; }

   constexpr bool constantEquals(uint32_t cmp) const noexcept
   {
      return isConstant() && constantValue() == cmp;
   }

   constexpr uint64_t constantValue64() const noexcept
   {
      if (constSize == 3) {
         if (reg_ <= 192)
            return reg_ - 128;
         else if (reg_ <= 208)
            return 0xFFFFFFFFFFFFFFFF - (reg_ - 193);

         switch (reg_) {
         case 240: return 0x3FE0000000000000;
         case 241: return 0xBFE0000000000000;
         case 242: return 0x3FF0000000000000;
         case 243: return 0xBFF0000000000000;
         case 244: return 0x4000000000000000;
         case 245: return 0xC000000000000000;
         case 246: return 0x4010000000000000;
         case 247: return 0xC010000000000000;
         case 255:
            return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i;
         }
         unreachable("invalid register for 64-bit constant");
      } else {
         return data_.i;
      }
   }

   constexpr bool isOfType(RegType type) const noexcept
   {
      return hasRegClass() && regClass().type() == type;
   }

   /* Indicates that the killed operand's live range intersects with the
    * instruction's definitions. Unlike isKill() and isFirstKill(), this is
    * not set by liveness analysis. */
   constexpr void setLateKill(bool flag) noexcept { isLateKill_ = flag; }

   constexpr bool isLateKill() const noexcept { return isLateKill_; }

   constexpr void setKill(bool flag) noexcept
   {
      isKill_ = flag;
      if (!flag)
         setFirstKill(false);
   }

   constexpr bool isKill() const noexcept { return isKill_ || isFirstKill(); }

   constexpr void setFirstKill(bool flag) noexcept
   {
      isFirstKill_ = flag;
      if (flag)
         setKill(flag);
   }

   /* When there are multiple operands killing the same temporary,
    * isFirstKill() only returns true for the first one. */
   constexpr bool isFirstKill() const noexcept { return isFirstKill_; }

   constexpr bool isKillBeforeDef() const noexcept { return isKill() && !isLateKill(); }

   constexpr bool isFirstKillBeforeDef() const noexcept { return isFirstKill() && !isLateKill(); }

   constexpr bool operator==(Operand other) const noexcept
   {
      if (other.size() != size())
         return false;
      if (isFixed() != other.isFixed() || isKillBeforeDef() != other.isKillBeforeDef())
         return false;
      if (isFixed() && other.isFixed() && physReg() != other.physReg())
         return false;
      if (isLiteral())
         return other.isLiteral() && other.constantValue() == constantValue();
      else if (isConstant())
         return other.isConstant() && other.physReg() == physReg();
      else if (isUndefined())
         return other.isUndefined() && other.regClass() == regClass();
      else
         return other.isTemp() && other.getTemp() == getTemp();
   }

   constexpr bool operator!=(Operand other) const noexcept { return !operator==(other); }

   constexpr void set16bit(bool flag) noexcept { is16bit_ = flag; }

   constexpr bool is16bit() const noexcept { return is16bit_; }

   constexpr void set24bit(bool flag) noexcept { is24bit_ = flag; }

   constexpr bool is24bit() const noexcept { return is24bit_; }

private:
   union {
      Temp temp;
      uint32_t i;
      float f;
   } data_ = {Temp(0, s1)};
   PhysReg reg_;
   union {
      struct {
         uint8_t isTemp_ : 1;
         uint8_t isFixed_ : 1;
         uint8_t isConstant_ : 1;
         uint8_t isKill_ : 1;
         uint8_t isUndef_ : 1;
         uint8_t isFirstKill_ : 1;
         uint8_t constSize : 2;
         uint8_t isLateKill_ : 1;
         uint8_t is16bit_ : 1;
         uint8_t is24bit_ : 1;
         uint8_t signext : 1;
      };
      /* can't initialize bit-fields in c++11, so work around using a union */
      uint16_t control_ = 0;
   };
};

/**
 * Definition Class
 * Definitions are the results of Instructions
 * and refer to temporary virtual registers
 * which are later mapped to physical registers.
 */
class Definition final {
public:
   constexpr Definition()
       : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0), isPrecise_(0), isNUW_(0),
         isNoCSE_(0)
   {}
   Definition(uint32_t index, RegClass type) noexcept : temp(index, type) {}
   explicit Definition(Temp tmp) noexcept : temp(tmp) {}
   Definition(PhysReg reg, RegClass type) noexcept : temp(Temp(0, type)) { setFixed(reg); }
   Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept : temp(Temp(tmpId, type))
   {
      setFixed(reg);
   }

   constexpr bool isTemp() const noexcept { return tempId() > 0; }

   constexpr Temp getTemp() const noexcept { return temp; }

   constexpr uint32_t tempId() const noexcept { return temp.id(); }

   constexpr void setTemp(Temp t) noexcept { temp = t; }

   void swapTemp(Definition& other) noexcept { std::swap(temp, other.temp); }

   constexpr RegClass regClass() const noexcept { return temp.regClass(); }

   constexpr unsigned bytes() const noexcept { return temp.bytes(); }

   constexpr unsigned size() const noexcept { return temp.size(); }

   constexpr bool isFixed() const noexcept { return isFixed_; }

   constexpr PhysReg physReg() const noexcept { return reg_; }

   constexpr void setFixed(PhysReg reg) noexcept
   {
      isFixed_ = 1;
      reg_ = reg;
   }

   constexpr void setHint(PhysReg reg) noexcept
   {
      hasHint_ = 1;
      reg_ = reg;
   }

   constexpr bool hasHint() const noexcept { return hasHint_; }

   constexpr void setKill(bool flag) noexcept { isKill_ = flag; }

   constexpr bool isKill() const noexcept { return isKill_; }

   constexpr void setPrecise(bool precise) noexcept { isPrecise_ = precise; }

   constexpr bool isPrecise() const noexcept { return isPrecise_; }

   /* No Unsigned Wrap */
   constexpr void setNUW(bool nuw) noexcept { isNUW_ = nuw; }

   constexpr bool isNUW() const noexcept { return isNUW_; }

   constexpr void setNoCSE(bool noCSE) noexcept { isNoCSE_ = noCSE; }

   constexpr bool isNoCSE() const noexcept { return isNoCSE_; }

private:
   Temp temp = Temp(0, s1);
   PhysReg reg_;
   union {
      struct {
         uint8_t isFixed_ : 1;
         uint8_t hasHint_ : 1;
         uint8_t isKill_ : 1;
         uint8_t isPrecise_ : 1;
         uint8_t isNUW_ : 1;
         uint8_t isNoCSE_ : 1;
      };
      /* can't initialize bit-fields in c++11, so work around using a union */
      uint8_t control_ = 0;
   };
};
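
/* Illustrative example (not part of the original header): a definition of a
 * 64-bit SGPR pair fixed to vcc:
 *   Definition def(Temp(7, s2));
 *   def.setFixed(vcc);
 *   assert(def.isFixed() && def.physReg() == vcc);
 */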

struct Block;
struct Instruction;
struct Pseudo_instruction;
struct SOP1_instruction;
struct SOP2_instruction;
struct SOPK_instruction;
struct SOPP_instruction;
struct SOPC_instruction;
struct SMEM_instruction;
struct DS_instruction;
struct MTBUF_instruction;
struct MUBUF_instruction;
struct MIMG_instruction;
struct Export_instruction;
struct FLAT_instruction;
struct Pseudo_branch_instruction;
struct Pseudo_barrier_instruction;
struct Pseudo_reduction_instruction;
struct VOP3P_instruction;
struct VOP1_instruction;
struct VOP2_instruction;
struct VOPC_instruction;
struct VOP3_instruction;
struct Interp_instruction;
struct DPP_instruction;
struct SDWA_instruction;

struct Instruction {
   aco_opcode opcode;
   Format format;
   uint32_t pass_flags;

   aco::span<Operand> operands;
   aco::span<Definition> definitions;

   constexpr bool usesModifiers() const noexcept;

   constexpr bool reads_exec() const noexcept
   {
      for (const Operand& op : operands) {
         if (op.isFixed() && op.physReg() == exec)
            return true;
      }
      return false;
   }

   Pseudo_instruction& pseudo() noexcept
   {
      assert(isPseudo());
      return *(Pseudo_instruction*)this;
   }
   const Pseudo_instruction& pseudo() const noexcept
   {
      assert(isPseudo());
      return *(Pseudo_instruction*)this;
   }
   constexpr bool isPseudo() const noexcept { return format == Format::PSEUDO; }
   SOP1_instruction& sop1() noexcept
   {
      assert(isSOP1());
      return *(SOP1_instruction*)this;
   }
   const SOP1_instruction& sop1() const noexcept
   {
      assert(isSOP1());
      return *(SOP1_instruction*)this;
   }
   constexpr bool isSOP1() const noexcept { return format == Format::SOP1; }
   SOP2_instruction& sop2() noexcept
   {
      assert(isSOP2());
      return *(SOP2_instruction*)this;
   }
   const SOP2_instruction& sop2() const noexcept
   {
      assert(isSOP2());
      return *(SOP2_instruction*)this;
   }
   constexpr bool isSOP2() const noexcept { return format == Format::SOP2; }
   SOPK_instruction& sopk() noexcept
   {
      assert(isSOPK());
      return *(SOPK_instruction*)this;
   }
   const SOPK_instruction& sopk() const noexcept
   {
      assert(isSOPK());
      return *(SOPK_instruction*)this;
   }
   constexpr bool isSOPK() const noexcept { return format == Format::SOPK; }
   SOPP_instruction& sopp() noexcept
   {
      assert(isSOPP());
      return *(SOPP_instruction*)this;
   }
   const SOPP_instruction& sopp() const noexcept
   {
      assert(isSOPP());
      return *(SOPP_instruction*)this;
   }
   constexpr bool isSOPP() const noexcept { return format == Format::SOPP; }
   SOPC_instruction& sopc() noexcept
   {
      assert(isSOPC());
      return *(SOPC_instruction*)this;
   }
   const SOPC_instruction& sopc() const noexcept
   {
      assert(isSOPC());
      return *(SOPC_instruction*)this;
   }
   constexpr bool isSOPC() const noexcept { return format == Format::SOPC; }
   SMEM_instruction& smem() noexcept
   {
      assert(isSMEM());
      return *(SMEM_instruction*)this;
   }
   const SMEM_instruction& smem() const noexcept
   {
      assert(isSMEM());
      return *(SMEM_instruction*)this;
   }
   constexpr bool isSMEM() const noexcept { return format == Format::SMEM; }
   DS_instruction& ds() noexcept
   {
      assert(isDS());
      return *(DS_instruction*)this;
   }
   const DS_instruction& ds() const noexcept
   {
      assert(isDS());
      return *(DS_instruction*)this;
   }
   constexpr bool isDS() const noexcept { return format == Format::DS; }
   MTBUF_instruction& mtbuf() noexcept
   {
      assert(isMTBUF());
      return *(MTBUF_instruction*)this;
   }
   const MTBUF_instruction& mtbuf() const noexcept
   {
      assert(isMTBUF());
      return *(MTBUF_instruction*)this;
   }
   constexpr bool isMTBUF() const noexcept { return format == Format::MTBUF; }
   MUBUF_instruction& mubuf() noexcept
   {
      assert(isMUBUF());
      return *(MUBUF_instruction*)this;
   }
   const MUBUF_instruction& mubuf() const noexcept
   {
      assert(isMUBUF());
      return *(MUBUF_instruction*)this;
   }
   constexpr bool isMUBUF() const noexcept { return format == Format::MUBUF; }
   MIMG_instruction& mimg() noexcept
   {
      assert(isMIMG());
      return *(MIMG_instruction*)this;
   }
   const MIMG_instruction& mimg() const noexcept
   {
      assert(isMIMG());
      return *(MIMG_instruction*)this;
   }
   constexpr bool isMIMG() const noexcept { return format == Format::MIMG; }
   Export_instruction& exp() noexcept
   {
      assert(isEXP());
      return *(Export_instruction*)this;
   }
   const Export_instruction& exp() const noexcept
   {
      assert(isEXP());
      return *(Export_instruction*)this;
   }
   constexpr bool isEXP() const noexcept { return format == Format::EXP; }
   FLAT_instruction& flat() noexcept
   {
      assert(isFlat());
      return *(FLAT_instruction*)this;
   }
   const FLAT_instruction& flat() const noexcept
   {
      assert(isFlat());
      return *(FLAT_instruction*)this;
   }
   constexpr bool isFlat() const noexcept { return format == Format::FLAT; }
   FLAT_instruction& global() noexcept
   {
      assert(isGlobal());
      return *(FLAT_instruction*)this;
   }
   const FLAT_instruction& global() const noexcept
   {
      assert(isGlobal());
      return *(FLAT_instruction*)this;
   }
   constexpr bool isGlobal() const noexcept { return format == Format::GLOBAL; }
   FLAT_instruction& scratch() noexcept
   {
      assert(isScratch());
      return *(FLAT_instruction*)this;
   }
   const FLAT_instruction& scratch() const noexcept
   {
      assert(isScratch());
      return *(FLAT_instruction*)this;
   }
   constexpr bool isScratch() const noexcept { return format == Format::SCRATCH; }
   Pseudo_branch_instruction& branch() noexcept
   {
      assert(isBranch());
      return *(Pseudo_branch_instruction*)this;
   }
   const Pseudo_branch_instruction& branch() const noexcept
   {
      assert(isBranch());
      return *(Pseudo_branch_instruction*)this;
   }
   constexpr bool isBranch() const noexcept { return format == Format::PSEUDO_BRANCH; }
   Pseudo_barrier_instruction& barrier() noexcept
   {
      assert(isBarrier());
      return *(Pseudo_barrier_instruction*)this;
   }
   const Pseudo_barrier_instruction& barrier() const noexcept
   {
      assert(isBarrier());
      return *(Pseudo_barrier_instruction*)this;
   }
   constexpr bool isBarrier() const noexcept { return format == Format::PSEUDO_BARRIER; }
   Pseudo_reduction_instruction& reduction() noexcept
   {
      assert(isReduction());
      return *(Pseudo_reduction_instruction*)this;
   }
   const Pseudo_reduction_instruction& reduction() const noexcept
   {
      assert(isReduction());
      return *(Pseudo_reduction_instruction*)this;
   }
   constexpr bool isReduction() const noexcept { return format == Format::PSEUDO_REDUCTION; }
   VOP3P_instruction& vop3p() noexcept
   {
      assert(isVOP3P());
      return *(VOP3P_instruction*)this;
   }
   const VOP3P_instruction& vop3p() const noexcept
   {
      assert(isVOP3P());
      return *(VOP3P_instruction*)this;
   }
   constexpr bool isVOP3P() const noexcept { return format == Format::VOP3P; }
   VOP1_instruction& vop1() noexcept
   {
      assert(isVOP1());
      return *(VOP1_instruction*)this;
   }
   const VOP1_instruction& vop1() const noexcept
   {
      assert(isVOP1());
      return *(VOP1_instruction*)this;
   }
   constexpr bool isVOP1() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP1; }
   VOP2_instruction& vop2() noexcept
   {
      assert(isVOP2());
      return *(VOP2_instruction*)this;
   }
   const VOP2_instruction& vop2() const noexcept
   {
      assert(isVOP2());
      return *(VOP2_instruction*)this;
   }
   constexpr bool isVOP2() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP2; }
   VOPC_instruction& vopc() noexcept
   {
      assert(isVOPC());
      return *(VOPC_instruction*)this;
   }
   const VOPC_instruction& vopc() const noexcept
   {
      assert(isVOPC());
      return *(VOPC_instruction*)this;
   }
   constexpr bool isVOPC() const noexcept { return (uint16_t)format & (uint16_t)Format::VOPC; }
   VOP3_instruction& vop3() noexcept
   {
      assert(isVOP3());
      return *(VOP3_instruction*)this;
   }
   const VOP3_instruction& vop3() const noexcept
   {
      assert(isVOP3());
      return *(VOP3_instruction*)this;
   }
   constexpr bool isVOP3() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP3; }
   Interp_instruction& vintrp() noexcept
   {
      assert(isVINTRP());
      return *(Interp_instruction*)this;
   }
   const Interp_instruction& vintrp() const noexcept
   {
      assert(isVINTRP());
      return *(Interp_instruction*)this;
   }
   constexpr bool isVINTRP() const noexcept { return (uint16_t)format & (uint16_t)Format::VINTRP; }
   DPP_instruction& dpp() noexcept
   {
      assert(isDPP());
      return *(DPP_instruction*)this;
   }
   const DPP_instruction& dpp() const noexcept
   {
      assert(isDPP());
      return *(DPP_instruction*)this;
   }
   constexpr bool isDPP() const noexcept { return (uint16_t)format & (uint16_t)Format::DPP; }
   SDWA_instruction& sdwa() noexcept
   {
      assert(isSDWA());
      return *(SDWA_instruction*)this;
   }
   const SDWA_instruction& sdwa() const noexcept
   {
      assert(isSDWA());
      return *(SDWA_instruction*)this;
   }
   constexpr bool isSDWA() const noexcept { return (uint16_t)format & (uint16_t)Format::SDWA; }

   FLAT_instruction& flatlike() { return *(FLAT_instruction*)this; }

   const FLAT_instruction& flatlike() const { return *(FLAT_instruction*)this; }

   constexpr bool isFlatLike() const noexcept { return isFlat() || isGlobal() || isScratch(); }

   constexpr bool isVALU() const noexcept
   {
      return isVOP1() || isVOP2() || isVOPC() || isVOP3() || isVOP3P();
   }

   constexpr bool isSALU() const noexcept
   {
      return isSOP1() || isSOP2() || isSOPC() || isSOPK() || isSOPP();
   }

   constexpr bool isVMEM() const noexcept { return isMTBUF() || isMUBUF() || isMIMG(); }
};
static_assert(sizeof(Instruction) == 16, "Unexpected padding");
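
/* Illustrative example (not part of the original header): because the VOP1/
 * VOP2/VOPC/VOP3 queries test format bits rather than exact values, a VOP2
 * instruction promoted to VOP3 encoding answers true to both isVOP2() and
 * isVOP3():
 *   assert((uint16_t)asVOP3(Format::VOP2) & (uint16_t)Format::VOP2);
 */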

struct SOPK_instruction : public Instruction {
   uint16_t imm;
   uint16_t padding;
};
static_assert(sizeof(SOPK_instruction) == sizeof(Instruction) + 4, "Unexpected padding");

struct SOPP_instruction : public Instruction {
   uint32_t imm;
   int block;
};
static_assert(sizeof(SOPP_instruction) == sizeof(Instruction) + 8, "Unexpected padding");

struct SOPC_instruction : public Instruction {};
static_assert(sizeof(SOPC_instruction) == sizeof(Instruction) + 0, "Unexpected padding");

struct SOP1_instruction : public Instruction {};
static_assert(sizeof(SOP1_instruction) == sizeof(Instruction) + 0, "Unexpected padding");

struct SOP2_instruction : public Instruction {};
static_assert(sizeof(SOP2_instruction) == sizeof(Instruction) + 0, "Unexpected padding");

/**
 * Scalar Memory Format:
 * For s_(buffer_)load_dword*:
 * Operand(0): SBASE - SGPR-pair which provides base address
 * Operand(1): Offset - immediate (un)signed offset or SGPR
 * Operand(2) / Definition(0): SDATA - SGPR for read / write result
 * Operand(n-1): SOffset - SGPR offset (Vega only)
 *
 * Having no operands is also valid for instructions such as s_dcache_inv.
 *
 */
struct SMEM_instruction : public Instruction {
   memory_sync_info sync;
   bool glc : 1; /* VI+: globally coherent */
   bool dlc : 1; /* NAVI: device level coherent */
   bool nv : 1;  /* VEGA only: Non-volatile */
   bool disable_wqm : 1;
   bool prevent_overflow : 1; /* avoid overflow when combining additions */
   uint8_t padding : 3;
};
static_assert(sizeof(SMEM_instruction) == sizeof(Instruction) + 4, "Unexpected padding");

struct VOP1_instruction : public Instruction {};
static_assert(sizeof(VOP1_instruction) == sizeof(Instruction) + 0, "Unexpected padding");

struct VOP2_instruction : public Instruction {};
static_assert(sizeof(VOP2_instruction) == sizeof(Instruction) + 0, "Unexpected padding");

struct VOPC_instruction : public Instruction {};
static_assert(sizeof(VOPC_instruction) == sizeof(Instruction) + 0, "Unexpected padding");

struct VOP3_instruction : public Instruction {
   bool abs[3];
   bool neg[3];
   uint8_t opsel : 4;
   uint8_t omod : 2;
   bool clamp : 1;
   uint8_t padding0 : 1;
   uint8_t padding1;
};
static_assert(sizeof(VOP3_instruction) == sizeof(Instruction) + 8, "Unexpected padding");

struct VOP3P_instruction : public Instruction {
   bool neg_lo[3];
   bool neg_hi[3];
   uint8_t opsel_lo : 3;
   uint8_t opsel_hi : 3;
   bool clamp : 1;
   uint8_t padding0 : 1;
   uint8_t padding1;
};
static_assert(sizeof(VOP3P_instruction) == sizeof(Instruction) + 8, "Unexpected padding");

/**
 * Data Parallel Primitives Format:
 * This format can be used for VOP1, VOP2 or VOPC instructions.
 * The swizzle applies to the src0 operand.
 *
 */
struct DPP_instruction : public Instruction {
   bool abs[2];
   bool neg[2];
   uint16_t dpp_ctrl;
   uint8_t row_mask : 4;
   uint8_t bank_mask : 4;
   bool bound_ctrl : 1;
   uint8_t padding : 7;
};
static_assert(sizeof(DPP_instruction) == sizeof(Instruction) + 8, "Unexpected padding");

enum sdwa_sel : uint8_t {
   /* masks */
   sdwa_wordnum = 0x1,
   sdwa_bytenum = 0x3,
   sdwa_asuint = 0x7 | 0x10,
   sdwa_rasize = 0x3,

   /* flags */
   sdwa_isword = 0x4,
   sdwa_sext = 0x8,
   sdwa_isra = 0x10,

   /* specific values */
   sdwa_ubyte0 = 0,
   sdwa_ubyte1 = 1,
   sdwa_ubyte2 = 2,
   sdwa_ubyte3 = 3,
   sdwa_uword0 = sdwa_isword | 0,
   sdwa_uword1 = sdwa_isword | 1,
   sdwa_udword = 6,

   sdwa_sbyte0 = sdwa_ubyte0 | sdwa_sext,
   sdwa_sbyte1 = sdwa_ubyte1 | sdwa_sext,
   sdwa_sbyte2 = sdwa_ubyte2 | sdwa_sext,
   sdwa_sbyte3 = sdwa_ubyte3 | sdwa_sext,
   sdwa_sword0 = sdwa_uword0 | sdwa_sext,
   sdwa_sword1 = sdwa_uword1 | sdwa_sext,
   sdwa_sdword = sdwa_udword | sdwa_sext,

   /* register-allocated */
   sdwa_ubyte = 1 | sdwa_isra,
   sdwa_uword = 2 | sdwa_isra,
   sdwa_sbyte = sdwa_ubyte | sdwa_sext,
   sdwa_sword = sdwa_uword | sdwa_sext,
};
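
/* Illustrative example (not part of the original header): decoding a selector
 * with the masks above; sdwa_sword1 is the sign-extended second word:
 *   static_assert((sdwa_sword1 & sdwa_asuint) == sdwa_uword1, "");
 *   static_assert((sdwa_sword1 & sdwa_sext) != 0, "");
 *   static_assert((sdwa_uword1 & sdwa_wordnum) == 1, "");
 */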

/**
 * Sub-Dword Addressing Format:
 * This format can be used for VOP1, VOP2 or VOPC instructions.
 *
 * omod and SGPR/constant operands are only available on GFX9+. For VOPC,
 * the definition doesn't have to be VCC on GFX9+.
 *
 */
struct SDWA_instruction : public Instruction {
   /* these destination modifiers aren't available with VOPC except for
    * clamp on GFX8 */
   uint8_t sel[2];
   uint8_t dst_sel;
   bool neg[2];
   bool abs[2];
   bool dst_preserve : 1;
   bool clamp : 1;
   uint8_t omod : 2; /* GFX9+ */
   uint8_t padding : 4;
};
static_assert(sizeof(SDWA_instruction) == sizeof(Instruction) + 8, "Unexpected padding");

struct Interp_instruction : public Instruction {
   uint8_t attribute;
   uint8_t component;
   uint16_t padding;
};
static_assert(sizeof(Interp_instruction) == sizeof(Instruction) + 4, "Unexpected padding");

/**
 * Local and Global Data Sharing instructions
 * Operand(0): ADDR - VGPR which supplies the address.
 * Operand(1): DATA0 - First data VGPR.
 * Operand(2): DATA1 - Second data VGPR.
 * Operand(n-1): M0 - LDS size.
 * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
 *
 */
struct DS_instruction : public Instruction {
   memory_sync_info sync;
   bool gds;
   int16_t offset0;
   int8_t offset1;
   uint8_t padding;
};
static_assert(sizeof(DS_instruction) == sizeof(Instruction) + 8, "Unexpected padding");

/**
 * Vector Memory Untyped-buffer Instructions
 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
 * Operand(1): VADDR - Address source. Can carry an index and/or offset
 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
 *
 */
struct MUBUF_instruction : public Instruction {
   memory_sync_info sync;
   bool offen : 1;           /* Supply an offset from VGPR (VADDR) */
   bool idxen : 1;           /* Supply an index from VGPR (VADDR) */
   bool addr64 : 1;          /* SI, CIK: Address size is 64-bit */
   bool glc : 1;             /* globally coherent */
   bool dlc : 1;             /* NAVI: device level coherent */
   bool slc : 1;             /* system level coherent */
   bool tfe : 1;             /* texture fail enable */
   bool lds : 1;             /* Return read-data to LDS instead of VGPRs */
   uint16_t disable_wqm : 1; /* Require an exec mask without helper invocations */
   uint16_t offset : 12;     /* Unsigned byte offset - 12 bit */
   uint16_t swizzled : 1;
   uint16_t padding0 : 2;
   uint16_t vtx_binding : 6; /* 0 if this is not a vertex attribute load */
   uint16_t padding1 : 10;
};
static_assert(sizeof(MUBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");

/**
 * Vector Memory Typed-buffer Instructions
 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
 * Operand(1): VADDR - Address source. Can carry an index and/or offset
 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
 *
 */
struct MTBUF_instruction : public Instruction {
   memory_sync_info sync;
   uint8_t dfmt : 4;         /* Data Format of data in memory buffer */
   uint8_t nfmt : 3;         /* Numeric format of data in memory */
   bool offen : 1;           /* Supply an offset from VGPR (VADDR) */
   uint16_t idxen : 1;       /* Supply an index from VGPR (VADDR) */
   uint16_t glc : 1;         /* globally coherent */
   uint16_t dlc : 1;         /* NAVI: device level coherent */
   uint16_t slc : 1;         /* system level coherent */
   uint16_t tfe : 1;         /* texture fail enable */
   uint16_t disable_wqm : 1; /* Require an exec mask without helper invocations */
   uint16_t vtx_binding : 6; /* 0 if this is not a vertex attribute load */
   uint16_t padding : 4;
   uint16_t offset;          /* Unsigned byte offset - 12 bit */
};
static_assert(sizeof(MTBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");

/**
 * Vector Memory Image Instructions
 * Operand(0): SRSRC - Scalar GPR that specifies the resource constant.
 * Operand(1): SSAMP - Scalar GPR that specifies sampler constant.
 * Operand(2): VDATA - Vector GPR for write data or zero if TFE/LWE=1.
 * Operand(3): VADDR - Address source. Can carry an offset or an index.
 * Definition(0): VDATA - Vector GPR for read result.
 *
 */
struct MIMG_instruction : public Instruction {
   memory_sync_info sync;
   uint8_t dmask;        /* Data VGPR enable mask */
   uint8_t dim : 3;      /* NAVI: dimensionality */
   bool unrm : 1;        /* Force address to be un-normalized */
   bool dlc : 1;         /* NAVI: device level coherent */
   bool glc : 1;         /* globally coherent */
   bool slc : 1;         /* system level coherent */
   bool tfe : 1;         /* texture fail enable */
   bool da : 1;          /* declare an array */
   bool lwe : 1;         /* LOD warning enable */
   bool r128 : 1;        /* NAVI: Texture resource size */
   bool a16 : 1;         /* VEGA, NAVI: Address components are 16-bits */
   bool d16 : 1;         /* Convert 32-bit data to 16-bit data */
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   uint8_t padding0 : 2;
   uint8_t padding1;
   uint8_t padding2;
};
static_assert(sizeof(MIMG_instruction) == sizeof(Instruction) + 8, "Unexpected padding");

/**
 * Flat/Scratch/Global Instructions
 * Operand(0): ADDR
 * Operand(1): SADDR
 * Operand(2) / Definition(0): DATA/VDST
 *
 */
struct FLAT_instruction : public Instruction {
   memory_sync_info sync;
   bool slc : 1; /* system level coherent */
   bool glc : 1; /* globally coherent */
   bool dlc : 1; /* NAVI: device level coherent */
   bool lds : 1;
   bool nv : 1;
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   uint8_t padding0 : 2;
   uint16_t offset; /* Vega/Navi only */
   uint16_t padding1;
};
static_assert(sizeof(FLAT_instruction) == sizeof(Instruction) + 8, "Unexpected padding");

struct Export_instruction : public Instruction {
   uint8_t enabled_mask;
   uint8_t dest;
   bool compressed : 1;
   bool done : 1;
   bool valid_mask : 1;
   uint8_t padding0 : 5;
   uint8_t padding1;
};
static_assert(sizeof(Export_instruction) == sizeof(Instruction) + 4, "Unexpected padding");

struct Pseudo_instruction : public Instruction {
   PhysReg scratch_sgpr; /* might not be valid if it's not needed */
   bool tmp_in_scc;
   uint8_t padding;
};
static_assert(sizeof(Pseudo_instruction) == sizeof(Instruction) + 4, "Unexpected padding");

struct Pseudo_branch_instruction : public Instruction {
   /* target[0] is the block index of the branch target.
    * For conditional branches, target[1] contains the fall-through alternative.
    * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
    */
   uint32_t target[2];
};
static_assert(sizeof(Pseudo_branch_instruction) == sizeof(Instruction) + 8, "Unexpected padding");

struct Pseudo_barrier_instruction : public Instruction {
   memory_sync_info sync;
   sync_scope exec_scope;
};
static_assert(sizeof(Pseudo_barrier_instruction) == sizeof(Instruction) + 4, "Unexpected padding");

enum ReduceOp : uint16_t {
   // clang-format off
   iadd8, iadd16, iadd32, iadd64,
   imul8, imul16, imul32, imul64,
   fadd16, fadd32, fadd64,
   fmul16, fmul32, fmul64,
   imin8, imin16, imin32, imin64,
   imax8, imax16, imax32, imax64,
   umin8, umin16, umin32, umin64,
   umax8, umax16, umax32, umax64,
   fmin16, fmin32, fmin64,
   fmax16, fmax32, fmax64,
   iand8, iand16, iand32, iand64,
   ior8, ior16, ior32, ior64,
   ixor8, ixor16, ixor32, ixor64,
   num_reduce_ops,
   // clang-format on
};

/**
 * Subgroup Reduction Instructions: everything except the data to be reduced
 * and the result is inserted by setup_reduce_temp().
 * Operand(0): data to be reduced
 * Operand(1): reduce temporary
 * Operand(2): vector temporary
 * Definition(0): result
 * Definition(1): scalar temporary
 * Definition(2): scalar identity temporary (not used to store identity on GFX10)
 * Definition(3): scc clobber
 * Definition(4): vcc clobber
 *
 */
struct Pseudo_reduction_instruction : public Instruction {
   ReduceOp reduce_op;
   uint16_t cluster_size; // must be 0 for scans
};
static_assert(sizeof(Pseudo_reduction_instruction) == sizeof(Instruction) + 4,
              "Unexpected padding");

struct instr_deleter_functor {
   void operator()(void* p) { free(p); }
};

template <typename T> using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;

template <typename T>
T*
create_instruction(aco_opcode opcode, Format format, uint32_t num_operands,
                   uint32_t num_definitions)
{
   std::size_t size =
      sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
   char* data = (char*)calloc(1, size);
   T* inst = (T*)data;

   inst->opcode = opcode;
   inst->format = format;

   uint16_t operands_offset = data + sizeof(T) - (char*)&inst->operands;
   inst->operands = aco::span<Operand>(operands_offset, num_operands);
   uint16_t definitions_offset = (char*)inst->operands.end() - (char*)&inst->definitions;
   inst->definitions = aco::span<Definition>(definitions_offset, num_definitions);

   return inst;
}
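
/* Illustrative sketch (not part of the original header; opcode and register
 * choices are arbitrary): allocating a one-operand, one-definition SOP1
 * instruction and filling it in:
 *   aco_ptr<Instruction> mov{create_instruction<SOP1_instruction>(
 *      aco_opcode::s_mov_b32, Format::SOP1, 1, 1)};
 *   mov->operands[0] = Operand::c32(42u);
 *   mov->definitions[0] = Definition(PhysReg{0}, s1); // write s0
 */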

constexpr bool
Instruction::usesModifiers() const noexcept
{
   if (isDPP() || isSDWA())
      return true;

   if (isVOP3P()) {
      const VOP3P_instruction& vop3p = this->vop3p();
      for (unsigned i = 0; i < operands.size(); i++) {
         if (vop3p.neg_lo[i] || vop3p.neg_hi[i])
            return true;

         /* opsel_hi must be 1 to not be considered a modifier - even for constants */
         if (!(vop3p.opsel_hi & (1 << i)))
            return true;
      }
      return vop3p.opsel_lo || vop3p.clamp;
   } else if (isVOP3()) {
      const VOP3_instruction& vop3 = this->vop3();
      for (unsigned i = 0; i < operands.size(); i++) {
         if (vop3.abs[i] || vop3.neg[i])
            return true;
      }
      return vop3.opsel || vop3.clamp || vop3.omod;
   }
   return false;
}

constexpr bool
is_phi(Instruction* instr)
{
   return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
}

static inline bool
is_phi(aco_ptr<Instruction>& instr)
{
   return is_phi(instr.get());
}

memory_sync_info get_sync_info(const Instruction* instr);

bool is_dead(const std::vector<uint16_t>& uses, Instruction* instr);

bool can_use_opsel(chip_class chip, aco_opcode op, int idx, bool high);
bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra);
/* updates "instr" and returns the old instruction (or NULL if no update was needed) */
aco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr);
bool needs_exec_mask(const Instruction* instr);

uint32_t get_reduction_identity(ReduceOp op, unsigned idx);

unsigned get_mimg_nsa_dwords(const Instruction* instr);

bool should_form_clause(const Instruction* a, const Instruction* b);

enum block_kind {
   /* uniform indicates that when leaving this block,
    * all active lanes stay active */
   block_kind_uniform = 1 << 0,
   block_kind_top_level = 1 << 1,
   block_kind_loop_preheader = 1 << 2,
   block_kind_loop_header = 1 << 3,
   block_kind_loop_exit = 1 << 4,
   block_kind_continue = 1 << 5,
   block_kind_break = 1 << 6,
   block_kind_continue_or_break = 1 << 7,
   block_kind_discard = 1 << 8,
   block_kind_branch = 1 << 9,
   block_kind_merge = 1 << 10,
   block_kind_invert = 1 << 11,
   block_kind_uses_discard_if = 1 << 12,
   block_kind_needs_lowering = 1 << 13,
   block_kind_uses_demote = 1 << 14,
   block_kind_export_end = 1 << 15,
};
1763
1764
struct RegisterDemand {
   constexpr RegisterDemand() = default;
   constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept : vgpr{v}, sgpr{s} {}
   int16_t vgpr = 0;
   int16_t sgpr = 0;

   constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept
   {
      return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
   }

   constexpr bool exceeds(const RegisterDemand other) const noexcept
   {
      return vgpr > other.vgpr || sgpr > other.sgpr;
   }

   constexpr RegisterDemand operator+(const Temp t) const noexcept
   {
      if (t.type() == RegType::sgpr)
         return RegisterDemand(vgpr, sgpr + t.size());
      else
         return RegisterDemand(vgpr + t.size(), sgpr);
   }

   constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept
   {
      return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
   }

   constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept
   {
      return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
   }

   constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept
   {
      vgpr += other.vgpr;
      sgpr += other.sgpr;
      return *this;
   }

   constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept
   {
      vgpr -= other.vgpr;
      sgpr -= other.sgpr;
      return *this;
   }

   constexpr RegisterDemand& operator+=(const Temp t) noexcept
   {
      if (t.type() == RegType::sgpr)
         sgpr += t.size();
      else
         vgpr += t.size();
      return *this;
   }

   constexpr RegisterDemand& operator-=(const Temp t) noexcept
   {
      if (t.type() == RegType::sgpr)
         sgpr -= t.size();
      else
         vgpr -= t.size();
      return *this;
   }

   constexpr void update(const RegisterDemand other) noexcept
   {
      vgpr = std::max(vgpr, other.vgpr);
      sgpr = std::max(sgpr, other.sgpr);
   }
};

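/* Illustrative sketch (not part of the upstream header): RegisterDemand is a
 * plain value type; demands combine component-wise, and exceeds()/update() are
 * used to test limits and track peak pressure. The numbers are made up.
 *
 *   RegisterDemand limit(24, 16);                // 24 VGPRs, 16 SGPRs
 *   RegisterDemand demand(20, 10);
 *   demand += RegisterDemand(8, 2);              // now {28, 12}
 *   bool needs_spilling = demand.exceeds(limit); // true: 28 > 24
 *
 *   RegisterDemand peak;
 *   peak.update(demand);                         // component-wise maximum
 */
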
/* CFG */
struct Block {
   float_mode fp_mode;
   unsigned index;
   unsigned offset = 0;
   std::vector<aco_ptr<Instruction>> instructions;
   std::vector<unsigned> logical_preds;
   std::vector<unsigned> linear_preds;
   std::vector<unsigned> logical_succs;
   std::vector<unsigned> linear_succs;
   RegisterDemand register_demand = RegisterDemand();
   uint16_t loop_nest_depth = 0;
   uint16_t divergent_if_logical_depth = 0;
   uint16_t uniform_if_depth = 0;
   uint16_t kind = 0;
   int logical_idom = -1;
   int linear_idom = -1;

   /* this information is needed for predecessors to blocks with phis when
    * moving out of ssa */
   bool scc_live_out = false;
   PhysReg scratch_sgpr = PhysReg(); /* only needs to be valid if scc_live_out is true */

   Block() : index(0) {}
};

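/* Illustrative sketch (not part of the upstream header): Block::kind is a
 * bitmask of block_kind flags and CFG edges are stored as block indices, so a
 * pass typically walks them like this ("program", "idx" and "process" are
 * assumed):
 *
 *   Block& block = program->blocks[idx];
 *   if (block.kind & (block_kind_merge | block_kind_loop_exit)) {
 *      for (unsigned pred : block.logical_preds)
 *         process(program->blocks[pred]);
 *   }
 */
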
/*
 * Shader stages as provided in Vulkan by the application. Contrast this to HWStage.
 */
enum class SWStage : uint8_t {
   None = 0,
   VS = 1 << 0,     /* Vertex Shader */
   GS = 1 << 1,     /* Geometry Shader */
   TCS = 1 << 2,    /* Tessellation Control aka Hull Shader */
   TES = 1 << 3,    /* Tessellation Evaluation aka Domain Shader */
   FS = 1 << 4,     /* Fragment aka Pixel Shader */
   CS = 1 << 5,     /* Compute Shader */
   GSCopy = 1 << 6, /* GS Copy Shader (internal) */

   /* Stage combinations merged to run on a single HWStage */
   VS_GS = VS | GS,
   VS_TCS = VS | TCS,
   TES_GS = TES | GS,
};

constexpr SWStage
operator|(SWStage a, SWStage b)
{
   return static_cast<SWStage>(static_cast<uint8_t>(a) | static_cast<uint8_t>(b));
}

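/* For example, the merged-stage values above are exactly the OR of their
 * components:
 *
 *   static_assert(SWStage::VS_GS == (SWStage::VS | SWStage::GS), "");
 */
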
/*
 * Shader stages as running on the AMD GPU.
 *
 * The relation between HWStages and SWStages is not a one-to-one mapping:
 * Some SWStages are merged by ACO to run on a single HWStage.
 * See README.md for details.
 */
enum class HWStage : uint8_t {
   VS,
   ES,  /* Export shader: pre-GS (VS or TES) on GFX6-8. Combined into GS on GFX9 (and GFX10/legacy). */
   GS,  /* Geometry shader on GFX10/legacy and GFX6-9. */
   NGG, /* Primitive shader, used to implement VS, TES, GS. */
   LS,  /* Local shader: pre-TCS (VS) on GFX6-8. Combined into HS on GFX9 (and GFX10/legacy). */
   HS,  /* Hull shader: TCS on GFX6-8. Merged VS and TCS on GFX9-10. */
   FS,
   CS,
};

/*
 * Set of SWStages to be merged into a single shader paired with the
 * HWStage it will run on.
 */
struct Stage {
   constexpr Stage() = default;

   explicit constexpr Stage(HWStage hw_, SWStage sw_) : sw(sw_), hw(hw_) {}

   /* Check if the given SWStage is included */
   constexpr bool has(SWStage stage) const
   {
      return (static_cast<uint8_t>(sw) & static_cast<uint8_t>(stage));
   }

   unsigned num_sw_stages() const { return util_bitcount(static_cast<uint8_t>(sw)); }

   constexpr bool operator==(const Stage& other) const { return sw == other.sw && hw == other.hw; }

   constexpr bool operator!=(const Stage& other) const { return sw != other.sw || hw != other.hw; }

   /* Mask of merged software stages */
   SWStage sw = SWStage::None;

   /* Active hardware stage */
   HWStage hw{};
};

/* possible settings of Program::stage */
static constexpr Stage vertex_vs(HWStage::VS, SWStage::VS);
static constexpr Stage fragment_fs(HWStage::FS, SWStage::FS);
static constexpr Stage compute_cs(HWStage::CS, SWStage::CS);
static constexpr Stage tess_eval_vs(HWStage::VS, SWStage::TES);
static constexpr Stage gs_copy_vs(HWStage::VS, SWStage::GSCopy);
/* GFX10/NGG */
static constexpr Stage vertex_ngg(HWStage::NGG, SWStage::VS);
static constexpr Stage vertex_geometry_ngg(HWStage::NGG, SWStage::VS_GS);
static constexpr Stage tess_eval_ngg(HWStage::NGG, SWStage::TES);
static constexpr Stage tess_eval_geometry_ngg(HWStage::NGG, SWStage::TES_GS);
/* GFX9 (and GFX10 if NGG isn't used) */
static constexpr Stage vertex_geometry_gs(HWStage::GS, SWStage::VS_GS);
static constexpr Stage vertex_tess_control_hs(HWStage::HS, SWStage::VS_TCS);
static constexpr Stage tess_eval_geometry_gs(HWStage::GS, SWStage::TES_GS);
/* pre-GFX9 */
static constexpr Stage vertex_ls(HWStage::LS, SWStage::VS); /* vertex before tessellation control */
static constexpr Stage vertex_es(HWStage::ES, SWStage::VS); /* vertex before geometry */
static constexpr Stage tess_control_hs(HWStage::HS, SWStage::TCS);
static constexpr Stage tess_eval_es(HWStage::ES,
                                    SWStage::TES); /* tessellation evaluation before geometry */
static constexpr Stage geometry_gs(HWStage::GS, SWStage::GS);

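/* Illustrative sketch: a merged stage reports each contained software stage
 * through Stage::has(), e.g. for the GFX9 VS+GS combination defined above:
 *
 *   static_assert(vertex_geometry_gs.has(SWStage::VS), "");
 *   static_assert(vertex_geometry_gs.has(SWStage::GS), "");
 *   static_assert(!vertex_geometry_gs.has(SWStage::TES), "");
 */
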
enum statistic {
   statistic_hash,
   statistic_instructions,
   statistic_copies,
   statistic_branches,
   statistic_latency,
   statistic_inv_throughput,
   statistic_vmem_clauses,
   statistic_smem_clauses,
   statistic_sgpr_presched,
   statistic_vgpr_presched,
   num_statistics
};

struct DeviceInfo {
   uint16_t lds_encoding_granule;
   uint16_t lds_alloc_granule;
   uint32_t lds_limit; /* in bytes */
   bool has_16bank_lds;
   uint16_t physical_sgprs;
   uint16_t physical_vgprs;
   uint16_t vgpr_limit;
   uint16_t sgpr_limit;
   uint16_t sgpr_alloc_granule;
   uint16_t vgpr_alloc_granule; /* must be power of two */
   unsigned max_wave64_per_simd;
   unsigned simd_per_cu;
   bool has_fast_fma32 = false;
   bool xnack_enabled = false;
   bool sram_ecc_enabled = false;
};

enum class CompilationProgress {
   after_isel,
   after_spilling,
   after_ra,
};

class Program final {
public:
   std::vector<Block> blocks;
   std::vector<RegClass> temp_rc = {s1};
   RegisterDemand max_reg_demand = RegisterDemand();
   uint16_t num_waves = 0;
   uint16_t max_waves = 0; /* maximum number of waves, regardless of register usage */
   ac_shader_config* config;
   struct radv_shader_info* info;
   enum chip_class chip_class;
   enum radeon_family family;
   DeviceInfo dev;
   unsigned wave_size;
   RegClass lane_mask;
   Stage stage;
   bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
   bool needs_wqm = false;   /* there exists a p_wqm instruction */

   std::vector<uint8_t> constant_data;
   Temp private_segment_buffer;
   Temp scratch_offset;

   uint16_t min_waves = 0;
   unsigned workgroup_size; /* if known; otherwise UINT_MAX */
   bool wgp_mode;
   bool early_rast = false; /* whether rasterization can start as soon as the 1st DONE pos export */

   bool needs_vcc = false;
   bool needs_flat_scr = false;

   CompilationProgress progress;

   bool collect_statistics = false;
   uint32_t statistics[num_statistics];

   float_mode next_fp_mode;
   unsigned next_loop_depth = 0;
   unsigned next_divergent_if_logical_depth = 0;
   unsigned next_uniform_if_depth = 0;

   struct {
      FILE* output = stderr;
      bool shorten_messages = false;
      void (*func)(void* private_data, enum radv_compiler_debug_level level, const char* message);
      void* private_data;
   } debug;

   uint32_t allocateId(RegClass rc)
   {
      assert(allocationID <= 16777215);
      temp_rc.push_back(rc);
      return allocationID++;
   }

   void allocateRange(unsigned amount)
   {
      assert(allocationID + amount <= 16777216);
      temp_rc.resize(temp_rc.size() + amount);
      allocationID += amount;
   }

   Temp allocateTmp(RegClass rc) { return Temp(allocateId(rc), rc); }

   uint32_t peekAllocationId() { return allocationID; }

   friend void reindex_ssa(Program* program);
   friend void reindex_ssa(Program* program, std::vector<IDSet>& live_out);

   Block* create_and_insert_block()
   {
      Block block;
      return insert_block(std::move(block));
   }

   Block* insert_block(Block&& block)
   {
      block.index = blocks.size();
      block.fp_mode = next_fp_mode;
      block.loop_nest_depth = next_loop_depth;
      block.divergent_if_logical_depth = next_divergent_if_logical_depth;
      block.uniform_if_depth = next_uniform_if_depth;
      blocks.emplace_back(std::move(block));
      return &blocks.back();
   }

private:
   uint32_t allocationID = 1;
};

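/* Illustrative sketch (not part of the upstream header): typical use of the
 * Program interface when building IR; the register class and the constant
 * operand are assumptions made up for the example.
 *
 *   Block* block = program->create_and_insert_block();
 *   Temp dst = program->allocateTmp(v1); // fresh SSA id of class v1
 *   aco_ptr<Instruction> mov{create_instruction<VOP1_instruction>(
 *      aco_opcode::v_mov_b32, Format::VOP1, 1, 1)};
 *   mov->operands[0] = Operand((uint32_t)0);
 *   mov->definitions[0] = Definition(dst);
 *   block->instructions.emplace_back(std::move(mov));
 */
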
struct live {
   /* live temps out per block */
   std::vector<IDSet> live_out;
   /* register demand (sgpr/vgpr) per instruction per block */
   std::vector<std::vector<RegisterDemand>> register_demand;
};

struct ra_test_policy {
   /* Force RA to always use its pessimistic fallback algorithm */
   bool skip_optimistic_path = false;
};

void init();

void init_program(Program* program, Stage stage, struct radv_shader_info* info,
                  enum chip_class chip_class, enum radeon_family family, bool wgp_mode,
                  ac_shader_config* config);

void select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
                    ac_shader_config* config, struct radv_shader_args* args);
void select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_config* config,
                           struct radv_shader_args* args);
void select_trap_handler_shader(Program* program, struct nir_shader* shader,
                                ac_shader_config* config, struct radv_shader_args* args);

void lower_phis(Program* program);
void calc_min_waves(Program* program);
void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
live live_var_analysis(Program* program);
std::vector<uint16_t> dead_code_analysis(Program* program);
void dominator_tree(Program* program);
void insert_exec_mask(Program* program);
void value_numbering(Program* program);
void optimize(Program* program);
void optimize_postRA(Program* program);
void setup_reduce_temp(Program* program);
void lower_to_cssa(Program* program, live& live_vars);
void register_allocation(Program* program, std::vector<IDSet>& live_out_per_block,
                         ra_test_policy = {});
void ssa_elimination(Program* program);
void lower_to_hw_instr(Program* program);
void schedule_program(Program* program, live& live_vars);
void spill(Program* program, live& live_vars);
void insert_wait_states(Program* program);
void insert_NOPs(Program* program);
void form_hard_clauses(Program* program);
unsigned emit_program(Program* program, std::vector<uint32_t>& code);
bool print_asm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output);
bool validate_ir(Program* program);
bool validate_ra(Program* program);
#ifndef NDEBUG
void perfwarn(Program* program, bool cond, const char* msg, Instruction* instr = NULL);
#else
#define perfwarn(program, cond, msg, ...) \
   do { \
   } while (0)
#endif

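/* Illustrative sketch (not from the upstream header): one plausible ordering of
 * the passes declared above; the authoritative sequence lives in the driver
 * (aco_interface.cpp) and differs in detail.
 *
 *   lower_phis(program);
 *   value_numbering(program);
 *   optimize(program);
 *   live live_vars = live_var_analysis(program);
 *   spill(program, live_vars);
 *   schedule_program(program, live_vars);
 *   register_allocation(program, live_vars.live_out);
 *   ssa_elimination(program);
 *   lower_to_hw_instr(program);
 *   insert_wait_states(program);
 *   insert_NOPs(program);
 *   std::vector<uint32_t> code;
 *   unsigned exec_size = emit_program(program, code);
 */
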
void collect_presched_stats(Program* program);
void collect_preasm_stats(Program* program);
void collect_postasm_stats(Program* program, const std::vector<uint32_t>& code);

enum print_flags {
   print_no_ssa = 0x1,
   print_perf_info = 0x2,
   print_kill = 0x4,
   print_live_vars = 0x8,
};

void aco_print_operand(const Operand* operand, FILE* output, unsigned flags = 0);
void aco_print_instr(const Instruction* instr, FILE* output, unsigned flags = 0);
void aco_print_program(const Program* program, FILE* output, unsigned flags = 0);
void aco_print_program(const Program* program, FILE* output, const live& live_vars,
                       unsigned flags = 0);

void _aco_perfwarn(Program* program, const char* file, unsigned line, const char* fmt, ...);
void _aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...);

#define aco_perfwarn(program, ...) _aco_perfwarn(program, __FILE__, __LINE__, __VA_ARGS__)
#define aco_err(program, ...) _aco_err(program, __FILE__, __LINE__, __VA_ARGS__)

/* utilities for dealing with register demand */
RegisterDemand get_live_changes(aco_ptr<Instruction>& instr);
RegisterDemand get_temp_registers(aco_ptr<Instruction>& instr);
RegisterDemand get_demand_before(RegisterDemand demand, aco_ptr<Instruction>& instr,
                                 aco_ptr<Instruction>& instr_before);

/* number of sgprs that need to be allocated but might not be addressable as s0-s105 */
uint16_t get_extra_sgprs(Program* program);

/* get the number of sgprs/vgprs that must be allocated to address a number of sgprs/vgprs */
uint16_t get_sgpr_alloc(Program* program, uint16_t addressable_sgprs);
uint16_t get_vgpr_alloc(Program* program, uint16_t addressable_vgprs);

/* return number of addressable sgprs/vgprs for max_waves */
uint16_t get_addr_sgpr_from_waves(Program* program, uint16_t max_waves);
uint16_t get_addr_vgpr_from_waves(Program* program, uint16_t max_waves);

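/* Illustrative sketch (an assumption, not the upstream implementation): the
 * *_alloc helpers conceptually round the addressable count up to the hardware
 * allocation granule from DeviceInfo, along the lines of:
 *
 *   uint16_t granule = program->dev.vgpr_alloc_granule; // power of two
 *   uint16_t allocated = (addressable_vgprs + granule - 1) & ~(granule - 1);
 */
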
typedef struct {
   const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
   const char* name[static_cast<int>(aco_opcode::num_opcodes)];
   const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
   /* sizes used for input/output modifiers and constants */
   const unsigned operand_size[static_cast<int>(aco_opcode::num_opcodes)];
   const unsigned definition_size[static_cast<int>(aco_opcode::num_opcodes)];
   const instr_class classes[static_cast<int>(aco_opcode::num_opcodes)];
} Info;

extern const Info instr_info;

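/* Illustrative sketch: per-opcode properties are indexed by the opcode value,
 * e.g. to look up an instruction's mnemonic and encoding format:
 *
 *   int idx = static_cast<int>(instr->opcode);
 *   const char* mnemonic = instr_info.name[idx];
 *   bool is_smem = instr_info.format[idx] == Format::SMEM;
 */
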
} // namespace aco

#endif /* ACO_IR_H */