#ifndef ACO_IR_H
#define ACO_IR_H
#include "aco_opcodes.h"
#include "aco_util.h"
#include "vulkan/radv_shader.h"
#include "nir.h"
#include <bitset>
#include <memory>
#include <vector>
struct radv_shader_args;
struct radv_shader_info;
namespace aco {
extern uint64_t debug_flags;
enum {
DEBUG_VALIDATE_IR = 0x1,
DEBUG_VALIDATE_RA = 0x2,
DEBUG_PERFWARN = 0x4,
DEBUG_FORCE_WAITCNT = 0x8,
DEBUG_NO_VN = 0x10,
DEBUG_NO_OPT = 0x20,
DEBUG_NO_SCHED = 0x40,
DEBUG_PERF_INFO = 0x80,
DEBUG_LIVE_INFO = 0x100,
};
enum class Format : std::uint16_t {
PSEUDO = 0,
SOP1 = 1,
SOP2 = 2,
SOPK = 3,
SOPP = 4,
SOPC = 5,
SMEM = 6,
DS = 8,
MTBUF = 9,
MUBUF = 10,
MIMG = 11,
EXP = 12,
FLAT = 13,
GLOBAL = 14,
SCRATCH = 15,
PSEUDO_BRANCH = 16,
PSEUDO_BARRIER = 17,
PSEUDO_REDUCTION = 18,
VOP3P = 19,
VOP1 = 1 << 8,
VOP2 = 1 << 9,
VOPC = 1 << 10,
VOP3 = 1 << 11,
VINTRP = 1 << 12,
DPP = 1 << 13,
SDWA = 1 << 14,
};
enum class instr_class : uint8_t {
valu32 = 0,
valu_convert32 = 1,
valu64 = 2,
valu_quarter_rate32 = 3,
valu_fma = 4,
valu_transcendental32 = 5,
valu_double = 6,
valu_double_add = 7,
valu_double_convert = 8,
valu_double_transcendental = 9,
salu = 10,
smem = 11,
barrier = 12,
branch = 13,
sendmsg = 14,
ds = 15,
exp = 16,
vmem = 17,
waitcnt = 18,
other = 19,
count,
};
enum storage_class : uint8_t {
storage_none = 0x0,
storage_buffer = 0x1,
storage_atomic_counter = 0x2,
storage_image = 0x4,
storage_shared = 0x8,
storage_vmem_output = 0x10,
storage_scratch = 0x20,
storage_vgpr_spill = 0x40,
storage_count = 8,
};
enum memory_semantics : uint8_t {
semantic_none = 0x0,
semantic_acquire = 0x1,
semantic_release = 0x2,
semantic_volatile = 0x4,
semantic_private = 0x8,
semantic_can_reorder = 0x10,
semantic_atomic = 0x20,
semantic_rmw = 0x40,
semantic_acqrel = semantic_acquire | semantic_release,
semantic_atomicrmw = semantic_volatile | semantic_atomic | semantic_rmw,
};
enum sync_scope : uint8_t {
scope_invocation = 0,
scope_subgroup = 1,
scope_workgroup = 2,
scope_queuefamily = 3,
scope_device = 4,
};
struct memory_sync_info {
memory_sync_info() : storage(storage_none), semantics(semantic_none), scope(scope_invocation) {}
memory_sync_info(int storage_, int semantics_ = 0, sync_scope scope_ = scope_invocation)
: storage((storage_class)storage_), semantics((memory_semantics)semantics_), scope(scope_)
{}
storage_class storage : 8;
memory_semantics semantics : 8;
sync_scope scope : 8;
bool operator==(const memory_sync_info& rhs) const
{
return storage == rhs.storage && semantics == rhs.semantics && scope == rhs.scope;
}
bool can_reorder() const
{
if (semantics & semantic_acqrel)
return false;
return (!storage || (semantics & semantic_can_reorder)) && !(semantics & semantic_volatile);
}
};
static_assert(sizeof(memory_sync_info) == 3, "Unexpected padding");
enum fp_round {
fp_round_ne = 0,
fp_round_pi = 1,
fp_round_ni = 2,
fp_round_tz = 3,
};
enum fp_denorm {
fp_denorm_flush = 0x0,
fp_denorm_keep_in = 0x1,
fp_denorm_keep_out = 0x2,
fp_denorm_keep = 0x3,
};
struct float_mode {
union {
struct {
fp_round round32 : 2;
fp_round round16_64 : 2;
unsigned denorm32 : 2;
unsigned denorm16_64 : 2;
};
struct {
uint8_t round : 4;
uint8_t denorm : 4;
};
uint8_t val = 0;
};
bool preserve_signed_zero_inf_nan32 : 1;
bool preserve_signed_zero_inf_nan16_64 : 1;
bool must_flush_denorms32 : 1;
bool must_flush_denorms16_64 : 1;
bool care_about_round32 : 1;
bool care_about_round16_64 : 1;
bool canReplace(float_mode other) const noexcept
{
return val == other.val &&
(preserve_signed_zero_inf_nan32 || !other.preserve_signed_zero_inf_nan32) &&
(preserve_signed_zero_inf_nan16_64 || !other.preserve_signed_zero_inf_nan16_64) &&
(must_flush_denorms32 || !other.must_flush_denorms32) &&
(must_flush_denorms16_64 || !other.must_flush_denorms16_64) &&
(care_about_round32 || !other.care_about_round32) &&
(care_about_round16_64 || !other.care_about_round16_64);
}
};
struct wait_imm {
static const uint8_t unset_counter = 0xff;
uint8_t vm;
uint8_t exp;
uint8_t lgkm;
uint8_t vs;
wait_imm();
wait_imm(uint16_t vm_, uint16_t exp_, uint16_t lgkm_, uint16_t vs_);
wait_imm(enum chip_class chip, uint16_t packed);
uint16_t pack(enum chip_class chip) const;
bool combine(const wait_imm& other);
bool empty() const;
};
constexpr Format
asVOP3(Format format)
{
return (Format)((uint32_t)Format::VOP3 | (uint32_t)format);
};
constexpr Format
asSDWA(Format format)
{
assert(format == Format::VOP1 || format == Format::VOP2 || format == Format::VOPC);
return (Format)((uint32_t)Format::SDWA | (uint32_t)format);
}
enum class RegType {
none = 0,
sgpr,
vgpr,
linear_vgpr,
};
struct RegClass {
enum RC : uint8_t {
s1 = 1,
s2 = 2,
s3 = 3,
s4 = 4,
s6 = 6,
s8 = 8,
s16 = 16,
v1 = s1 | (1 << 5),
v2 = s2 | (1 << 5),
v3 = s3 | (1 << 5),
v4 = s4 | (1 << 5),
v5 = 5 | (1 << 5),
v6 = 6 | (1 << 5),
v7 = 7 | (1 << 5),
v8 = 8 | (1 << 5),
v1b = v1 | (1 << 7),
v2b = v2 | (1 << 7),
v3b = v3 | (1 << 7),
v4b = v4 | (1 << 7),
v6b = v6 | (1 << 7),
v8b = v8 | (1 << 7),
v1_linear = v1 | (1 << 6),
v2_linear = v2 | (1 << 6),
};
RegClass() = default;
constexpr RegClass(RC rc_) : rc(rc_) {}
constexpr RegClass(RegType type, unsigned size)
: rc((RC)((type == RegType::vgpr ? 1 << 5 : 0) | size))
{}
constexpr operator RC() const { return rc; }
explicit operator bool() = delete;
constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
constexpr bool is_subdword() const { return rc & (1 << 7); }
constexpr unsigned bytes() const { return ((unsigned)rc & 0x1F) * (is_subdword() ? 1 : 4); }
constexpr unsigned size() const { return (bytes() + 3) >> 2; }
constexpr bool is_linear() const { return rc <= RC::s16 || rc & (1 << 6); }
constexpr RegClass as_linear() const { return RegClass((RC)(rc | (1 << 6))); }
constexpr RegClass as_subdword() const { return RegClass((RC)(rc | 1 << 7)); }
static constexpr RegClass get(RegType type, unsigned bytes)
{
if (type == RegType::sgpr) {
return RegClass(type, DIV_ROUND_UP(bytes, 4u));
} else {
return bytes % 4u ? RegClass(type, bytes).as_subdword() : RegClass(type, bytes / 4u);
}
}
private:
RC rc;
};
static constexpr RegClass s1{RegClass::s1};
static constexpr RegClass s2{RegClass::s2};
static constexpr RegClass s3{RegClass::s3};
static constexpr RegClass s4{RegClass::s4};
static constexpr RegClass s8{RegClass::s8};
static constexpr RegClass s16{RegClass::s16};
static constexpr RegClass v1{RegClass::v1};
static constexpr RegClass v2{RegClass::v2};
static constexpr RegClass v3{RegClass::v3};
static constexpr RegClass v4{RegClass::v4};
static constexpr RegClass v5{RegClass::v5};
static constexpr RegClass v6{RegClass::v6};
static constexpr RegClass v7{RegClass::v7};
static constexpr RegClass v8{RegClass::v8};
static constexpr RegClass v1b{RegClass::v1b};
static constexpr RegClass v2b{RegClass::v2b};
static constexpr RegClass v3b{RegClass::v3b};
static constexpr RegClass v4b{RegClass::v4b};
static constexpr RegClass v6b{RegClass::v6b};
static constexpr RegClass v8b{RegClass::v8b};
struct Temp {
Temp() noexcept : id_(0), reg_class(0) {}
constexpr Temp(uint32_t id, RegClass cls) noexcept : id_(id), reg_class(uint8_t(cls)) {}
constexpr uint32_t id() const noexcept { return id_; }
constexpr RegClass regClass() const noexcept { return (RegClass::RC)reg_class; }
constexpr unsigned bytes() const noexcept { return regClass().bytes(); }
constexpr unsigned size() const noexcept { return regClass().size(); }
constexpr RegType type() const noexcept { return regClass().type(); }
constexpr bool is_linear() const noexcept { return regClass().is_linear(); }
constexpr bool operator<(Temp other) const noexcept { return id() < other.id(); }
constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }
private:
uint32_t id_ : 24;
uint32_t reg_class : 8;
};
struct PhysReg {
constexpr PhysReg() = default;
explicit constexpr PhysReg(unsigned r) : reg_b(r << 2) {}
constexpr unsigned reg() const { return reg_b >> 2; }
constexpr unsigned byte() const { return reg_b & 0x3; }
constexpr operator unsigned() const { return reg(); }
constexpr bool operator==(PhysReg other) const { return reg_b == other.reg_b; }
constexpr bool operator!=(PhysReg other) const { return reg_b != other.reg_b; }
constexpr bool operator<(PhysReg other) const { return reg_b < other.reg_b; }
constexpr PhysReg advance(int bytes) const
{
PhysReg res = *this;
res.reg_b += bytes;
return res;
}
uint16_t reg_b = 0;
};
static constexpr PhysReg m0{124};
static constexpr PhysReg vcc{106};
static constexpr PhysReg vcc_hi{107};
static constexpr PhysReg tba{108};
static constexpr PhysReg tma{110};
static constexpr PhysReg ttmp0{112};
static constexpr PhysReg ttmp1{113};
static constexpr PhysReg ttmp2{114};
static constexpr PhysReg ttmp3{115};
static constexpr PhysReg ttmp4{116};
static constexpr PhysReg ttmp5{117};
static constexpr PhysReg ttmp6{118};
static constexpr PhysReg ttmp7{119};
static constexpr PhysReg ttmp8{120};
static constexpr PhysReg ttmp9{121};
static constexpr PhysReg ttmp10{122};
static constexpr PhysReg ttmp11{123};
static constexpr PhysReg sgpr_null{125};
static constexpr PhysReg exec{126};
static constexpr PhysReg exec_lo{126};
static constexpr PhysReg exec_hi{127};
static constexpr PhysReg vccz{251};
static constexpr PhysReg execz{252};
static constexpr PhysReg scc{253};
class Operand final {
public:
constexpr Operand()
: reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false), isKill_(false),
isUndef_(true), isFirstKill_(false), constSize(0), isLateKill_(false), is16bit_(false),
is24bit_(false), signext(false)
{}
explicit Operand(Temp r) noexcept
{
data_.temp = r;
if (r.id()) {
isTemp_ = true;
} else {
isUndef_ = true;
setFixed(PhysReg{128});
}
};
explicit Operand(Temp r, PhysReg reg) noexcept
{
assert(r.id());
data_.temp = r;
isTemp_ = true;
setFixed(reg);
};
static Operand c8(uint8_t v) noexcept
{
Operand op;
op.control_ = 0;
op.data_.i = v;
op.isConstant_ = true;
op.constSize = 0;
op.setFixed(PhysReg{0u});
return op;
};
static Operand c16(uint16_t v) noexcept
{
Operand op;
op.control_ = 0;
op.data_.i = v;
op.isConstant_ = true;
op.constSize = 1;
if (v <= 64)
op.setFixed(PhysReg{128u + v});
else if (v >= 0xFFF0)
op.setFixed(PhysReg{(unsigned)(192 - (int16_t)v)});
else if (v == 0x3800)
op.setFixed(PhysReg{240});
else if (v == 0xB800)
op.setFixed(PhysReg{241});
else if (v == 0x3C00)
op.setFixed(PhysReg{242});
else if (v == 0xBC00)
op.setFixed(PhysReg{243});
else if (v == 0x4000)
op.setFixed(PhysReg{244});
else if (v == 0xC000)
op.setFixed(PhysReg{245});
else if (v == 0x4400)
op.setFixed(PhysReg{246});
else if (v == 0xC400)
op.setFixed(PhysReg{247});
else if (v == 0x3118)
op.setFixed(PhysReg{248});
else
op.setFixed(PhysReg{255});
return op;
}
static Operand c32(uint32_t v) noexcept { return c32_or_c64(v, false); }
static Operand c64(uint64_t v) noexcept
{
Operand op;
op.control_ = 0;
op.isConstant_ = true;
op.constSize = 3;
if (v <= 64) {
op.data_.i = (uint32_t)v;
op.setFixed(PhysReg{128 + (uint32_t)v});
} else if (v >= 0xFFFFFFFFFFFFFFF0) {
op.data_.i = (uint32_t)v;
op.setFixed(PhysReg{192 - (uint32_t)v});
} else if (v == 0x3FE0000000000000) {
op.data_.i = 0x3f000000;
op.setFixed(PhysReg{240});
} else if (v == 0xBFE0000000000000) {
op.data_.i = 0xbf000000;
op.setFixed(PhysReg{241});
} else if (v == 0x3FF0000000000000) {
op.data_.i = 0x3f800000;
op.setFixed(PhysReg{242});
} else if (v == 0xBFF0000000000000) {
op.data_.i = 0xbf800000;
op.setFixed(PhysReg{243});
} else if (v == 0x4000000000000000) {
op.data_.i = 0x40000000;
op.setFixed(PhysReg{244});
} else if (v == 0xC000000000000000) {
op.data_.i = 0xc0000000;
op.setFixed(PhysReg{245});
} else if (v == 0x4010000000000000) {
op.data_.i = 0x40800000;
op.setFixed(PhysReg{246});
} else if (v == 0xC010000000000000) {
op.data_.i = 0xc0800000;
op.setFixed(PhysReg{247});
} else {
op.signext = v >> 63;
op.data_.i = v & 0xffffffffu;
op.setFixed(PhysReg{255});
assert(op.constantValue64() == v &&
"attempt to create a unrepresentable 64-bit literal constant");
}
return op;
}
static Operand c32_or_c64(uint32_t v, bool is64bit) noexcept
{
Operand op;
op.control_ = 0;
op.data_.i = v;
op.isConstant_ = true;
op.constSize = is64bit ? 3 : 2;
if (v <= 64)
op.setFixed(PhysReg{128 + v});
else if (v >= 0xFFFFFFF0)
op.setFixed(PhysReg{192 - v});
else if (v == 0x3f000000)
op.setFixed(PhysReg{240});
else if (v == 0xbf000000)
op.setFixed(PhysReg{241});
else if (v == 0x3f800000)
op.setFixed(PhysReg{242});
else if (v == 0xbf800000)
op.setFixed(PhysReg{243});
else if (v == 0x40000000)
op.setFixed(PhysReg{244});
else if (v == 0xc0000000)
op.setFixed(PhysReg{245});
else if (v == 0x40800000)
op.setFixed(PhysReg{246});
else if (v == 0xc0800000)
op.setFixed(PhysReg{247});
else {
assert(!is64bit && "attempt to create a 64-bit literal constant");
op.setFixed(PhysReg{255});
}
return op;
}
explicit Operand(RegClass type) noexcept
{
isUndef_ = true;
data_.temp = Temp(0, type);
setFixed(PhysReg{128});
};
explicit Operand(PhysReg reg, RegClass type) noexcept
{
data_.temp = Temp(0, type);
setFixed(reg);
}
static Operand zero(unsigned bytes = 4) noexcept
{
if (bytes == 8)
return Operand::c64(0);
else if (bytes == 4)
return Operand::c32(0);
else if (bytes == 2)
return Operand::c16(0);
assert(bytes == 1);
return Operand::c8(0);
}
static Operand get_const(enum chip_class chip, uint64_t val, unsigned bytes)
{
if (val == 0x3e22f983 && bytes == 4 && chip >= GFX8) {
Operand op = Operand::c32(val);
op.setFixed(PhysReg{248});
return op;
}
if (bytes == 8)
return Operand::c64(val);
else if (bytes == 4)
return Operand::c32(val);
else if (bytes == 2)
return Operand::c16(val);
assert(bytes == 1);
return Operand::c8(val);
}
static bool is_constant_representable(uint64_t val, unsigned bytes, bool zext = false,
bool sext = false)
{
if (bytes <= 4)
return true;
if (zext && (val & 0xFFFFFFFF00000000) == 0x0000000000000000)
return true;
uint64_t upper33 = val & 0xFFFFFFFF80000000;
if (sext && (upper33 == 0xFFFFFFFF80000000 || upper33 == 0))
return true;
return val >= 0xFFFFFFFFFFFFFFF0 || val <= 64 ||
val == 0x3FE0000000000000 ||
val == 0xBFE0000000000000 ||
val == 0x3FF0000000000000 ||
val == 0xBFF0000000000000 ||
val == 0x4000000000000000 ||
val == 0xC000000000000000 ||
val == 0x4010000000000000 ||
val == 0xC010000000000000;
}
constexpr bool isTemp() const noexcept { return isTemp_; }
constexpr void setTemp(Temp t) noexcept
{
assert(!isConstant_);
isTemp_ = true;
data_.temp = t;
}
constexpr Temp getTemp() const noexcept { return data_.temp; }
constexpr uint32_t tempId() const noexcept { return data_.temp.id(); }
constexpr bool hasRegClass() const noexcept { return isTemp() || isUndefined(); }
constexpr RegClass regClass() const noexcept { return data_.temp.regClass(); }
constexpr unsigned bytes() const noexcept
{
if (isConstant())
return 1 << constSize;
else
return data_.temp.bytes();
}
constexpr unsigned size() const noexcept
{
if (isConstant())
return constSize > 2 ? 2 : 1;
else
return data_.temp.size();
}
constexpr bool isFixed() const noexcept { return isFixed_; }
constexpr PhysReg physReg() const noexcept { return reg_; }
constexpr void setFixed(PhysReg reg) noexcept
{
isFixed_ = reg != unsigned(-1);
reg_ = reg;
}
constexpr bool isConstant() const noexcept { return isConstant_; }
constexpr bool isLiteral() const noexcept { return isConstant() && reg_ == 255; }
constexpr bool isUndefined() const noexcept { return isUndef_; }
constexpr uint32_t constantValue() const noexcept { return data_.i; }
constexpr bool constantEquals(uint32_t cmp) const noexcept
{
return isConstant() && constantValue() == cmp;
}
constexpr uint64_t constantValue64() const noexcept
{
if (constSize == 3) {
if (reg_ <= 192)
return reg_ - 128;
else if (reg_ <= 208)
return 0xFFFFFFFFFFFFFFFF - (reg_ - 193);
switch (reg_) {
case 240: return 0x3FE0000000000000;
case 241: return 0xBFE0000000000000;
case 242: return 0x3FF0000000000000;
case 243: return 0xBFF0000000000000;
case 244: return 0x4000000000000000;
case 245: return 0xC000000000000000;
case 246: return 0x4010000000000000;
case 247: return 0xC010000000000000;
case 255:
return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i;
}
unreachable("invalid register for 64-bit constant");
} else {
return data_.i;
}
}
constexpr bool isOfType(RegType type) const noexcept
{
return hasRegClass() && regClass().type() == type;
}
constexpr void setLateKill(bool flag) noexcept { isLateKill_ = flag; }
constexpr bool isLateKill() const noexcept { return isLateKill_; }
constexpr void setKill(bool flag) noexcept
{
isKill_ = flag;
if (!flag)
setFirstKill(false);
}
constexpr bool isKill() const noexcept { return isKill_ || isFirstKill(); }
constexpr void setFirstKill(bool flag) noexcept
{
isFirstKill_ = flag;
if (flag)
setKill(flag);
}
constexpr bool isFirstKill() const noexcept { return isFirstKill_; }
constexpr bool isKillBeforeDef() const noexcept { return isKill() && !isLateKill(); }
constexpr bool isFirstKillBeforeDef() const noexcept { return isFirstKill() && !isLateKill(); }
constexpr bool operator==(Operand other) const noexcept
{
if (other.size() != size())
return false;
if (isFixed() != other.isFixed() || isKillBeforeDef() != other.isKillBeforeDef())
return false;
if (isFixed() && other.isFixed() && physReg() != other.physReg())
return false;
if (isLiteral())
return other.isLiteral() && other.constantValue() == constantValue();
else if (isConstant())
return other.isConstant() && other.physReg() == physReg();
else if (isUndefined())
return other.isUndefined() && other.regClass() == regClass();
else
return other.isTemp() && other.getTemp() == getTemp();
}
constexpr bool operator!=(Operand other) const noexcept { return !operator==(other); }
constexpr void set16bit(bool flag) noexcept { is16bit_ = flag; }
constexpr bool is16bit() const noexcept { return is16bit_; }
constexpr void set24bit(bool flag) noexcept { is24bit_ = flag; }
constexpr bool is24bit() const noexcept { return is24bit_; }
private:
union {
Temp temp;
uint32_t i;
float f;
} data_ = {Temp(0, s1)};
PhysReg reg_;
union {
struct {
uint8_t isTemp_ : 1;
uint8_t isFixed_ : 1;
uint8_t isConstant_ : 1;
uint8_t isKill_ : 1;
uint8_t isUndef_ : 1;
uint8_t isFirstKill_ : 1;
uint8_t constSize : 2;
uint8_t isLateKill_ : 1;
uint8_t is16bit_ : 1;
uint8_t is24bit_ : 1;
uint8_t signext : 1;
};
uint16_t control_ = 0;
};
};
class Definition final {
public:
constexpr Definition()
: temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0), isPrecise_(0), isNUW_(0),
isNoCSE_(0)
{}
Definition(uint32_t index, RegClass type) noexcept : temp(index, type) {}
explicit Definition(Temp tmp) noexcept : temp(tmp) {}
Definition(PhysReg reg, RegClass type) noexcept : temp(Temp(0, type)) { setFixed(reg); }
Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept : temp(Temp(tmpId, type))
{
setFixed(reg);
}
constexpr bool isTemp() const noexcept { return tempId() > 0; }
constexpr Temp getTemp() const noexcept { return temp; }
constexpr uint32_t tempId() const noexcept { return temp.id(); }
constexpr void setTemp(Temp t) noexcept { temp = t; }
void swapTemp(Definition& other) noexcept { std::swap(temp, other.temp); }
constexpr RegClass regClass() const noexcept { return temp.regClass(); }
constexpr unsigned bytes() const noexcept { return temp.bytes(); }
constexpr unsigned size() const noexcept { return temp.size(); }
constexpr bool isFixed() const noexcept { return isFixed_; }
constexpr PhysReg physReg() const noexcept { return reg_; }
constexpr void setFixed(PhysReg reg) noexcept
{
isFixed_ = 1;
reg_ = reg;
}
constexpr void setHint(PhysReg reg) noexcept
{
hasHint_ = 1;
reg_ = reg;
}
constexpr bool hasHint() const noexcept { return hasHint_; }
constexpr void setKill(bool flag) noexcept { isKill_ = flag; }
constexpr bool isKill() const noexcept { return isKill_; }
constexpr void setPrecise(bool precise) noexcept { isPrecise_ = precise; }
constexpr bool isPrecise() const noexcept { return isPrecise_; }
constexpr void setNUW(bool nuw) noexcept { isNUW_ = nuw; }
constexpr bool isNUW() const noexcept { return isNUW_; }
constexpr void setNoCSE(bool noCSE) noexcept { isNoCSE_ = noCSE; }
constexpr bool isNoCSE() const noexcept { return isNoCSE_; }
private:
Temp temp = Temp(0, s1);
PhysReg reg_;
union {
struct {
uint8_t isFixed_ : 1;
uint8_t hasHint_ : 1;
uint8_t isKill_ : 1;
uint8_t isPrecise_ : 1;
uint8_t isNUW_ : 1;
uint8_t isNoCSE_ : 1;
};
uint8_t control_ = 0;
};
};
struct Block;
struct Instruction;
struct Pseudo_instruction;
struct SOP1_instruction;
struct SOP2_instruction;
struct SOPK_instruction;
struct SOPP_instruction;
struct SOPC_instruction;
struct SMEM_instruction;
struct DS_instruction;
struct MTBUF_instruction;
struct MUBUF_instruction;
struct MIMG_instruction;
struct Export_instruction;
struct FLAT_instruction;
struct Pseudo_branch_instruction;
struct Pseudo_barrier_instruction;
struct Pseudo_reduction_instruction;
struct VOP3P_instruction;
struct VOP1_instruction;
struct VOP2_instruction;
struct VOPC_instruction;
struct VOP3_instruction;
struct Interp_instruction;
struct DPP_instruction;
struct SDWA_instruction;
struct Instruction {
aco_opcode opcode;
Format format;
uint32_t pass_flags;
aco::span<Operand> operands;
aco::span<Definition> definitions;
constexpr bool usesModifiers() const noexcept;
constexpr bool reads_exec() const noexcept
{
for (const Operand& op : operands) {
if (op.isFixed() && op.physReg() == exec)
return true;
}
return false;
}
Pseudo_instruction& pseudo() noexcept
{
assert(isPseudo());
return *(Pseudo_instruction*)this;
}
const Pseudo_instruction& pseudo() const noexcept
{
assert(isPseudo());
return *(Pseudo_instruction*)this;
}
constexpr bool isPseudo() const noexcept { return format == Format::PSEUDO; }
SOP1_instruction& sop1() noexcept
{
assert(isSOP1());
return *(SOP1_instruction*)this;
}
const SOP1_instruction& sop1() const noexcept
{
assert(isSOP1());
return *(SOP1_instruction*)this;
}
constexpr bool isSOP1() const noexcept { return format == Format::SOP1; }
SOP2_instruction& sop2() noexcept
{
assert(isSOP2());
return *(SOP2_instruction*)this;
}
const SOP2_instruction& sop2() const noexcept
{
assert(isSOP2());
return *(SOP2_instruction*)this;
}
constexpr bool isSOP2() const noexcept { return format == Format::SOP2; }
SOPK_instruction& sopk() noexcept
{
assert(isSOPK());
return *(SOPK_instruction*)this;
}
const SOPK_instruction& sopk() const noexcept
{
assert(isSOPK());
return *(SOPK_instruction*)this;
}
constexpr bool isSOPK() const noexcept { return format == Format::SOPK; }
SOPP_instruction& sopp() noexcept
{
assert(isSOPP());
return *(SOPP_instruction*)this;
}
const SOPP_instruction& sopp() const noexcept
{
assert(isSOPP());
return *(SOPP_instruction*)this;
}
constexpr bool isSOPP() const noexcept { return format == Format::SOPP; }
SOPC_instruction& sopc() noexcept
{
assert(isSOPC());
return *(SOPC_instruction*)this;
}
const SOPC_instruction& sopc() const noexcept
{
assert(isSOPC());
return *(SOPC_instruction*)this;
}
constexpr bool isSOPC() const noexcept { return format == Format::SOPC; }
SMEM_instruction& smem() noexcept
{
assert(isSMEM());
return *(SMEM_instruction*)this;
}
const SMEM_instruction& smem() const noexcept
{
assert(isSMEM());
return *(SMEM_instruction*)this;
}
constexpr bool isSMEM() const noexcept { return format == Format::SMEM; }
DS_instruction& ds() noexcept
{
assert(isDS());
return *(DS_instruction*)this;
}
const DS_instruction& ds() const noexcept
{
assert(isDS());
return *(DS_instruction*)this;
}
constexpr bool isDS() const noexcept { return format == Format::DS; }
MTBUF_instruction& mtbuf() noexcept
{
assert(isMTBUF());
return *(MTBUF_instruction*)this;
}
const MTBUF_instruction& mtbuf() const noexcept
{
assert(isMTBUF());
return *(MTBUF_instruction*)this;
}
constexpr bool isMTBUF() const noexcept { return format == Format::MTBUF; }
MUBUF_instruction& mubuf() noexcept
{
assert(isMUBUF());
return *(MUBUF_instruction*)this;
}
const MUBUF_instruction& mubuf() const noexcept
{
assert(isMUBUF());
return *(MUBUF_instruction*)this;
}
constexpr bool isMUBUF() const noexcept { return format == Format::MUBUF; }
MIMG_instruction& mimg() noexcept
{
assert(isMIMG());
return *(MIMG_instruction*)this;
}
const MIMG_instruction& mimg() const noexcept
{
assert(isMIMG());
return *(MIMG_instruction*)this;
}
constexpr bool isMIMG() const noexcept { return format == Format::MIMG; }
Export_instruction& exp() noexcept
{
assert(isEXP());
return *(Export_instruction*)this;
}
const Export_instruction& exp() const noexcept
{
assert(isEXP());
return *(Export_instruction*)this;
}
constexpr bool isEXP() const noexcept { return format == Format::EXP; }
FLAT_instruction& flat() noexcept
{
assert(isFlat());
return *(FLAT_instruction*)this;
}
const FLAT_instruction& flat() const noexcept
{
assert(isFlat());
return *(FLAT_instruction*)this;
}
constexpr bool isFlat() const noexcept { return format == Format::FLAT; }
FLAT_instruction& global() noexcept
{
assert(isGlobal());
return *(FLAT_instruction*)this;
}
const FLAT_instruction& global() const noexcept
{
assert(isGlobal());
return *(FLAT_instruction*)this;
}
constexpr bool isGlobal() const noexcept { return format == Format::GLOBAL; }
FLAT_instruction& scratch() noexcept
{
assert(isScratch());
return *(FLAT_instruction*)this;
}
const FLAT_instruction& scratch() const noexcept
{
assert(isScratch());
return *(FLAT_instruction*)this;
}
constexpr bool isScratch() const noexcept { return format == Format::SCRATCH; }
Pseudo_branch_instruction& branch() noexcept
{
assert(isBranch());
return *(Pseudo_branch_instruction*)this;
}
const Pseudo_branch_instruction& branch() const noexcept
{
assert(isBranch());
return *(Pseudo_branch_instruction*)this;
}
constexpr bool isBranch() const noexcept { return format == Format::PSEUDO_BRANCH; }
Pseudo_barrier_instruction& barrier() noexcept
{
assert(isBarrier());
return *(Pseudo_barrier_instruction*)this;
}
const Pseudo_barrier_instruction& barrier() const noexcept
{
assert(isBarrier());
return *(Pseudo_barrier_instruction*)this;
}
constexpr bool isBarrier() const noexcept { return format == Format::PSEUDO_BARRIER; }
Pseudo_reduction_instruction& reduction() noexcept
{
assert(isReduction());
return *(Pseudo_reduction_instruction*)this;
}
const Pseudo_reduction_instruction& reduction() const noexcept
{
assert(isReduction());
return *(Pseudo_reduction_instruction*)this;
}
constexpr bool isReduction() const noexcept { return format == Format::PSEUDO_REDUCTION; }
VOP3P_instruction& vop3p() noexcept
{
assert(isVOP3P());
return *(VOP3P_instruction*)this;
}
const VOP3P_instruction& vop3p() const noexcept
{
assert(isVOP3P());
return *(VOP3P_instruction*)this;
}
constexpr bool isVOP3P() const noexcept { return format == Format::VOP3P; }
VOP1_instruction& vop1() noexcept
{
assert(isVOP1());
return *(VOP1_instruction*)this;
}
const VOP1_instruction& vop1() const noexcept
{
assert(isVOP1());
return *(VOP1_instruction*)this;
}
constexpr bool isVOP1() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP1; }
VOP2_instruction& vop2() noexcept
{
assert(isVOP2());
return *(VOP2_instruction*)this;
}
const VOP2_instruction& vop2() const noexcept
{
assert(isVOP2());
return *(VOP2_instruction*)this;
}
constexpr bool isVOP2() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP2; }
VOPC_instruction& vopc() noexcept
{
assert(isVOPC());
return *(VOPC_instruction*)this;
}
const VOPC_instruction& vopc() const noexcept
{
assert(isVOPC());
return *(VOPC_instruction*)this;
}
constexpr bool isVOPC() const noexcept { return (uint16_t)format & (uint16_t)Format::VOPC; }
VOP3_instruction& vop3() noexcept
{
assert(isVOP3());
return *(VOP3_instruction*)this;
}
const VOP3_instruction& vop3() const noexcept
{
assert(isVOP3());
return *(VOP3_instruction*)this;
}
constexpr bool isVOP3() const noexcept { return (uint16_t)format & (uint16_t)Format::VOP3; }
Interp_instruction& vintrp() noexcept
{
assert(isVINTRP());
return *(Interp_instruction*)this;
}
const Interp_instruction& vintrp() const noexcept
{
assert(isVINTRP());
return *(Interp_instruction*)this;
}
constexpr bool isVINTRP() const noexcept { return (uint16_t)format & (uint16_t)Format::VINTRP; }
DPP_instruction& dpp() noexcept
{
assert(isDPP());
return *(DPP_instruction*)this;
}
const DPP_instruction& dpp() const noexcept
{
assert(isDPP());
return *(DPP_instruction*)this;
}
constexpr bool isDPP() const noexcept { return (uint16_t)format & (uint16_t)Format::DPP; }
SDWA_instruction& sdwa() noexcept
{
assert(isSDWA());
return *(SDWA_instruction*)this;
}
const SDWA_instruction& sdwa() const noexcept
{
assert(isSDWA());
return *(SDWA_instruction*)this;
}
constexpr bool isSDWA() const noexcept { return (uint16_t)format & (uint16_t)Format::SDWA; }
FLAT_instruction& flatlike() { return *(FLAT_instruction*)this; }
const FLAT_instruction& flatlike() const { return *(FLAT_instruction*)this; }
constexpr bool isFlatLike() const noexcept { return isFlat() || isGlobal() || isScratch(); }
constexpr bool isVALU() const noexcept
{
return isVOP1() || isVOP2() || isVOPC() || isVOP3() || isVOP3P();
}
constexpr bool isSALU() const noexcept
{
return isSOP1() || isSOP2() || isSOPC() || isSOPK() || isSOPP();
}
constexpr bool isVMEM() const noexcept { return isMTBUF() || isMUBUF() || isMIMG(); }
};
static_assert(sizeof(Instruction) == 16, "Unexpected padding");
struct SOPK_instruction : public Instruction {
uint16_t imm;
uint16_t padding;
};
static_assert(sizeof(SOPK_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
struct SOPP_instruction : public Instruction {
uint32_t imm;
int block;
};
static_assert(sizeof(SOPP_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
struct SOPC_instruction : public Instruction {};
static_assert(sizeof(SOPC_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
struct SOP1_instruction : public Instruction {};
static_assert(sizeof(SOP1_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
struct SOP2_instruction : public Instruction {};
static_assert(sizeof(SOP2_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
struct SMEM_instruction : public Instruction {
memory_sync_info sync;
bool glc : 1;
bool dlc : 1;
bool nv : 1;
bool disable_wqm : 1;
bool prevent_overflow : 1;
uint8_t padding : 3;
};
static_assert(sizeof(SMEM_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
struct VOP1_instruction : public Instruction {};
static_assert(sizeof(VOP1_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
struct VOP2_instruction : public Instruction {};
static_assert(sizeof(VOP2_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
struct VOPC_instruction : public Instruction {};
static_assert(sizeof(VOPC_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
struct VOP3_instruction : public Instruction {
bool abs[3];
bool neg[3];
uint8_t opsel : 4;
uint8_t omod : 2;
bool clamp : 1;
uint8_t padding0 : 1;
uint8_t padding1;
};
static_assert(sizeof(VOP3_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
struct VOP3P_instruction : public Instruction {
bool neg_lo[3];
bool neg_hi[3];
uint8_t opsel_lo : 3;
uint8_t opsel_hi : 3;
bool clamp : 1;
uint8_t padding0 : 1;
uint8_t padding1;
};
static_assert(sizeof(VOP3P_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
struct DPP_instruction : public Instruction {
bool abs[2];
bool neg[2];
uint16_t dpp_ctrl;
uint8_t row_mask : 4;
uint8_t bank_mask : 4;
bool bound_ctrl : 1;
uint8_t padding : 7;
};
static_assert(sizeof(DPP_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
enum sdwa_sel : uint8_t {
sdwa_wordnum = 0x1,
sdwa_bytenum = 0x3,
sdwa_asuint = 0x7 | 0x10,
sdwa_rasize = 0x3,
sdwa_isword = 0x4,
sdwa_sext = 0x8,
sdwa_isra = 0x10,
sdwa_ubyte0 = 0,
sdwa_ubyte1 = 1,
sdwa_ubyte2 = 2,
sdwa_ubyte3 = 3,
sdwa_uword0 = sdwa_isword | 0,
sdwa_uword1 = sdwa_isword | 1,
sdwa_udword = 6,
sdwa_sbyte0 = sdwa_ubyte0 | sdwa_sext,
sdwa_sbyte1 = sdwa_ubyte1 | sdwa_sext,
sdwa_sbyte2 = sdwa_ubyte2 | sdwa_sext,
sdwa_sbyte3 = sdwa_ubyte3 | sdwa_sext,
sdwa_sword0 = sdwa_uword0 | sdwa_sext,
sdwa_sword1 = sdwa_uword1 | sdwa_sext,
sdwa_sdword = sdwa_udword | sdwa_sext,
sdwa_ubyte = 1 | sdwa_isra,
sdwa_uword = 2 | sdwa_isra,
sdwa_sbyte = sdwa_ubyte | sdwa_sext,
sdwa_sword = sdwa_uword | sdwa_sext,
};
struct SDWA_instruction : public Instruction {
uint8_t sel[2];
uint8_t dst_sel;
bool neg[2];
bool abs[2];
bool dst_preserve : 1;
bool clamp : 1;
uint8_t omod : 2;
uint8_t padding : 4;
};
static_assert(sizeof(SDWA_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
struct Interp_instruction : public Instruction {
uint8_t attribute;
uint8_t component;
uint16_t padding;
};
static_assert(sizeof(Interp_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
struct DS_instruction : public Instruction {
memory_sync_info sync;
bool gds;
int16_t offset0;
int8_t offset1;
uint8_t padding;
};
static_assert(sizeof(DS_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
struct MUBUF_instruction : public Instruction {
memory_sync_info sync;
bool offen : 1;
bool idxen : 1;
bool addr64 : 1;
bool glc : 1;
bool dlc : 1;
bool slc : 1;
bool tfe : 1;
bool lds : 1;
uint16_t disable_wqm : 1;
uint16_t offset : 12;
uint16_t swizzled : 1;
uint16_t padding0 : 2;
uint16_t vtx_binding : 6;
uint16_t padding1 : 10;
};
static_assert(sizeof(MUBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
struct MTBUF_instruction : public Instruction {
memory_sync_info sync;
uint8_t dfmt : 4;
uint8_t nfmt : 3;
bool offen : 1;
uint16_t idxen : 1;
uint16_t glc : 1;
uint16_t dlc : 1;
uint16_t slc : 1;
uint16_t tfe : 1;
uint16_t disable_wqm : 1;
uint16_t vtx_binding : 6;
uint16_t padding : 4;
uint16_t offset;
};
static_assert(sizeof(MTBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
struct MIMG_instruction : public Instruction {
memory_sync_info sync;
uint8_t dmask;
uint8_t dim : 3;
bool unrm : 1;
bool dlc : 1;
bool glc : 1;
bool slc : 1;
bool tfe : 1;
bool da : 1;
bool lwe : 1;
bool r128 : 1;
bool a16 : 1;
bool d16 : 1;
bool disable_wqm : 1;
uint8_t padding0 : 2;
uint8_t padding1;
uint8_t padding2;
};
static_assert(sizeof(MIMG_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
struct FLAT_instruction : public Instruction {
memory_sync_info sync;
bool slc : 1;
bool glc : 1;
bool dlc : 1;
bool lds : 1;
bool nv : 1;
bool disable_wqm : 1;
uint8_t padding0 : 2;
uint16_t offset;
uint16_t padding1;
};
static_assert(sizeof(FLAT_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
struct Export_instruction : public Instruction {
uint8_t enabled_mask;
uint8_t dest;
bool compressed : 1;
bool done : 1;
bool valid_mask : 1;
uint8_t padding0 : 5;
uint8_t padding1;
};
static_assert(sizeof(Export_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
struct Pseudo_instruction : public Instruction {
PhysReg scratch_sgpr;
bool tmp_in_scc;
uint8_t padding;
};
static_assert(sizeof(Pseudo_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
struct Pseudo_branch_instruction : public Instruction {
uint32_t target[2];
};
static_assert(sizeof(Pseudo_branch_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
struct Pseudo_barrier_instruction : public Instruction {
memory_sync_info sync;
sync_scope exec_scope;
};
static_assert(sizeof(Pseudo_barrier_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
enum ReduceOp : uint16_t {
iadd8, iadd16, iadd32, iadd64,
imul8, imul16, imul32, imul64,
fadd16, fadd32, fadd64,
fmul16, fmul32, fmul64,
imin8, imin16, imin32, imin64,
imax8, imax16, imax32, imax64,
umin8, umin16, umin32, umin64,
umax8, umax16, umax32, umax64,
fmin16, fmin32, fmin64,
fmax16, fmax32, fmax64,
iand8, iand16, iand32, iand64,
ior8, ior16, ior32, ior64,
ixor8, ixor16, ixor32, ixor64,
num_reduce_ops,
};
struct Pseudo_reduction_instruction : public Instruction {
ReduceOp reduce_op;
uint16_t cluster_size;
};
static_assert(sizeof(Pseudo_reduction_instruction) == sizeof(Instruction) + 4,
"Unexpected padding");
struct instr_deleter_functor {
void operator()(void* p) { free(p); }
};
template <typename T> using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
template <typename T>
T*
create_instruction(aco_opcode opcode, Format format, uint32_t num_operands,
uint32_t num_definitions)
{
std::size_t size =
sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
char* data = (char*)calloc(1, size);
T* inst = (T*)data;
inst->opcode = opcode;
inst->format = format;
uint16_t operands_offset = data + sizeof(T) - (char*)&inst->operands;
inst->operands = aco::span<Operand>(operands_offset, num_operands);
uint16_t definitions_offset = (char*)inst->operands.end() - (char*)&inst->definitions;
inst->definitions = aco::span<Definition>(definitions_offset, num_definitions);
return inst;
}
constexpr bool
Instruction::usesModifiers() const noexcept
{
if (isDPP() || isSDWA())
return true;
if (isVOP3P()) {
const VOP3P_instruction& vop3p = this->vop3p();
for (unsigned i = 0; i < operands.size(); i++) {
if (vop3p.neg_lo[i] || vop3p.neg_hi[i])
return true;
if (!(vop3p.opsel_hi & (1 << i)))
return true;
}
return vop3p.opsel_lo || vop3p.clamp;
} else if (isVOP3()) {
const VOP3_instruction& vop3 = this->vop3();
for (unsigned i = 0; i < operands.size(); i++) {
if (vop3.abs[i] || vop3.neg[i])
return true;
}
return vop3.opsel || vop3.clamp || vop3.omod;
}
return false;
}
constexpr bool
is_phi(Instruction* instr)
{
return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
}
static inline bool
is_phi(aco_ptr<Instruction>& instr)
{
return is_phi(instr.get());
}
memory_sync_info get_sync_info(const Instruction* instr);
bool is_dead(const std::vector<uint16_t>& uses, Instruction* instr);
bool can_use_opsel(chip_class chip, aco_opcode op, int idx, bool high);
bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr, bool pre_ra);
aco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& instr);
bool needs_exec_mask(const Instruction* instr);
uint32_t get_reduction_identity(ReduceOp op, unsigned idx);
unsigned get_mimg_nsa_dwords(const Instruction* instr);
bool should_form_clause(const Instruction* a, const Instruction* b);
enum block_kind {
block_kind_uniform = 1 << 0,
block_kind_top_level = 1 << 1,
block_kind_loop_preheader = 1 << 2,
block_kind_loop_header = 1 << 3,
block_kind_loop_exit = 1 << 4,
block_kind_continue = 1 << 5,
block_kind_break = 1 << 6,
block_kind_continue_or_break = 1 << 7,
block_kind_discard = 1 << 8,
block_kind_branch = 1 << 9,
block_kind_merge = 1 << 10,
block_kind_invert = 1 << 11,
block_kind_uses_discard_if = 1 << 12,
block_kind_needs_lowering = 1 << 13,
block_kind_uses_demote = 1 << 14,
block_kind_export_end = 1 << 15,
};
struct RegisterDemand {
constexpr RegisterDemand() = default;
constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept : vgpr{v}, sgpr{s} {}
int16_t vgpr = 0;
int16_t sgpr = 0;
constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept
{
return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
}
constexpr bool exceeds(const RegisterDemand other) const noexcept
{
return vgpr > other.vgpr || sgpr > other.sgpr;
}
constexpr RegisterDemand operator+(const Temp t) const noexcept
{
if (t.type() == RegType::sgpr)
return RegisterDemand(vgpr, sgpr + t.size());
else
return RegisterDemand(vgpr + t.size(), sgpr);
}
constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept
{
return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
}
constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept
{
return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
}
constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept
{
vgpr += other.vgpr;
sgpr += other.sgpr;
return *this;
}
constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept
{
vgpr -= other.vgpr;
sgpr -= other.sgpr;
return *this;
}
constexpr RegisterDemand& operator+=(const Temp t) noexcept
{
if (t.type() == RegType::sgpr)
sgpr += t.size();
else
vgpr += t.size();
return *this;
}
constexpr RegisterDemand& operator-=(const Temp t) noexcept
{
if (t.type() == RegType::sgpr)
sgpr -= t.size();
else
vgpr -= t.size();
return *this;
}
constexpr void update(const RegisterDemand other) noexcept
{
vgpr = std::max(vgpr, other.vgpr);
sgpr = std::max(sgpr, other.sgpr);
}
};
struct Block {
float_mode fp_mode;
unsigned index;
unsigned offset = 0;
std::vector<aco_ptr<Instruction>> instructions;
std::vector<unsigned> logical_preds;
std::vector<unsigned> linear_preds;
std::vector<unsigned> logical_succs;
std::vector<unsigned> linear_succs;
RegisterDemand register_demand = RegisterDemand();
uint16_t loop_nest_depth = 0;
uint16_t divergent_if_logical_depth = 0;
uint16_t uniform_if_depth = 0;
uint16_t kind = 0;
int logical_idom = -1;
int linear_idom = -1;
bool scc_live_out = false;
PhysReg scratch_sgpr = PhysReg();
Block() : index(0) {}
};
enum class SWStage : uint8_t {
None = 0,
VS = 1 << 0,
GS = 1 << 1,
TCS = 1 << 2,
TES = 1 << 3,
FS = 1 << 4,
CS = 1 << 5,
GSCopy = 1 << 6,
VS_GS = VS | GS,
VS_TCS = VS | TCS,
TES_GS = TES | GS,
};
constexpr SWStage
operator|(SWStage a, SWStage b)
{
return static_cast<SWStage>(static_cast<uint8_t>(a) | static_cast<uint8_t>(b));
}
enum class HWStage : uint8_t {
VS,
ES,
GS,
NGG,
LS,
HS,
FS,
CS,
};
struct Stage {
constexpr Stage() = default;
explicit constexpr Stage(HWStage hw_, SWStage sw_) : sw(sw_), hw(hw_) {}
constexpr bool has(SWStage stage) const
{
return (static_cast<uint8_t>(sw) & static_cast<uint8_t>(stage));
}
unsigned num_sw_stages() const { return util_bitcount(static_cast<uint8_t>(sw)); }
constexpr bool operator==(const Stage& other) const { return sw == other.sw && hw == other.hw; }
constexpr bool operator!=(const Stage& other) const { return sw != other.sw || hw != other.hw; }
SWStage sw = SWStage::None;
HWStage hw{};
};
static constexpr Stage vertex_vs(HWStage::VS, SWStage::VS);
static constexpr Stage fragment_fs(HWStage::FS, SWStage::FS);
static constexpr Stage compute_cs(HWStage::CS, SWStage::CS);
static constexpr Stage tess_eval_vs(HWStage::VS, SWStage::TES);
static constexpr Stage gs_copy_vs(HWStage::VS, SWStage::GSCopy);
static constexpr Stage vertex_ngg(HWStage::NGG, SWStage::VS);
static constexpr Stage vertex_geometry_ngg(HWStage::NGG, SWStage::VS_GS);
static constexpr Stage tess_eval_ngg(HWStage::NGG, SWStage::TES);
static constexpr Stage tess_eval_geometry_ngg(HWStage::NGG, SWStage::TES_GS);
static constexpr Stage vertex_geometry_gs(HWStage::GS, SWStage::VS_GS);
static constexpr Stage vertex_tess_control_hs(HWStage::HS, SWStage::VS_TCS);
static constexpr Stage tess_eval_geometry_gs(HWStage::GS, SWStage::TES_GS);
static constexpr Stage vertex_ls(HWStage::LS, SWStage::VS);
static constexpr Stage vertex_es(HWStage::ES, SWStage::VS);
static constexpr Stage tess_control_hs(HWStage::HS, SWStage::TCS);
static constexpr Stage tess_eval_es(HWStage::ES,
SWStage::TES);
static constexpr Stage geometry_gs(HWStage::GS, SWStage::GS);
enum statistic {
statistic_hash,
statistic_instructions,
statistic_copies,
statistic_branches,
statistic_latency,
statistic_inv_throughput,
statistic_vmem_clauses,
statistic_smem_clauses,
statistic_sgpr_presched,
statistic_vgpr_presched,
num_statistics
};
struct DeviceInfo {
uint16_t lds_encoding_granule;
uint16_t lds_alloc_granule;
uint32_t lds_limit;
bool has_16bank_lds;
uint16_t physical_sgprs;
uint16_t physical_vgprs;
uint16_t vgpr_limit;
uint16_t sgpr_limit;
uint16_t sgpr_alloc_granule;
uint16_t vgpr_alloc_granule;
unsigned max_wave64_per_simd;
unsigned simd_per_cu;
bool has_fast_fma32 = false;
bool xnack_enabled = false;
bool sram_ecc_enabled = false;
};
enum class CompilationProgress {
after_isel,
after_spilling,
after_ra,
};
class Program final {
public:
std::vector<Block> blocks;
std::vector<RegClass> temp_rc = {s1};
RegisterDemand max_reg_demand = RegisterDemand();
uint16_t num_waves = 0;
uint16_t max_waves = 0;
ac_shader_config* config;
struct radv_shader_info* info;
enum chip_class chip_class;
enum radeon_family family;
DeviceInfo dev;
unsigned wave_size;
RegClass lane_mask;
Stage stage;
bool needs_exact = false;
bool needs_wqm = false;
std::vector<uint8_t> constant_data;
Temp private_segment_buffer;
Temp scratch_offset;
uint16_t min_waves = 0;
unsigned workgroup_size;
bool wgp_mode;
bool early_rast = false;
bool needs_vcc = false;
bool needs_flat_scr = false;
CompilationProgress progress;
bool collect_statistics = false;
uint32_t statistics[num_statistics];
float_mode next_fp_mode;
unsigned next_loop_depth = 0;
unsigned next_divergent_if_logical_depth = 0;
unsigned next_uniform_if_depth = 0;
struct {
FILE* output = stderr;
bool shorten_messages = false;
void (*func)(void* private_data, enum radv_compiler_debug_level level, const char* message);
void* private_data;
} debug;
uint32_t allocateId(RegClass rc)
{
assert(allocationID <= 16777215);
temp_rc.push_back(rc);
return allocationID++;
}
void allocateRange(unsigned amount)
{
assert(allocationID + amount <= 16777216);
temp_rc.resize(temp_rc.size() + amount);
allocationID += amount;
}
Temp allocateTmp(RegClass rc) { return Temp(allocateId(rc), rc); }
uint32_t peekAllocationId() { return allocationID; }
friend void reindex_ssa(Program* program);
friend void reindex_ssa(Program* program, std::vector<IDSet>& live_out);
Block* create_and_insert_block()
{
Block block;
return insert_block(std::move(block));
}
Block* insert_block(Block&& block)
{
block.index = blocks.size();
block.fp_mode = next_fp_mode;
block.loop_nest_depth = next_loop_depth;
block.divergent_if_logical_depth = next_divergent_if_logical_depth;
block.uniform_if_depth = next_uniform_if_depth;
blocks.emplace_back(std::move(block));
return &blocks.back();
}
private:
uint32_t allocationID = 1;
};
struct live {
std::vector<IDSet> live_out;
std::vector<std::vector<RegisterDemand>> register_demand;
};
struct ra_test_policy {
bool skip_optimistic_path = false;
};
void init();
void init_program(Program* program, Stage stage, struct radv_shader_info* info,
enum chip_class chip_class, enum radeon_family family, bool wgp_mode,
ac_shader_config* config);
void select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
ac_shader_config* config, struct radv_shader_args* args);
void select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_config* config,
struct radv_shader_args* args);
void select_trap_handler_shader(Program* program, struct nir_shader* shader,
ac_shader_config* config, struct radv_shader_args* args);
void lower_phis(Program* program);
void calc_min_waves(Program* program);
void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
live live_var_analysis(Program* program);
std::vector<uint16_t> dead_code_analysis(Program* program);
void dominator_tree(Program* program);
void insert_exec_mask(Program* program);
void value_numbering(Program* program);
void optimize(Program* program);
void optimize_postRA(Program* program);
void setup_reduce_temp(Program* program);
void lower_to_cssa(Program* program, live& live_vars);
void register_allocation(Program* program, std::vector<IDSet>& live_out_per_block,
ra_test_policy = {});
void ssa_elimination(Program* program);
void lower_to_hw_instr(Program* program);
void schedule_program(Program* program, live& live_vars);
void spill(Program* program, live& live_vars);
void insert_wait_states(Program* program);
void insert_NOPs(Program* program);
void form_hard_clauses(Program* program);
unsigned emit_program(Program* program, std::vector<uint32_t>& code);
bool print_asm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output);
bool validate_ir(Program* program);
bool validate_ra(Program* program);
#ifndef NDEBUG
void perfwarn(Program* program, bool cond, const char* msg, Instruction* instr = NULL);
#else
#define perfwarn(program, cond, msg, ...) \
do { \
} while (0)
#endif
void collect_presched_stats(Program* program);
void collect_preasm_stats(Program* program);
void collect_postasm_stats(Program* program, const std::vector<uint32_t>& code);
enum print_flags {
print_no_ssa = 0x1,
print_perf_info = 0x2,
print_kill = 0x4,
print_live_vars = 0x8,
};
void aco_print_operand(const Operand* operand, FILE* output, unsigned flags = 0);
void aco_print_instr(const Instruction* instr, FILE* output, unsigned flags = 0);
void aco_print_program(const Program* program, FILE* output, unsigned flags = 0);
void aco_print_program(const Program* program, FILE* output, const live& live_vars,
unsigned flags = 0);
void _aco_perfwarn(Program* program, const char* file, unsigned line, const char* fmt, ...);
void _aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...);
#define aco_perfwarn(program, ...) _aco_perfwarn(program, __FILE__, __LINE__, __VA_ARGS__)
#define aco_err(program, ...) _aco_err(program, __FILE__, __LINE__, __VA_ARGS__)
RegisterDemand get_live_changes(aco_ptr<Instruction>& instr);
RegisterDemand get_temp_registers(aco_ptr<Instruction>& instr);
RegisterDemand get_demand_before(RegisterDemand demand, aco_ptr<Instruction>& instr,
aco_ptr<Instruction>& instr_before);
uint16_t get_extra_sgprs(Program* program);
uint16_t get_sgpr_alloc(Program* program, uint16_t addressable_sgprs);
uint16_t get_vgpr_alloc(Program* program, uint16_t addressable_vgprs);
uint16_t get_addr_sgpr_from_waves(Program* program, uint16_t max_waves);
uint16_t get_addr_vgpr_from_waves(Program* program, uint16_t max_waves);
typedef struct {
const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
const char* name[static_cast<int>(aco_opcode::num_opcodes)];
const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
const unsigned operand_size[static_cast<int>(aco_opcode::num_opcodes)];
const unsigned definition_size[static_cast<int>(aco_opcode::num_opcodes)];
const instr_class classes[static_cast<int>(aco_opcode::num_opcodes)];
} Info;
extern const Info instr_info;
}
#endif