#include "ir3.h"
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "util/bitscan.h"
#include "util/half_float.h"
#include "util/ralloc.h"
#include "util/u_math.h"
#include "instr-a3xx.h"
#include "ir3_shader.h"
/**
 * Allocate a block of memory with the given ir3 object as its ralloc
 * parent.
 *
 * @param shader  ralloc context the allocation is attached to
 * @param sz      number of bytes requested
 * @return whatever rzalloc_size() returns for that request (the "z"
 *         variant of ralloc hands back zero-filled memory)
 */
void *
ir3_alloc(struct ir3 *shader, int sz)
{
   void *mem = rzalloc_size(shader, sz);

   return mem;
}
/**
 * Create a new, empty ir3 object for a shader variant.
 *
 * The object is allocated with the variant as its ralloc parent, records
 * the compiler and the variant's shader type, and starts out with empty
 * block and array lists.
 *
 * @param compiler  compiler instance stored in the new object
 * @param v         variant that parents the allocation and supplies the type
 * @return the newly created ir3 object
 */
struct ir3 *
ir3_create(struct ir3_compiler *compiler, struct ir3_shader_variant *v)
{
   struct ir3 *ir = rzalloc(v, struct ir3);

   list_inithead(&ir->array_list);
   list_inithead(&ir->block_list);

   ir->type = v->type;
   ir->compiler = compiler;

   return ir;
}
/**
 * Release an ir3 object by handing it to ralloc_free().
 *
 * NOTE(review): per ralloc semantics this should also release everything
 * allocated with the object as parent (e.g. via ir3_alloc()) -- confirm
 * against util/ralloc.h.
 *
 * @param shader  the ir3 object to free
 */
void
ir3_destroy(struct ir3 *shader)
{
   struct ir3 *ir = shader;

   ralloc_free(ir);
}
/**
 * Fold one register operand into the running maxima kept in @info.
 *
 * Depending on the operand's flags this bumps info->max_const,
 * info->max_reg or info->max_half_reg.  Immediates are ignored, and
 * non-const registers at or above regid(48, 0) are not counted.
 *
 * @param instr  instruction owning the operand (supplies the repeat count)
 * @param reg    operand to account for
 * @param info   statistics being accumulated; info->data is the variant
 */
static void
collect_reg_info(struct ir3_instruction *instr, struct ir3_register *reg,
                 struct ir3_info *info)
{
   /* Immediates are not tracked at all. */
   if (reg->flags & IR3_REG_IMMED)
      return;

   struct ir3_shader_variant *variant = info->data;

   /* The repeat count only widens the range when the reg has the (r) flag. */
   unsigned rpt = (reg->flags & IR3_REG_R) ? instr->repeat : 0;

   /* Highest scalar component index touched by this operand. */
   int16_t last;
   if (reg->flags & IR3_REG_RELATIV) {
      unsigned ncomp = reg->size;
      last = (reg->array.base + ncomp - 1);
   } else {
      unsigned ncomp = util_last_bit(reg->wrmask);
      last = (reg->num + rpt + ncomp - 1);
   }

   if (reg->flags & IR3_REG_CONST) {
      info->max_const = MAX2(info->max_const, last >> 2);
      return;
   }

   if (last >= regid(48, 0))
      return;

   bool is_half = !!(reg->flags & IR3_REG_HALF);

   if (is_half && !variant->mergedregs) {
      /* Half regs are tracked separately when the files are not merged. */
      info->max_half_reg = MAX2(info->max_half_reg, last >> 2);
   } else {
      /* With merged regs a half reg index is shifted by one extra bit
       * before being compared against the full-register maximum.
       */
      int shift = is_half ? 3 : 2;
      info->max_reg = MAX2(info->max_reg, last >> shift);
   }
}
/**
 * Decide whether a variant should use the doubled thread size.
 *
 * @param v           shader variant being examined
 * @param regs_count  register count used for the register-file check
 * @return true when the doubled thread size should be used
 */
bool
ir3_should_double_threadsize(struct ir3_shader_variant *v, unsigned regs_count)
{
   const struct ir3_compiler *compiler = v->shader->compiler;

   /* Refuse when the (clamped) branch stack does not fit the limit. */
   if (MIN2(v->branchstack, compiler->threadsize_base * 2) >
       compiler->branchstack_size)
      return false;

   if (v->type == MESA_SHADER_COMPUTE) {
      unsigned wg_threads =
         v->local_size[0] * v->local_size[1] * v->local_size[2];

      if (compiler->gpu_id < 600) {
         /* Before gpu_id 600: double only for variable-size workgroups or
          * when the workgroup exceeds threadsize_base * max_waves threads.
          */
         if (v->local_size_variable)
            return true;
         return wg_threads > compiler->threadsize_base * compiler->max_waves;
      }

      /* A fixed-size workgroup that fits in one base-size wave gains
       * nothing from doubling.
       */
      if (!v->local_size_variable && wg_threads <= compiler->threadsize_base)
         return false;
   } else if (v->type != MESA_SHADER_FRAGMENT) {
      /* Only compute and fragment shaders are candidates. */
      return false;
   }

   /* Doubling must not push register usage past the register file size. */
   return regs_count * 2 <= compiler->reg_size_vec4;
}
/**
 * Compute the wave limit that does not depend on register usage.
 *
 * Starts from compiler->max_waves and lowers it according to the shared
 * memory footprint (compute shaders with a fixed workgroup size only) and
 * the branch stack usage.
 *
 * @param v                  shader variant being examined
 * @param double_threadsize  whether the doubled thread size is in use
 * @return the resulting maximum number of waves
 */
unsigned
ir3_get_reg_independent_max_waves(struct ir3_shader_variant *v,
                                  bool double_threadsize)
{
   const struct ir3_compiler *compiler = v->shader->compiler;
   unsigned limit = compiler->max_waves;

   if (v->type == MESA_SHADER_COMPUTE && !v->local_size_variable) {
      /* Shared size is rounded up with a 1024 alignment before use. */
      unsigned shared_per_wg = ALIGN_POT(v->shared_size, 1024);

      if (shared_per_wg > 0) {
         unsigned wg_threads =
            v->local_size[0] * v->local_size[1] * v->local_size[2];
         unsigned wg_waves =
            DIV_ROUND_UP(wg_threads, compiler->threadsize_base *
                                        (double_threadsize ? 2 : 1) *
                                        compiler->wave_granularity);
         unsigned wgs_per_core = compiler->local_mem_size / shared_per_wg;

         limit = MIN2(limit, wg_waves * wgs_per_core *
                                compiler->wave_granularity);
      }
   }

   if (v->branchstack > 0) {
      unsigned branchstack_limit = compiler->branchstack_size /
                                   v->branchstack *
                                   compiler->wave_granularity;

      limit = MIN2(limit, branchstack_limit);
   }

   return limit;
}
/**
 * Compute the wave limit imposed by register usage.
 *
 * @param compiler           compiler providing reg_size_vec4,
 *                           wave_granularity and max_waves
 * @param reg_count          register count of the shader
 * @param double_threadsize  whether the doubled thread size is in use
 * @return compiler->max_waves when reg_count is zero, otherwise
 *         reg_size_vec4 divided by the (possibly doubled) register count,
 *         multiplied by the wave granularity
 */
unsigned
ir3_get_reg_dependent_max_waves(const struct ir3_compiler *compiler,
                                unsigned reg_count, bool double_threadsize)
{
   if (!reg_count)
      return compiler->max_waves;

   return compiler->reg_size_vec4 /
          (reg_count * (double_threadsize ? 2 : 1)) *
          compiler->wave_granularity;
}
/**
 * Walk the variant's IR and fill in v->info (and v->instrlen).
 *
 * Gathers program size, register/const maxima, per-category instruction
 * counts, nop/mov/cov counts, (ss)/(sy) counts, an estimate of stall
 * cycles behind (ss), and finally the thread size / max-waves decision.
 *
 * @param v  shader variant whose v->ir is inspected and whose v->info is
 *           overwritten
 */
void
ir3_collect_info(struct ir3_shader_variant *v)
{
struct ir3_info *info = &v->info;
struct ir3 *shader = v->ir;
const struct ir3_compiler *compiler = v->shader->compiler;
/* Start from a clean slate; the maxima use -1 to mean "nothing seen". */
memset(info, 0, sizeof(*info));
info->data = v;
info->max_reg = -1;
info->max_half_reg = -1;
info->max_const = -1;
info->multi_dword_ldp_stp = false;
/* First pass: count IR instructions (not expanded by repeat/nop). */
uint32_t instr_count = 0;
foreach_block (block, &shader->block_list) {
foreach_instr (instr, &block->instr_list) {
instr_count++;
}
}
/* Program length in units of compiler->instr_align instructions. */
v->instrlen = DIV_ROUND_UP(instr_count, compiler->instr_align);
/* Size is the larger of the aligned length and instr_count + 4, times 8
 * (presumably 8 bytes per instruction -- TODO confirm); sizedwords is
 * the same size expressed in 4-byte units.
 */
info->size = MAX2(v->instrlen * compiler->instr_align, instr_count + 4) * 8;
info->sizedwords = info->size / 4;
/* Second pass: per-instruction statistics. */
foreach_block (block, &shader->block_list) {
/* Remaining delay attributed to the last SFU instruction; it is
 * tracked per block.
 */
int sfu_delay = 0;
foreach_instr (instr, &block->instr_list) {
/* Register/const maxima over all sources and GPR destinations. */
foreach_src (reg, instr) {
collect_reg_info(instr, reg, info);
}
foreach_dst (reg, instr) {
if (is_dest_gpr(reg)) {
collect_reg_info(instr, reg, info);
}
}
/* Flag ldp/stp whose count operand times the element type size
 * exceeds 32.
 */
if ((instr->opc == OPC_STP || instr->opc == OPC_LDP)) {
struct ir3_register *base =
(instr->opc == OPC_STP) ? instr->srcs[2] : instr->srcs[1];
if (base->iim_val * type_size(instr->cat6.type) > 32) {
info->multi_dword_ldp_stp = true;
}
}
/* Record the position of the latest bary.f with IR3_REG_EI on its
 * destination.  This must read info->instrs_count before it is
 * advanced for the current instruction below.
 */
if ((instr->opc == OPC_BARY_F) && (instr->dsts[0]->flags & IR3_REG_EI))
info->last_baryf = info->instrs_count;
/* Expanded counts: repeat and embedded nops each add to the total. */
unsigned instrs_count = 1 + instr->repeat + instr->nop;
unsigned nops_count = instr->nop;
/* An explicit nop counts once per repeat in category 0; any other
 * instruction counts in its own category, with its embedded nops
 * added to category 0.
 */
if (instr->opc == OPC_NOP) {
nops_count = 1 + instr->repeat;
info->instrs_per_cat[0] += nops_count;
} else {
info->instrs_per_cat[opc_cat(instr->opc)] += 1 + instr->repeat;
info->instrs_per_cat[0] += nops_count;
}
/* A mov with identical src/dst types is a plain move, otherwise it
 * is counted as a conversion.
 */
if (instr->opc == OPC_MOV) {
if (instr->cat1.src_type == instr->cat1.dst_type) {
info->mov_count += 1 + instr->repeat;
} else {
info->cov_count += 1 + instr->repeat;
}
}
info->instrs_count += instrs_count;
info->nops_count += nops_count;
/* An (ss) sync absorbs whatever SFU delay is still outstanding. */
if (instr->flags & IR3_INSTR_SS) {
info->ss++;
info->sstall += sfu_delay;
sfu_delay = 0;
}
if (instr->flags & IR3_INSTR_SY)
info->sy++;
/* An SFU instruction (re)starts a delay of 10; every other
 * instruction burns it down by its expanded count, never below zero.
 */
if (is_sfu(instr)) {
sfu_delay = 10;
} else {
int n = MIN2(sfu_delay, 1 + instr->repeat + instr->nop);
sfu_delay -= n;
}
}
}
/* Full registers used, plus half the half-register count (rounded up)
 * when gpu_id >= 600.
 */
unsigned regs_count =
info->max_reg + 1 +
(compiler->gpu_id >= 600 ? ((info->max_half_reg + 2) / 2) : 0);
info->double_threadsize = ir3_should_double_threadsize(v, regs_count);
/* Final wave limit is the tighter of the two independent limits. */
unsigned reg_independent_max_waves =
ir3_get_reg_independent_max_waves(v, info->double_threadsize);
unsigned reg_dependent_max_waves = ir3_get_reg_dependent_max_waves(
compiler, regs_count, info->double_threadsize);
info->max_waves = MIN2(reg_independent_max_waves, reg_dependent_max_waves);
assert(info->max_waves <= v->shader->compiler->max_waves);
}
/**
 * Allocate a register object owned by @shader.
 *
 * @param shader  ir3 object the allocation is attached to
 * @param num     value stored in the register's num field
 * @param flags   value stored in the register's flags field
 * @return the new register, with wrmask set to 1
 */
static struct ir3_register *
reg_create(struct ir3 *shader, int num, int flags)
{
   struct ir3_register *r = ir3_alloc(shader, sizeof(*r));

   r->num = num;
   r->flags = flags;
   r->wrmask = 1;

   return r;
}
/**
 * Append an instruction to the end of a block's instruction list.
 *
 * Also assigns the instruction the next serial number of the owning ir3
 * object and, for instructions that is_input() accepts, records it in
 * that object's baryfs array.
 *
 * @param block  block receiving the instruction
 * @param instr  instruction to append
 */
static void
insert_instr(struct ir3_block *block, struct ir3_instruction *instr)
{
   struct ir3 *ir = block->shader;

   list_addtail(&instr->node, &block->instr_list);
   instr->serialno = ++ir->instr_count;

   if (is_input(instr)) {
      array_insert(ir, ir->baryfs, instr);
   }
}
/**
 * Create a new, empty block owned by @shader.
 *
 * The block's own list node and its instruction list are initialised as
 * empty lists; the block is not linked into shader->block_list here.
 * Debug builds also give it a serial number.
 *
 * @param shader  ir3 object that owns the block
 * @return the new block
 */
struct ir3_block *
ir3_block_create(struct ir3 *shader)
{
   struct ir3_block *blk = ir3_alloc(shader, sizeof(struct ir3_block));

   list_inithead(&blk->instr_list);
   list_inithead(&blk->node);
   blk->shader = shader;
#ifdef DEBUG
   blk->serialno = ++shader->block_count;
#endif

   return blk;
}
/**
 * Append @pred to the predecessors array of @block.
 *
 * No duplicate check is performed.
 *
 * @param block  block whose predecessor array grows
 * @param pred   predecessor to append
 */
void
ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred)
{
   struct ir3_block *succ = block;

   array_insert(succ, succ->predecessors, pred);
}
/**
 * Append @pred to the physical_predecessors array of @block.
 *
 * No duplicate check is performed.
 *
 * @param block  block whose physical predecessor array grows
 * @param pred   predecessor to append
 */
void
ir3_block_add_physical_predecessor(struct ir3_block *block,
                                   struct ir3_block *pred)
{
   struct ir3_block *succ = block;

   array_insert(succ, succ->physical_predecessors, pred);
}
/**
 * Remove the first occurrence of @pred from @block's predecessors.
 *
 * The hole is filled with the last array element, so the order of the
 * remaining predecessors is not preserved.  Nothing happens when @pred is
 * not found.
 *
 * @param block  block to modify
 * @param pred   predecessor to remove
 */
void
ir3_block_remove_predecessor(struct ir3_block *block, struct ir3_block *pred)
{
   unsigned count = block->predecessors_count;

   for (unsigned idx = 0; idx < count; idx++) {
      if (block->predecessors[idx] != pred)
         continue;

      unsigned last = count - 1;

      /* Swap-remove: move the tail entry into the vacated slot. */
      if (idx != last)
         block->predecessors[idx] = block->predecessors[last];

      block->predecessors_count = last;
      return;
   }
}
/**
 * Find the position of @pred in @block's predecessors array.
 *
 * @param block  block whose predecessors are searched
 * @param pred   predecessor to look for; it must be present
 * @return index of the first matching entry
 */
unsigned
ir3_block_get_pred_index(struct ir3_block *block, struct ir3_block *pred)
{
   unsigned idx = 0;

   while (idx < block->predecessors_count) {
      if (block->predecessors[idx] == pred)
         return idx;
      idx++;
   }

   unreachable("ir3_block_get_pred_index() invalid predecessor");
}
/**
 * Allocate an instruction together with its dst/src pointer arrays.
 *
 * The instruction header, the dsts array and the srcs array live in one
 * allocation, laid out in that order.  For opcodes in category 1 and
 * above, room for two extra sources is reserved.  The opcode and block
 * fields are not set here.
 *
 * @param block  block whose ir3 object owns the allocation
 * @param opc    opcode, used only to decide on the extra source slots
 * @param ndst   number of destination slots
 * @param nsrc   number of source slots requested by the caller
 * @return the zero-initialised instruction with dsts/srcs pointing at
 *         their slots
 */
static struct ir3_instruction *
instr_create(struct ir3_block *block, opc_t opc, int ndst, int nsrc)
{
   /* Reserve two additional source slots beyond what was asked for. */
   if (opc_cat(opc) >= 1)
      nsrc += 2;

   struct ir3_instruction *instr;
   unsigned sz = sizeof(*instr) + (ndst * sizeof(instr->dsts[0])) +
                 (nsrc * sizeof(instr->srcs[0]));
   char *mem = ir3_alloc(block->shader, sz);

   instr = (struct ir3_instruction *)mem;

   /* The pointer arrays follow the header directly: dsts, then srcs. */
   instr->dsts = (struct ir3_register **)(mem + sizeof(*instr));
   instr->srcs = &instr->dsts[ndst];
#ifdef DEBUG
   instr->dsts_max = ndst;
   instr->srcs_max = nsrc;
#endif

   return instr;
}
/**
 * Create an instruction and append it to @block.
 *
 * @param block  block that receives the instruction
 * @param opc    opcode of the new instruction
 * @param ndst   number of destination slots to reserve
 * @param nsrc   number of source slots to reserve
 * @return the new instruction, already linked at the end of @block
 */
struct ir3_instruction *
ir3_instr_create(struct ir3_block *block, opc_t opc, int ndst, int nsrc)
{
   struct ir3_instruction *new_instr = instr_create(block, opc, ndst, nsrc);

   new_instr->opc = opc;
   new_instr->block = block;
   insert_instr(block, new_instr);

   return new_instr;
}
struct ir3_instruction *
ir3_instr_clone(struct ir3_instruction *instr)
{
struct ir3_instruction *new_instr = instr_create(
instr->block, instr->opc, instr->dsts_count, instr->srcs_count);
struct ir3_register **dsts, **srcs;
dsts = new_instr->dsts;
srcs = new_instr->srcs;
*new_instr = *instr;
new_instr->dsts = dsts;
new_instr->srcs = srcs;
insert_instr(instr->block, new_instr);
new_instr->dsts_count = 0;
new_instr->srcs_count = 0;
foreach_dst (reg, instr) {
struct ir3_register *new_reg =
ir3_dst_create(new_instr, reg->num, reg->flags);
*new_reg = *reg;
if (new_reg->instr)
new_reg->instr = new_instr;
}
foreach_src (reg, instr) {
struct ir3_register *new_reg =
ir3_src_create(new_instr, reg->num, reg->flags);
*new_reg = *reg;
}
return new_instr;
}
/**
 * Record @dep in @instr's deps array unless it is already there.
 *
 * @param instr  instruction gaining the dependency
 * @param dep    instruction to add to instr->deps
 */
void
ir3_instr_add_dep(struct ir3_instruction *instr, struct ir3_instruction *dep)
{
   bool present = false;

   for (unsigned idx = 0; idx < instr->deps_count; idx++) {
      if (instr->deps[idx] == dep) {
         present = true;
         break;
      }
   }

   if (!present) {
      array_insert(instr, instr->deps, dep);
   }
}
/**
 * Create a register and append it to @instr's sources.
 *
 * The caller must have reserved enough source slots when the instruction
 * was created; debug builds assert this.
 *
 * @param instr  instruction receiving the new source
 * @param num    register number
 * @param flags  register flags
 * @return the new source register
 */
struct ir3_register *
ir3_src_create(struct ir3_instruction *instr, int num, int flags)
{
#ifdef DEBUG
   debug_assert(instr->srcs_count < instr->srcs_max);
#endif
   struct ir3_register *src = reg_create(instr->block->shader, num, flags);
   unsigned slot = instr->srcs_count;

   instr->srcs[slot] = src;
   instr->srcs_count = slot + 1;

   return src;
}
/**
 * Create a register and append it to @instr's destinations.
 *
 * The caller must have reserved enough destination slots when the
 * instruction was created; debug builds assert this.
 *
 * @param instr  instruction receiving the new destination
 * @param num    register number
 * @param flags  register flags
 * @return the new destination register
 */
struct ir3_register *
ir3_dst_create(struct ir3_instruction *instr, int num, int flags)
{
#ifdef DEBUG
   debug_assert(instr->dsts_count < instr->dsts_max);
#endif
   struct ir3_register *dst = reg_create(instr->block->shader, num, flags);
   unsigned slot = instr->dsts_count;

   instr->dsts[slot] = dst;
   instr->dsts_count = slot + 1;

   return dst;
}
/**
 * Make a field-for-field copy of a register, owned by @shader.
 *
 * The copy is not attached to any instruction's src/dst arrays.
 *
 * @param shader  ir3 object that owns the copy
 * @param reg     register to copy
 * @return the copy
 */
struct ir3_register *
ir3_reg_clone(struct ir3 *shader, struct ir3_register *reg)
{
   struct ir3_register *copy = reg_create(shader, 0, 0);

   /* The placeholder num/flags/wrmask are overwritten wholesale here. */
   *copy = *reg;

   return copy;
}
/**
 * Add a source to @instr that mirrors the array register @reg and ties
 * the two together.
 *
 * The new source is a copy of @reg whose def points at @last_write.
 *
 * @param instr       instruction that gains the extra source
 * @param reg         register to mirror; must have IR3_REG_ARRAY set
 * @param last_write  register stored as the new source's def
 */
void
ir3_reg_set_last_array(struct ir3_instruction *instr, struct ir3_register *reg,
                       struct ir3_register *last_write)
{
   assert(reg->flags & IR3_REG_ARRAY);

   struct ir3_register *tied_src = ir3_src_create(instr, 0, 0);

   *tied_src = *reg;
   tied_src->def = last_write;

   ir3_reg_tie(reg, tied_src);
}
void
ir3_instr_set_address(struct ir3_instruction *instr,
struct ir3_instruction *addr)
{
if (!instr->address) {
struct ir3 *ir = instr->block->shader;
debug_assert(instr->block == addr->block);
instr->address =
ir3_src_create(instr, addr->dsts[0]->num, addr->dsts[0]->flags);
instr->address->def = addr->dsts[0];
debug_assert(reg_num(addr->dsts[0]) == REG_A0);
unsigned comp = reg_comp(addr->dsts[0]);
if (comp == 0) {
array_insert(ir, ir->a0_users, instr);
} else {
debug_assert(comp == 1);
array_insert(ir, ir->a1_users, instr);
}
} else {
debug_assert(instr->address->def->instr == addr);
}
}
void
ir3_block_clear_mark(struct ir3_block *block)
{
foreach_instr (instr, &block->instr_list)
instr->flags &= ~IR3_INSTR_MARK;
}
void
ir3_clear_mark(struct ir3 *ir)
{
foreach_block (block, &ir->block_list) {
ir3_block_clear_mark(block);
}
}
unsigned
ir3_count_instructions(struct ir3 *ir)
{
unsigned cnt = 1;
foreach_block (block, &ir->block_list) {
block->start_ip = cnt;
foreach_instr (instr, &block->instr_list) {
instr->ip = cnt++;
}
block->end_ip = cnt;
}
return cnt;
}
unsigned
ir3_count_instructions_ra(struct ir3 *ir)
{
unsigned cnt = 1;
foreach_block (block, &ir->block_list) {
block->start_ip = cnt++;
foreach_instr (instr, &block->instr_list) {
instr->ip = cnt++;
}
block->end_ip = cnt++;
}
return cnt;
}
struct ir3_array *
ir3_lookup_array(struct ir3 *ir, unsigned id)
{
foreach_array (arr, &ir->array_list)
if (arr->id == id)
return arr;
return NULL;
}
void
ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx, bool falsedeps)
{
foreach_block (block, &ir->block_list)
foreach_instr (instr, &block->instr_list)
instr->uses = NULL;
foreach_block (block, &ir->block_list) {
foreach_instr (instr, &block->instr_list) {
foreach_ssa_src_n (src, n, instr) {
if (__is_false_dep(instr, n) && !falsedeps)
continue;
if (!src->uses)
src->uses = _mesa_pointer_set_create(mem_ctx);
_mesa_set_add(src->uses, instr);
}
}
}
}
void
ir3_set_dst_type(struct ir3_instruction *instr, bool half)
{
if (half) {
instr->dsts[0]->flags |= IR3_REG_HALF;
} else {
instr->dsts[0]->flags &= ~IR3_REG_HALF;
}
switch (opc_cat(instr->opc)) {
case 1:
if (half) {
instr->cat1.dst_type = half_type(instr->cat1.dst_type);
} else {
instr->cat1.dst_type = full_type(instr->cat1.dst_type);
}
break;
case 4:
if (half) {
instr->opc = cat4_half_opc(instr->opc);
} else {
instr->opc = cat4_full_opc(instr->opc);
}
break;
case 5:
if (half) {
instr->cat5.type = half_type(instr->cat5.type);
} else {
instr->cat5.type = full_type(instr->cat5.type);
}
break;
}
}
/**
 * Make an instruction's source type agree with its first source register.
 *
 * For category 1 the cat1.src_type field is adjusted, for category 3 the
 * opcode is swapped for its half/full counterpart.  Other categories are
 * left untouched and srcs[0] is not read for them.
 *
 * @param instr  instruction to fix up
 */
void
ir3_fixup_src_type(struct ir3_instruction *instr)
{
   switch (opc_cat(instr->opc)) {
   case 1: {
      bool half_src = !!(instr->srcs[0]->flags & IR3_REG_HALF);

      instr->cat1.src_type = half_src ? half_type(instr->cat1.src_type)
                                      : full_type(instr->cat1.src_type);
      break;
   }
   case 3: {
      bool half_src = !!(instr->srcs[0]->flags & IR3_REG_HALF);

      instr->opc = half_src ? cat3_half_opc(instr->opc)
                            : cat3_full_opc(instr->opc);
      break;
   }
   default:
      break;
   }
}
/**
 * Look up an immediate in the table of known floating point constants.
 *
 * For a register flagged IR3_REG_HALF the immediate is compared against
 * the 16-bit encodings, otherwise against the 32-bit encodings.
 *
 * @param src_reg  register whose uim_val holds the raw immediate bits
 * @return index of the matching table entry, or -1 if none matches
 */
int
ir3_flut(struct ir3_register *src_reg)
{
   static const struct {
      uint32_t f32;
      uint16_t f16;
   } flut[] = {
      { .f32 = 0x00000000, .f16 = 0x0000 }, /* 0.0 */
      { .f32 = 0x3f000000, .f16 = 0x3800 }, /* 0.5 */
      { .f32 = 0x3f800000, .f16 = 0x3c00 }, /* 1.0 */
      { .f32 = 0x40000000, .f16 = 0x4000 }, /* 2.0 */
      { .f32 = 0x402df854, .f16 = 0x4170 }, /* e */
      { .f32 = 0x40490fdb, .f16 = 0x4248 }, /* pi */
      { .f32 = 0x3ea2f983, .f16 = 0x3518 }, /* 1/pi */
      { .f32 = 0x3f317218, .f16 = 0x398c }, /* 1/log2(e) */
      { .f32 = 0x3fb8aa3b, .f16 = 0x3dc5 }, /* log2(e) */
      { .f32 = 0x3e9a209b, .f16 = 0x34d1 }, /* 1/log2(10) */
      { .f32 = 0x40549a78, .f16 = 0x42a5 }, /* log2(10) */
      { .f32 = 0x40800000, .f16 = 0x4400 }, /* 4.0 */
   };

   const bool is_half = !!(src_reg->flags & IR3_REG_HALF);
   const uint32_t imm = src_reg->uim_val;

   for (unsigned idx = 0; idx < ARRAY_SIZE(flut); idx++) {
      /* Pick the encoding that matches the register's precision. */
      uint32_t encoded = is_half ? flut[idx].f16 : flut[idx].f32;

      if (encoded == imm)
         return idx;
   }

   return -1;
}
/**
 * Reduce a register flag word to the subset of flags that
 * ir3_valid_flags() reasons about.
 *
 * @param flags  raw register flags
 * @return @flags with every other bit cleared
 */
static unsigned
cp_flags(unsigned flags)
{
   const unsigned considered =
      IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_FNEG | IR3_REG_FABS |
      IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT | IR3_REG_RELATIV |
      IR3_REG_SHARED;

   return flags & considered;
}
/**
 * Check whether source @n of @instr may carry the given register flags.
 *
 * Only the flags kept by cp_flags() are considered.  The rules depend on
 * the instruction category, in places on the specific opcode, and on the
 * flags already present on neighbouring registers.
 *
 * @param instr  instruction whose source is being examined
 * @param n      index of the source within instr->srcs
 * @param flags  candidate register flags for that source
 * @return true if the combination is accepted, false otherwise
 */
bool
ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags)
{
struct ir3_compiler *compiler = instr->block->shader->compiler;
unsigned valid_flags;
/* Shared registers are rejected for every category above 3. */
if ((flags & IR3_REG_SHARED) && opc_cat(instr->opc) > 3)
return false;
flags = cp_flags(flags);
/* A relative source is rejected when the first destination is already
 * relative.
 */
if (instr->dsts_count > 0 && (instr->dsts[0]->flags & IR3_REG_RELATIV) &&
(flags & IR3_REG_RELATIV))
return false;
if (flags & IR3_REG_RELATIV) {
/* Relative sources are only accepted for gpu_id >= 600. */
if (compiler->gpu_id < 600)
return false;
/* For an SSA source, the instruction defining its address register
 * must live in the same block as @instr.
 */
if (instr->srcs[n]->flags & IR3_REG_SSA) {
struct ir3_instruction *src = ssa(instr->srcs[n]);
if (src->address->def->instr->block != instr->block)
return false;
}
}
if (is_meta(instr)) {
/* Meta instructions accept only immed/const/shared, and shared only
 * when the first destination is shared as well.
 */
if (flags & ~(IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_SHARED))
return false;
if ((flags & IR3_REG_SHARED) && !(instr->dsts[0]->flags & IR3_REG_SHARED))
return false;
return true;
}
switch (opc_cat(instr->opc)) {
case 0:
/* Category 0: no flags at all. */
return flags == 0;
case 1:
/* Category 1: movmsk/swz/sct/gat accept only shared; everything else
 * additionally accepts immed, const and relative.
 */
switch (instr->opc) {
case OPC_MOVMSK:
case OPC_SWZ:
case OPC_SCT:
case OPC_GAT:
valid_flags = IR3_REG_SHARED;
break;
default:
valid_flags =
IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV | IR3_REG_SHARED;
}
if (flags & ~valid_flags)
return false;
break;
case 2:
/* Category 2: the opcode's abs/neg modifiers plus
 * const/relative/immed/shared.
 */
valid_flags = ir3_cat2_absneg(instr->opc) | IR3_REG_CONST |
IR3_REG_RELATIV | IR3_REG_IMMED | IR3_REG_SHARED;
if (flags & ~valid_flags)
return false;
if (flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_SHARED)) {
/* m is the partner source (0 <-> 1).  It may not exist, hence the
 * bounds check.  Const/shared may not appear on both, and neither
 * may immed.
 */
unsigned m = n ^ 1;
if (m < instr->srcs_count) {
struct ir3_register *reg = instr->srcs[m];
if ((flags & (IR3_REG_CONST | IR3_REG_SHARED)) &&
(reg->flags & (IR3_REG_CONST | IR3_REG_SHARED)))
return false;
if ((flags & IR3_REG_IMMED) && reg->flags & (IR3_REG_IMMED))
return false;
}
}
break;
case 3:
/* Category 3: the opcode's abs/neg modifiers plus relative/shared.
 * shlg.b16 also accepts immed, and accepts const only together with
 * relative; all other opcodes accept const.
 */
valid_flags =
ir3_cat3_absneg(instr->opc) | IR3_REG_RELATIV | IR3_REG_SHARED;
if (instr->opc == OPC_SHLG_B16) {
valid_flags |= IR3_REG_IMMED;
if (flags & IR3_REG_RELATIV)
valid_flags |= IR3_REG_CONST;
} else {
valid_flags |= IR3_REG_CONST;
}
if (flags & ~valid_flags)
return false;
/* Const/shared/relative are rejected for source index 1. */
if (flags & (IR3_REG_CONST | IR3_REG_SHARED | IR3_REG_RELATIV)) {
if (n == 1)
return false;
}
break;
case 4:
/* Category 4: no const/immed and no integer abs/neg modifiers. */
if (flags & (IR3_REG_CONST | IR3_REG_IMMED))
return false;
if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
return false;
break;
case 5:
/* Category 5: no flags at all. */
if (flags)
return false;
break;
case 6:
/* Category 6: only immed is ever accepted, and only in the source
 * positions allowed by the per-opcode checks below.
 */
valid_flags = IR3_REG_IMMED;
if (flags & ~valid_flags)
return false;
if (flags & IR3_REG_IMMED) {
/* Stores other than stg: no immed in source 1. */
if (is_store(instr) && (instr->opc != OPC_STG) && (n == 1))
return false;
if ((instr->opc == OPC_LDL) && (n == 0))
return false;
if ((instr->opc == OPC_STL) && (n != 2))
return false;
if ((instr->opc == OPC_LDP) && (n == 0))
return false;
if ((instr->opc == OPC_STP) && (n != 2))
return false;
if (instr->opc == OPC_STLW && n == 0)
return false;
if (instr->opc == OPC_LDLW && n == 0)
return false;
/* Atomics: immed only in source 0, and only when IR3_INSTR_G is set. */
if (is_atomic(instr->opc) && (n != 0))
return false;
if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
return false;
if (instr->opc == OPC_STG && (n == 2))
return false;
if (instr->opc == OPC_STG_A && (n == 4))
return false;
/* These opcodes accept an immed only in source 0. */
switch (instr->opc) {
case OPC_LDIB:
case OPC_STIB:
case OPC_LDC:
case OPC_RESINFO:
if (n != 0)
return false;
break;
default:
break;
}
}
break;
}
return true;
}