/* Path: blob/21.2-virgl/src/panfrost/midgard/midgard_address.c
 * 4564 views */
/*1* Copyright (C) 2019 Collabora, Ltd.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors (Collabora):23* Alyssa Rosenzweig <[email protected]>24*/2526#include "compiler.h"2728/* Midgard's generic load/store instructions, particularly to implement SSBOs29* and globals, have support for address arithmetic natively. In particularly,30* they take two indirect arguments A, B and two immediates #s, #c, calculating31* the address:32*33* A + (zext?(B) << #s) + #c34*35* This allows for fast indexing into arrays. 
This file tries to pattern match the offset in NIR with this form to reduce pressure on the ALU pipe.36*/3738struct mir_address {39nir_ssa_scalar A;40nir_ssa_scalar B;4142midgard_index_address_format type;43unsigned shift;44unsigned bias;45};4647static bool48mir_args_ssa(nir_ssa_scalar s, unsigned count)49{50nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);5152if (count > nir_op_infos[alu->op].num_inputs)53return false;5455for (unsigned i = 0; i < count; ++i) {56if (!alu->src[i].src.is_ssa)57return false;58}5960return true;61}6263/* Matches a constant in either slot and moves it to the bias */6465static void66mir_match_constant(struct mir_address *address)67{68if (address->A.def && nir_ssa_scalar_is_const(address->A)) {69address->bias += nir_ssa_scalar_as_uint(address->A);70address->A.def = NULL;71}7273if (address->B.def && nir_ssa_scalar_is_const(address->B)) {74address->bias += nir_ssa_scalar_as_uint(address->B);75address->B.def = NULL;76}77}7879/* Matches an iadd when there is a free slot or constant */8081/* The offset field is a 18-bit signed integer */82#define MAX_POSITIVE_OFFSET ((1 << 17) - 1)8384static void85mir_match_iadd(struct mir_address *address, bool first_free)86{87if (!address->B.def || !nir_ssa_scalar_is_alu(address->B))88return;8990if (!mir_args_ssa(address->B, 2))91return;9293nir_op op = nir_ssa_scalar_alu_op(address->B);9495if (op != nir_op_iadd) return;9697nir_ssa_scalar op1 = nir_ssa_scalar_chase_alu_src(address->B, 0);98nir_ssa_scalar op2 = nir_ssa_scalar_chase_alu_src(address->B, 1);99100if (nir_ssa_scalar_is_const(op1) &&101nir_ssa_scalar_as_uint(op1) <= MAX_POSITIVE_OFFSET) {102address->bias += nir_ssa_scalar_as_uint(op1);103address->B = op2;104} else if (nir_ssa_scalar_is_const(op2) &&105nir_ssa_scalar_as_uint(op2) <= MAX_POSITIVE_OFFSET) {106address->bias += nir_ssa_scalar_as_uint(op2);107address->B = op1;108} else if (!nir_ssa_scalar_is_const(op1) &&109!nir_ssa_scalar_is_const(op2) &&110first_free && !address->A.def) 
{111address->A = op1;112address->B = op2;113}114}115116/* Matches u2u64 and sets type */117118static void119mir_match_u2u64(struct mir_address *address)120{121if (!address->B.def || !nir_ssa_scalar_is_alu(address->B))122return;123124if (!mir_args_ssa(address->B, 1))125return;126127nir_op op = nir_ssa_scalar_alu_op(address->B);128if (op != nir_op_u2u64) return;129nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0);130131address->B = arg;132address->type = midgard_index_address_u32;133}134135/* Matches i2i64 and sets type */136137static void138mir_match_i2i64(struct mir_address *address)139{140if (!address->B.def || !nir_ssa_scalar_is_alu(address->B))141return;142143if (!mir_args_ssa(address->B, 1))144return;145146nir_op op = nir_ssa_scalar_alu_op(address->B);147if (op != nir_op_i2i64) return;148nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0);149150address->B = arg;151address->type = midgard_index_address_s32;152}153154/* Matches ishl to shift */155156static void157mir_match_ishl(struct mir_address *address)158{159if (!address->B.def || !nir_ssa_scalar_is_alu(address->B))160return;161162if (!mir_args_ssa(address->B, 2))163return;164165nir_op op = nir_ssa_scalar_alu_op(address->B);166if (op != nir_op_ishl) return;167nir_ssa_scalar op1 = nir_ssa_scalar_chase_alu_src(address->B, 0);168nir_ssa_scalar op2 = nir_ssa_scalar_chase_alu_src(address->B, 1);169170if (!nir_ssa_scalar_is_const(op2)) return;171172unsigned shift = nir_ssa_scalar_as_uint(op2);173if (shift > 0x7) return;174175address->B = op1;176address->shift = shift;177}178179/* Strings through mov which can happen from NIR vectorization */180181static void182mir_match_mov(struct mir_address *address)183{184if (address->A.def && nir_ssa_scalar_is_alu(address->A)) {185nir_op op = nir_ssa_scalar_alu_op(address->A);186187if (op == nir_op_mov && mir_args_ssa(address->A, 1))188address->A = nir_ssa_scalar_chase_alu_src(address->A, 0);189}190191if (address->B.def && 
nir_ssa_scalar_is_alu(address->B)) {192nir_op op = nir_ssa_scalar_alu_op(address->B);193194if (op == nir_op_mov && mir_args_ssa(address->B, 1))195address->B = nir_ssa_scalar_chase_alu_src(address->B, 0);196}197}198199/* Tries to pattern match into mir_address */200201static struct mir_address202mir_match_offset(nir_ssa_def *offset, bool first_free, bool extend)203{204struct mir_address address = {205.B = { .def = offset },206.type = extend ? midgard_index_address_u64 : midgard_index_address_u32,207};208209mir_match_mov(&address);210mir_match_constant(&address);211mir_match_mov(&address);212mir_match_iadd(&address, first_free);213mir_match_mov(&address);214215if (extend) {216mir_match_u2u64(&address);217mir_match_i2i64(&address);218mir_match_mov(&address);219}220221mir_match_ishl(&address);222223return address;224}225226void227mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, unsigned seg)228{229for(unsigned i = 0; i < 16; ++i) {230ins->swizzle[1][i] = 0;231ins->swizzle[2][i] = 0;232}233234/* Sign extend instead of zero extend in case the address is something235* like `base + offset + 20`, where offset could be negative. 
*/236bool force_sext = (nir_src_bit_size(*offset) < 64);237238if (!offset->is_ssa) {239ins->load_store.bitsize_toggle = true;240ins->load_store.arg_comp = seg & 0x3;241ins->load_store.arg_reg = (seg >> 2) & 0x7;242ins->src[2] = nir_src_index(ctx, offset);243ins->src_types[2] = nir_type_uint | nir_src_bit_size(*offset);244245if (force_sext)246ins->load_store.index_format = midgard_index_address_s32;247else248ins->load_store.index_format = midgard_index_address_u64;249250return;251}252253bool first_free = (seg == LDST_GLOBAL);254255struct mir_address match = mir_match_offset(offset->ssa, first_free, true);256257if (match.A.def) {258ins->src[1] = nir_ssa_index(match.A.def);259ins->swizzle[1][0] = match.A.comp;260ins->src_types[1] = nir_type_uint | match.A.def->bit_size;261} else {262ins->load_store.bitsize_toggle = true;263ins->load_store.arg_comp = seg & 0x3;264ins->load_store.arg_reg = (seg >> 2) & 0x7;265}266267if (match.B.def) {268ins->src[2] = nir_ssa_index(match.B.def);269ins->swizzle[2][0] = match.B.comp;270ins->src_types[2] = nir_type_uint | match.B.def->bit_size;271} else272ins->load_store.index_reg = REGISTER_LDST_ZERO;273274if (force_sext)275match.type = midgard_index_address_s32;276277ins->load_store.index_format = match.type;278279assert(match.shift <= 7);280ins->load_store.index_shift = match.shift;281282ins->constants.u32[0] = match.bias;283}284285286void287mir_set_ubo_offset(midgard_instruction *ins, nir_src *src, unsigned bias)288{289assert(src->is_ssa);290struct mir_address match = mir_match_offset(src->ssa, false, false);291292if (match.B.def) {293ins->src[2] = nir_ssa_index(match.B.def);294295for (unsigned i = 0; i < ARRAY_SIZE(ins->swizzle[2]); ++i)296ins->swizzle[2][i] = match.B.comp;297}298299ins->load_store.index_shift = match.shift;300ins->constants.u32[0] = match.bias + bias;301}302303304