Path: blob/21.2-virgl/src/panfrost/bifrost/bi_opt_push_ubo.c
4564 views
/*
 * Copyright (C) 2021 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "compiler.h"
#include "bi_builder.h"

/* This optimization pass, intended to run once after code emission but before
 * copy propagation, analyzes direct word-aligned UBO reads and promotes a
 * subset to moves from FAU. It is the sole populator of the UBO push data
 * structure returned back to the command stream.
*/3031static bool32bi_is_ubo(bi_instr *ins)33{34return (bi_opcode_props[ins->op].message == BIFROST_MESSAGE_LOAD) &&35(ins->seg == BI_SEG_UBO);36}3738static bool39bi_is_direct_aligned_ubo(bi_instr *ins)40{41return bi_is_ubo(ins) &&42(ins->src[0].type == BI_INDEX_CONSTANT) &&43(ins->src[1].type == BI_INDEX_CONSTANT) &&44((ins->src[0].value & 0x3) == 0);45}4647/* Represents use data for a single UBO */4849#define MAX_UBO_WORDS (65536 / 16)5051struct bi_ubo_block {52BITSET_DECLARE(pushed, MAX_UBO_WORDS);53uint8_t range[MAX_UBO_WORDS];54};5556struct bi_ubo_analysis {57/* Per block analysis */58unsigned nr_blocks;59struct bi_ubo_block *blocks;60};6162static struct bi_ubo_analysis63bi_analyze_ranges(bi_context *ctx)64{65struct bi_ubo_analysis res = {66.nr_blocks = ctx->nir->info.num_ubos + 1,67};6869res.blocks = calloc(res.nr_blocks, sizeof(struct bi_ubo_block));7071bi_foreach_instr_global(ctx, ins) {72if (!bi_is_direct_aligned_ubo(ins)) continue;7374unsigned ubo = ins->src[1].value;75unsigned word = ins->src[0].value / 4;76unsigned channels = bi_opcode_props[ins->op].sr_count;7778assert(ubo < res.nr_blocks);79assert(channels > 0 && channels <= 4);8081if (word < MAX_UBO_WORDS)82res.blocks[ubo].range[word] = channels;83}8485return res;86}8788/* Select UBO words to push. A sophisticated implementation would consider the89* number of uses and perhaps the control flow to estimate benefit. This is not90* sophisticated. Select from the last UBO first to prioritize sysvals. 
*/9192static void93bi_pick_ubo(struct panfrost_ubo_push *push, struct bi_ubo_analysis *analysis)94{95for (signed ubo = analysis->nr_blocks - 1; ubo >= 0; --ubo) {96struct bi_ubo_block *block = &analysis->blocks[ubo];9798for (unsigned r = 0; r < MAX_UBO_WORDS; ++r) {99unsigned range = block->range[r];100101/* Don't push something we don't access */102if (range == 0) continue;103104/* Don't push more than possible */105if (push->count > PAN_MAX_PUSH - range)106return;107108for (unsigned offs = 0; offs < range; ++offs) {109struct panfrost_ubo_word word = {110.ubo = ubo,111.offset = (r + offs) * 4112};113114push->words[push->count++] = word;115}116117/* Mark it as pushed so we can rewrite */118BITSET_SET(block->pushed, r);119}120}121}122123void124bi_opt_push_ubo(bi_context *ctx)125{126if (ctx->inputs->no_ubo_to_push) {127/* If nothing is pushed, all UBOs need to be uploaded */128ctx->ubo_mask = ~0;129return;130}131132/* This pass only runs once */133assert(ctx->info->push.count == 0);134135struct bi_ubo_analysis analysis = bi_analyze_ranges(ctx);136bi_pick_ubo(&ctx->info->push, &analysis);137138ctx->ubo_mask = 0;139140bi_foreach_instr_global_safe(ctx, ins) {141if (!bi_is_ubo(ins)) continue;142143unsigned ubo = ins->src[1].value;144unsigned offset = ins->src[0].value;145146if (!bi_is_direct_aligned_ubo(ins)) {147/* The load can't be pushed, so this UBO needs to be148* uploaded conventionally */149if (ins->src[1].type == BI_INDEX_CONSTANT)150ctx->ubo_mask |= BITSET_BIT(ubo);151else152ctx->ubo_mask = ~0;153154continue;155}156157/* Check if we decided to push this */158assert(ubo < analysis.nr_blocks);159if (!BITSET_TEST(analysis.blocks[ubo].pushed, offset / 4)) {160ctx->ubo_mask |= BITSET_BIT(ubo);161continue;162}163164/* Replace the UBO load with moves from FAU */165bi_builder b = bi_init_builder(ctx, bi_after_instr(ins));166167unsigned channels = bi_opcode_props[ins->op].sr_count;168169for (unsigned w = 0; w < channels; ++w) {170/* FAU is grouped in pairs (2 x 4-byte) 
*/171unsigned base =172pan_lookup_pushed_ubo(&ctx->info->push, ubo,173(offset + 4 * w));174175unsigned fau_idx = (base >> 1);176unsigned fau_hi = (base & 1);177178bi_mov_i32_to(&b,179bi_word(ins->dest[0], w),180bi_fau(BIR_FAU_UNIFORM | fau_idx, fau_hi));181}182183bi_remove_instruction(ins);184}185186free(analysis.blocks);187}188189190