Path: blob/21.2-virgl/src/broadcom/compiler/v3d_nir_lower_scratch.c
4564 views
/*1* Copyright © 2018 Intel Corporation2* Copyright © 2018 Broadcom3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER19* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING20* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS21* IN THE SOFTWARE.22*/2324#include "v3d_compiler.h"25#include "compiler/nir/nir_builder.h"26#include "compiler/nir/nir_format_convert.h"2728/** @file v3d_nir_lower_scratch.c29*30* Swizzles around the addresses of31* nir_intrinsic_load_scratch/nir_intrinsic_store_scratch so that a QPU stores32* a cacheline at a time per dword of scratch access, scalarizing and removing33* writemasks in the process.34*/3536static nir_ssa_def *37v3d_nir_scratch_offset(nir_builder *b, nir_intrinsic_instr *instr)38{39bool is_store = instr->intrinsic == nir_intrinsic_store_scratch;40nir_ssa_def *offset = nir_ssa_for_src(b, instr->src[is_store ? 1 : 0], 1);4142assert(nir_intrinsic_align_mul(instr) >= 4);43assert(nir_intrinsic_align_offset(instr) == 0);4445/* The spill_offset register will already have the subgroup ID (EIDX)46* shifted and ORed in at bit 2, so all we need to do is to move the47* dword index up above V3D_CHANNELS.48*/49return nir_imul_imm(b, offset, V3D_CHANNELS);50}5152static void53v3d_nir_lower_load_scratch(nir_builder *b, nir_intrinsic_instr *instr)54{55b->cursor = nir_before_instr(&instr->instr);5657nir_ssa_def *offset = v3d_nir_scratch_offset(b,instr);5859nir_ssa_def *chans[NIR_MAX_VEC_COMPONENTS];60for (int i = 0; i < instr->num_components; i++) {61nir_ssa_def *chan_offset =62nir_iadd_imm(b, offset, V3D_CHANNELS * i * 4);6364nir_intrinsic_instr *chan_instr =65nir_intrinsic_instr_create(b->shader, instr->intrinsic);66chan_instr->num_components = 1;67nir_ssa_dest_init(&chan_instr->instr, &chan_instr->dest, 1,68instr->dest.ssa.bit_size, NULL);6970chan_instr->src[0] = nir_src_for_ssa(chan_offset);7172nir_intrinsic_set_align(chan_instr, 4, 0);7374nir_builder_instr_insert(b, &chan_instr->instr);7576chans[i] = &chan_instr->dest.ssa;77}7879nir_ssa_def *result = nir_vec(b, chans, instr->num_components);80nir_ssa_def_rewrite_uses(&instr->dest.ssa, result);81nir_instr_remove(&instr->instr);82}8384static void85v3d_nir_lower_store_scratch(nir_builder *b, nir_intrinsic_instr *instr)86{87b->cursor = nir_before_instr(&instr->instr);8889nir_ssa_def *offset = v3d_nir_scratch_offset(b, instr);90nir_ssa_def *value = nir_ssa_for_src(b, instr->src[0],91instr->num_components);9293for (int i = 0; i < instr->num_components; i++) {94if (!(nir_intrinsic_write_mask(instr) & (1 << i)))95continue;9697nir_ssa_def *chan_offset =98nir_iadd_imm(b, offset, V3D_CHANNELS * i * 4);99100nir_intrinsic_instr *chan_instr =101nir_intrinsic_instr_create(b->shader, instr->intrinsic);102chan_instr->num_components = 1;103104chan_instr->src[0] = nir_src_for_ssa(nir_channel(b,105value,106i));107chan_instr->src[1] = nir_src_for_ssa(chan_offset);108nir_intrinsic_set_write_mask(chan_instr, 0x1);109nir_intrinsic_set_align(chan_instr, 4, 0);110111nir_builder_instr_insert(b, &chan_instr->instr);112}113114nir_instr_remove(&instr->instr);115}116117void118v3d_nir_lower_scratch(nir_shader *s)119{120nir_foreach_function(function, s) {121if (!function->impl)122continue;123124nir_builder b;125nir_builder_init(&b, function->impl);126127nir_foreach_block(block, function->impl) {128nir_foreach_instr_safe(instr, block) {129if (instr->type != nir_instr_type_intrinsic)130continue;131132nir_intrinsic_instr *intr =133nir_instr_as_intrinsic(instr);134135switch (intr->intrinsic) {136case nir_intrinsic_load_scratch:137v3d_nir_lower_load_scratch(&b, intr);138break;139case nir_intrinsic_store_scratch:140v3d_nir_lower_store_scratch(&b, intr);141break;142default:143break;144}145}146}147148nir_metadata_preserve(function->impl,149nir_metadata_block_index |150nir_metadata_dominance);151}152}153154155