Path: blob/21.2-virgl/src/intel/vulkan/anv_nir_compute_push_layout.c
/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "nir_builder.h"
#include "compiler/brw_nir.h"
#include "util/mesa-sha1.h"

#define sizeof_field(type, field) sizeof(((type *)0)->field)

void
anv_nir_compute_push_layout(const struct anv_physical_device *pdevice,
                            bool robust_buffer_access,
                            nir_shader *nir,
                            struct brw_stage_prog_data *prog_data,
                            struct anv_pipeline_bind_map *map,
                            void *mem_ctx)
{
   const struct brw_compiler *compiler = pdevice->compiler;
   const struct intel_device_info *devinfo = compiler->devinfo;
   memset(map->push_ranges, 0, sizeof(map->push_ranges));

   bool has_const_ubo = false;
   unsigned push_start = UINT_MAX, push_end = 0;
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            case nir_intrinsic_load_ubo:
               if (nir_src_is_const(intrin->src[0]) &&
                   nir_src_is_const(intrin->src[1]))
                  has_const_ubo = true;
               break;

            case nir_intrinsic_load_push_constant: {
               unsigned base = nir_intrinsic_base(intrin);
               unsigned range = nir_intrinsic_range(intrin);
               push_start = MIN2(push_start, base);
               push_end = MAX2(push_end, base + range);
               break;
            }

            case nir_intrinsic_load_desc_set_address_intel:
               push_start = MIN2(push_start,
                  offsetof(struct anv_push_constants, desc_sets));
               push_end = MAX2(push_end, push_start +
                  sizeof_field(struct anv_push_constants, desc_sets));
               break;

            default:
               break;
            }
         }
      }
   }

   const bool has_push_intrinsic = push_start <= push_end;

   const bool push_ubo_ranges =
      pdevice->info.verx10 >= 75 &&
      has_const_ubo && nir->info.stage != MESA_SHADER_COMPUTE &&
      !brw_shader_stage_is_bindless(nir->info.stage);

   if (push_ubo_ranges && robust_buffer_access) {
      /* We can't on-the-fly adjust our push ranges because doing so would
       * mess up the layout in the shader.  When robustBufferAccess is
       * enabled, we push a mask into the shader indicating which pushed
       * registers are valid and we zero out the invalid ones at the top of
       * the shader.
       */
      const uint32_t push_reg_mask_start =
         offsetof(struct anv_push_constants, push_reg_mask[nir->info.stage]);
      const uint32_t push_reg_mask_end = push_reg_mask_start + sizeof(uint64_t);
      push_start = MIN2(push_start, push_reg_mask_start);
      push_end = MAX2(push_end, push_reg_mask_end);
   }

   if (nir->info.stage == MESA_SHADER_COMPUTE && devinfo->verx10 < 125) {
      /* For compute shaders, we always have to have the subgroup ID.  The
       * back-end compiler will "helpfully" add it for us in the last push
       * constant slot.  Yes, there is an off-by-one error here but that's
       * because the back-end will add it so we want to claim the number of
       * push constants one dword less than the full amount including
       * gl_SubgroupId.
       */
      assert(push_end <= offsetof(struct anv_push_constants, cs.subgroup_id));
      push_end = offsetof(struct anv_push_constants, cs.subgroup_id);
   }

   /* Align push_start down to a 32B boundary and make it no larger than
    * push_end (no push constants is indicated by push_start = UINT_MAX).
    */
   push_start = MIN2(push_start, push_end);
   push_start = align_down_u32(push_start, 32);

   /* For vec4 our push data size needs to be aligned to a vec4 and for
    * scalar, it needs to be aligned to a DWORD.
    */
   const unsigned align = compiler->scalar_stage[nir->info.stage] ? 4 : 16;
   nir->num_uniforms = ALIGN(push_end - push_start, align);
   prog_data->nr_params = nir->num_uniforms / 4;
   prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params);

   struct anv_push_range push_constant_range = {
      .set = ANV_DESCRIPTOR_SET_PUSH_CONSTANTS,
      .start = push_start / 32,
      .length = DIV_ROUND_UP(push_end - push_start, 32),
   };
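
   /* Worked example with made-up numbers (not from any real shader): for a
    * scalar stage with push_start = 44 and push_end = 108, push_start is
    * aligned down to 32, nir->num_uniforms = ALIGN(108 - 32, 4) = 76 bytes
    * (19 dwords), and push_constant_range gets .start = 32 / 32 = 1 and
    * .length = DIV_ROUND_UP(76, 32) = 3 32-byte registers.
    */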

   if (has_push_intrinsic) {
      nir_foreach_function(function, nir) {
         if (!function->impl)
            continue;

         nir_builder build, *b = &build;
         nir_builder_init(b, function->impl);

         nir_foreach_block(block, function->impl) {
            nir_foreach_instr_safe(instr, block) {
               if (instr->type != nir_instr_type_intrinsic)
                  continue;

               nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
               switch (intrin->intrinsic) {
               case nir_intrinsic_load_push_constant:
                  intrin->intrinsic = nir_intrinsic_load_uniform;
                  nir_intrinsic_set_base(intrin,
                                         nir_intrinsic_base(intrin) -
                                         push_start);
                  break;

               case nir_intrinsic_load_desc_set_address_intel: {
                  b->cursor = nir_before_instr(&intrin->instr);
                  nir_ssa_def *pc_load = nir_load_uniform(b, 1, 64,
                     nir_imul_imm(b, intrin->src[0].ssa, sizeof(uint64_t)),
                     .base = offsetof(struct anv_push_constants, desc_sets),
                     .range = sizeof_field(struct anv_push_constants, desc_sets),
                     .dest_type = nir_type_uint64);
                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa, pc_load);
                  break;
               }

               default:
                  break;
               }
            }
         }
      }
   }

   if (push_ubo_ranges) {
      brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);

      /* The vec4 back-end pushes at most 32 regs while the scalar back-end
       * pushes up to 64.  This is primarily because the scalar back-end has
       * a massively more competent register allocator and so the risk of
       * spilling due to UBO pushing isn't nearly as high.
       */
      const unsigned max_push_regs =
         compiler->scalar_stage[nir->info.stage] ? 64 : 32;

      unsigned total_push_regs = push_constant_range.length;
      for (unsigned i = 0; i < 4; i++) {
         if (total_push_regs + prog_data->ubo_ranges[i].length > max_push_regs)
            prog_data->ubo_ranges[i].length = max_push_regs - total_push_regs;
         total_push_regs += prog_data->ubo_ranges[i].length;
      }
      assert(total_push_regs <= max_push_regs);
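
      /* Illustrative clamping example (hypothetical lengths): with
       * push_constant_range.length = 2 and candidate UBO range lengths of
       * 40, 30, 10, and 0 on a scalar stage (max_push_regs = 64), the first
       * range stays at 40 (total 42), the second is clamped to 22 (total
       * 64), and the remaining ranges are clamped to 0.
       */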

      int n = 0;

      if (push_constant_range.length > 0)
         map->push_ranges[n++] = push_constant_range;

      if (robust_buffer_access) {
         const uint32_t push_reg_mask_offset =
            offsetof(struct anv_push_constants, push_reg_mask[nir->info.stage]);
         assert(push_reg_mask_offset >= push_start);
         prog_data->push_reg_mask_param =
            (push_reg_mask_offset - push_start) / 4;
      }

      unsigned range_start_reg = push_constant_range.length;

      for (int i = 0; i < 4; i++) {
         struct brw_ubo_range *ubo_range = &prog_data->ubo_ranges[i];
         if (ubo_range->length == 0)
            continue;

         if (n >= 4 || (n == 3 && compiler->constant_buffer_0_is_relative)) {
            memset(ubo_range, 0, sizeof(*ubo_range));
            continue;
         }

         const struct anv_pipeline_binding *binding =
            &map->surface_to_descriptor[ubo_range->block];

         map->push_ranges[n++] = (struct anv_push_range) {
            .set = binding->set,
            .index = binding->index,
            .dynamic_offset_index = binding->dynamic_offset_index,
            .start = ubo_range->start,
            .length = ubo_range->length,
         };

         /* We only bother to shader-zero pushed client UBOs */
         if (binding->set < MAX_SETS && robust_buffer_access) {
            prog_data->zero_push_reg |= BITFIELD64_RANGE(range_start_reg,
                                                         ubo_range->length);
         }

         range_start_reg += ubo_range->length;
      }
   } else {
      /* For Ivy Bridge, the push constants packets have a different
       * rule that would require us to iterate in the other direction
       * and possibly mess around with dynamic state base address.
       * Don't bother; just emit regular push constants at n = 0.
       *
       * In the compute case, we don't have multiple push ranges so it's
       * better to just provide one in push_ranges[0].
       */
      map->push_ranges[0] = push_constant_range;
   }

   /* Now that we're done computing the push constant portion of the
    * bind map, hash it.  This lets us quickly determine if the actual
    * mapping has changed and not just a no-op pipeline change.
    */
   _mesa_sha1_compute(map->push_ranges,
                      sizeof(map->push_ranges),
                      map->push_sha1);
}

void
anv_nir_validate_push_layout(struct brw_stage_prog_data *prog_data,
                             struct anv_pipeline_bind_map *map)
{
#ifndef NDEBUG
   unsigned prog_data_push_size = DIV_ROUND_UP(prog_data->nr_params, 8);
   for (unsigned i = 0; i < 4; i++)
      prog_data_push_size += prog_data->ubo_ranges[i].length;

   unsigned bind_map_push_size = 0;
   for (unsigned i = 0; i < 4; i++)
      bind_map_push_size += map->push_ranges[i].length;

   /* We could go through everything again but it should be enough to assert
    * that they push the same number of registers.  This should alert us if
    * the back-end compiler decides to re-arrange stuff or shrink a range.
    */
   assert(prog_data_push_size == bind_map_push_size);
#endif
}
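
/* Sanity arithmetic for the check above, continuing the made-up example
 * earlier in this file: nr_params = 19 dwords gives
 * DIV_ROUND_UP(19, 8) = 3 registers on the prog_data side, which matches a
 * bind map whose only range is the 3-register push-constant range.
 */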