Path: blob/21.2-virgl/src/gallium/drivers/radeonsi/si_shaderlib_nir.c
4570 views
/*1* Copyright 2018 Advanced Micro Devices, Inc.2* All Rights Reserved.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* on the rights to use, copy, modify, merge, publish, distribute, sub8* license, and/or sell copies of the Software, and to permit persons to whom9* the Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL18* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,19* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR20* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE21* USE OR OTHER DEALINGS IN THE SOFTWARE.22*/2324#define AC_SURFACE_INCLUDE_NIR25#include "ac_surface.h"26#include "si_pipe.h"2728static void *create_nir_cs(struct si_context *sctx, nir_builder *b)29{30nir_shader_gather_info(b->shader, nir_shader_get_entrypoint(b->shader));3132struct pipe_compute_state state = {0};33state.ir_type = PIPE_SHADER_IR_NIR;34state.prog = b->shader;35sctx->b.screen->finalize_nir(sctx->b.screen, (void*)state.prog, false);36return sctx->b.create_compute_state(&sctx->b, &state);37}3839static nir_ssa_def *get_global_ids(nir_builder *b, unsigned num_components)40{41unsigned mask = BITFIELD_MASK(num_components);4243nir_ssa_def *local_ids = nir_channels(b, nir_load_local_invocation_id(b), mask);44nir_ssa_def *block_ids = nir_channels(b, nir_load_workgroup_id(b, 32), mask);45nir_ssa_def *block_size = nir_channels(b, nir_load_workgroup_size(b), mask);46return nir_iadd(b, nir_imul(b, block_ids, block_size), local_ids);47}4849static void unpack_2x16(nir_builder *b, nir_ssa_def *src, nir_ssa_def **x, nir_ssa_def **y)50{51*x = nir_iand(b, src, nir_imm_int(b, 0xffff));52*y = nir_ushr(b, src, nir_imm_int(b, 16));53}5455void *si_create_dcc_retile_cs(struct si_context *sctx, struct radeon_surf *surf)56{57const nir_shader_compiler_options *options =58sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE);5960nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "dcc_retile");61b.shader->info.workgroup_size[0] = 8;62b.shader->info.workgroup_size[1] = 8;63b.shader->info.workgroup_size[2] = 1;64b.shader->info.cs.user_data_components_amd = 3;65b.shader->info.num_ssbos = 1;6667/* Get user data SGPRs. */68nir_ssa_def *user_sgprs = nir_load_user_data_amd(&b);6970/* Relative offset from the displayable DCC to the non-displayable DCC in the same buffer. */71nir_ssa_def *src_dcc_offset = nir_channel(&b, user_sgprs, 0);7273nir_ssa_def *src_dcc_pitch, *dst_dcc_pitch, *src_dcc_height, *dst_dcc_height;74unpack_2x16(&b, nir_channel(&b, user_sgprs, 1), &src_dcc_pitch, &src_dcc_height);75unpack_2x16(&b, nir_channel(&b, user_sgprs, 2), &dst_dcc_pitch, &dst_dcc_height);7677/* Get the 2D coordinates. */78nir_ssa_def *coord = get_global_ids(&b, 2);79nir_ssa_def *zero = nir_imm_int(&b, 0);8081/* Multiply the coordinates by the DCC block size (they are DCC block coordinates). */82coord = nir_imul(&b, coord, nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width,83surf->u.gfx9.color.dcc_block_height));8485nir_ssa_def *src_offset =86ac_nir_dcc_addr_from_coord(&b, &sctx->screen->info, surf->bpe, &surf->u.gfx9.color.dcc_equation,87src_dcc_pitch, src_dcc_height, zero, /* DCC slice size */88nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), /* x, y */89zero, zero, zero); /* z, sample, pipe_xor */90src_offset = nir_iadd(&b, src_offset, src_dcc_offset);91nir_ssa_def *value = nir_load_ssbo(&b, 1, 8, zero, src_offset, .align_mul=1);9293nir_ssa_def *dst_offset =94ac_nir_dcc_addr_from_coord(&b, &sctx->screen->info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation,95dst_dcc_pitch, dst_dcc_height, zero, /* DCC slice size */96nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), /* x, y */97zero, zero, zero); /* z, sample, pipe_xor */98nir_store_ssbo(&b, value, zero, dst_offset, .write_mask=0x1, .align_mul=1);99100return create_nir_cs(sctx, &b);101}102103void *gfx9_create_clear_dcc_msaa_cs(struct si_context *sctx, struct si_texture *tex)104{105const nir_shader_compiler_options *options =106sctx->b.screen->get_compiler_options(sctx->b.screen, PIPE_SHADER_IR_NIR, PIPE_SHADER_COMPUTE);107108nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "clear_dcc_msaa");109b.shader->info.workgroup_size[0] = 8;110b.shader->info.workgroup_size[1] = 8;111b.shader->info.workgroup_size[2] = 1;112b.shader->info.cs.user_data_components_amd = 2;113b.shader->info.num_ssbos = 1;114115/* Get user data SGPRs. */116nir_ssa_def *user_sgprs = nir_load_user_data_amd(&b);117nir_ssa_def *dcc_pitch, *dcc_height, *clear_value, *pipe_xor;118unpack_2x16(&b, nir_channel(&b, user_sgprs, 0), &dcc_pitch, &dcc_height);119unpack_2x16(&b, nir_channel(&b, user_sgprs, 1), &clear_value, &pipe_xor);120clear_value = nir_u2u16(&b, clear_value);121122/* Get the 2D coordinates. */123nir_ssa_def *coord = get_global_ids(&b, 3);124nir_ssa_def *zero = nir_imm_int(&b, 0);125126/* Multiply the coordinates by the DCC block size (they are DCC block coordinates). */127coord = nir_imul(&b, coord,128nir_channels(&b, nir_imm_ivec4(&b, tex->surface.u.gfx9.color.dcc_block_width,129tex->surface.u.gfx9.color.dcc_block_height,130tex->surface.u.gfx9.color.dcc_block_depth, 0), 0x7));131132nir_ssa_def *offset =133ac_nir_dcc_addr_from_coord(&b, &sctx->screen->info, tex->surface.bpe,134&tex->surface.u.gfx9.color.dcc_equation,135dcc_pitch, dcc_height, zero, /* DCC slice size */136nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), /* x, y */137tex->buffer.b.b.array_size > 1 ? nir_channel(&b, coord, 2) : zero, /* z */138zero, pipe_xor); /* sample, pipe_xor */139140/* The trick here is that DCC elements for an even and the next odd sample are next to each other141* in memory, so we only need to compute the address for sample 0 and the next DCC byte is always142* sample 1. That's why the clear value has 2 bytes - we're clearing 2 samples at the same time.143*/144nir_store_ssbo(&b, clear_value, zero, offset, .write_mask=0x1, .align_mul=2);145146return create_nir_cs(sctx, &b);147}148149150