Path: blob/21.2-virgl/src/panfrost/lib/pan_shader.c
4560 views
/*1* Copyright (C) 2018 Alyssa Rosenzweig2* Copyright (C) 2019-2021 Collabora, Ltd.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER19* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,20* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE21* SOFTWARE.22*/2324#include "pan_device.h"25#include "pan_shader.h"2627#include "panfrost/midgard/midgard_compile.h"28#include "panfrost/bifrost/bifrost_compile.h"2930const nir_shader_compiler_options *31pan_shader_get_compiler_options(const struct panfrost_device *dev)32{33if (pan_is_bifrost(dev))34return &bifrost_nir_options;3536return &midgard_nir_options;37}3839static enum pipe_format40varying_format(nir_alu_type t, unsigned ncomps)41{42#define VARYING_FORMAT(ntype, nsz, ptype, psz) \43{ \44.type = nir_type_ ## ntype ## nsz, \45.formats = { \46PIPE_FORMAT_R ## psz ## _ ## ptype, \47PIPE_FORMAT_R ## psz ## G ## psz ## _ ## ptype, \48PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## _ ## ptype, \49PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## A ## psz ## _ ## ptype, \50} \51}5253static const struct {54nir_alu_type type;55enum pipe_format formats[4];56} conv[] = {57VARYING_FORMAT(float, 32, FLOAT, 32),58VARYING_FORMAT(int, 32, SINT, 32),59VARYING_FORMAT(uint, 32, UINT, 32),60VARYING_FORMAT(float, 16, FLOAT, 16),61VARYING_FORMAT(int, 16, SINT, 16),62VARYING_FORMAT(uint, 16, UINT, 16),63VARYING_FORMAT(int, 8, SINT, 8),64VARYING_FORMAT(uint, 8, UINT, 8),65VARYING_FORMAT(bool, 32, UINT, 32),66VARYING_FORMAT(bool, 16, UINT, 16),67VARYING_FORMAT(bool, 8, UINT, 8),68VARYING_FORMAT(bool, 1, UINT, 8),69};70#undef VARYING_FORMAT7172assert(ncomps > 0 && ncomps <= ARRAY_SIZE(conv[0].formats));7374for (unsigned i = 0; i < ARRAY_SIZE(conv); i++) {75if (conv[i].type == t)76return conv[i].formats[ncomps - 1];77}7879return PIPE_FORMAT_NONE;80}8182static void83collect_varyings(nir_shader *s, nir_variable_mode varying_mode,84struct pan_shader_varying *varyings,85unsigned *varying_count, bool is_bifrost)86{87*varying_count = 0;8889unsigned comps[MAX_VARYING] = { 0 };9091nir_foreach_variable_with_modes(var, s, varying_mode) {92unsigned loc = var->data.driver_location;93const struct glsl_type *column =94glsl_without_array_or_matrix(var->type);95unsigned chan = glsl_get_components(column);9697/* If we have a fractional location added, we need to increase the size98* so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.99* We could do better but this is an edge case as it is, normally100* packed varyings will be aligned.101*/102chan += var->data.location_frac;103comps[loc] = MAX2(comps[loc], chan);104}105106nir_foreach_variable_with_modes(var, s, varying_mode) {107unsigned loc = var->data.driver_location;108unsigned sz = glsl_count_attribute_slots(var->type, FALSE);109const struct glsl_type *column =110glsl_without_array_or_matrix(var->type);111enum glsl_base_type base_type = glsl_get_base_type(column);112unsigned chan = comps[loc];113114nir_alu_type type = nir_get_nir_type_for_glsl_base_type(base_type);115type = nir_alu_type_get_base_type(type);116117/* Can't do type conversion since GLSL IR packs in funny ways */118if (is_bifrost && var->data.interpolation == INTERP_MODE_FLAT)119type = nir_type_uint;120121/* Demote to fp16 where possible. int16 varyings are TODO as the hw122* will saturate instead of wrap which is not conformant, so we need to123* insert i2i16/u2u16 instructions before the st_vary_32i/32u to get124* the intended behaviour.125*/126if (type == nir_type_float &&127(var->data.precision == GLSL_PRECISION_MEDIUM ||128var->data.precision == GLSL_PRECISION_LOW) &&129!s->info.has_transform_feedback_varyings) {130type |= 16;131} else {132type |= 32;133}134135enum pipe_format format = varying_format(type, chan);136assert(format != PIPE_FORMAT_NONE);137138for (int c = 0; c < sz; ++c) {139varyings[loc + c].location = var->data.location + c;140varyings[loc + c].format = format;141}142143*varying_count = MAX2(*varying_count, loc + sz);144}145}146147static enum mali_bifrost_register_file_format148bifrost_blend_type_from_nir(nir_alu_type nir_type)149{150switch(nir_type) {151case 0: /* Render target not in use */152return 0;153case nir_type_float16:154return MALI_BIFROST_REGISTER_FILE_FORMAT_F16;155case nir_type_float32:156return MALI_BIFROST_REGISTER_FILE_FORMAT_F32;157case nir_type_int32:158return MALI_BIFROST_REGISTER_FILE_FORMAT_I32;159case nir_type_uint32:160return MALI_BIFROST_REGISTER_FILE_FORMAT_U32;161case nir_type_int16:162return MALI_BIFROST_REGISTER_FILE_FORMAT_I16;163case nir_type_uint16:164return MALI_BIFROST_REGISTER_FILE_FORMAT_U16;165default:166unreachable("Unsupported blend shader type for NIR alu type");167return 0;168}169}170171void172pan_shader_compile(const struct panfrost_device *dev,173nir_shader *s,174const struct panfrost_compile_inputs *inputs,175struct util_dynarray *binary,176struct pan_shader_info *info)177{178memset(info, 0, sizeof(*info));179180if (pan_is_bifrost(dev))181bifrost_compile_shader_nir(s, inputs, binary, info);182else183midgard_compile_shader_nir(s, inputs, binary, info);184185info->stage = s->info.stage;186info->contains_barrier = s->info.uses_memory_barrier ||187s->info.uses_control_barrier;188info->separable = s->info.separate_shader;189190switch (info->stage) {191case MESA_SHADER_VERTEX:192info->attribute_count = util_bitcount64(s->info.inputs_read);193194bool vertex_id = BITSET_TEST(s->info.system_values_read,195SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);196if (vertex_id && !pan_is_bifrost(dev))197info->attribute_count = MAX2(info->attribute_count, PAN_VERTEX_ID + 1);198199bool instance_id = BITSET_TEST(s->info.system_values_read,200SYSTEM_VALUE_INSTANCE_ID);201if (instance_id && !pan_is_bifrost(dev))202info->attribute_count = MAX2(info->attribute_count, PAN_INSTANCE_ID + 1);203204info->vs.writes_point_size =205s->info.outputs_written & (1 << VARYING_SLOT_PSIZ);206collect_varyings(s, nir_var_shader_out, info->varyings.output,207&info->varyings.output_count, pan_is_bifrost(dev));208break;209case MESA_SHADER_FRAGMENT:210if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))211info->fs.writes_depth = true;212if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))213info->fs.writes_stencil = true;214if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))215info->fs.writes_coverage = true;216217info->fs.outputs_read = s->info.outputs_read >> FRAG_RESULT_DATA0;218info->fs.outputs_written = s->info.outputs_written >> FRAG_RESULT_DATA0;219220/* EXT_shader_framebuffer_fetch requires per-sample */221info->fs.sample_shading = s->info.fs.uses_sample_shading ||222info->fs.outputs_read;223224info->fs.can_discard = s->info.fs.uses_discard;225info->fs.helper_invocations = s->info.fs.needs_quad_helper_invocations;226info->fs.early_fragment_tests = s->info.fs.early_fragment_tests;227228/* List of reasons we need to execute frag shaders when things229* are masked off */230231info->fs.sidefx = s->info.writes_memory ||232s->info.fs.uses_discard ||233s->info.fs.uses_demote;234235/* With suitable ZSA/blend, is early-z possible? */236info->fs.can_early_z =237!info->fs.sidefx &&238!info->fs.writes_depth &&239!info->fs.writes_stencil &&240!info->fs.writes_coverage;241242/* Similiarly with suitable state, is FPK possible? */243info->fs.can_fpk =244!info->fs.writes_depth &&245!info->fs.writes_stencil &&246!info->fs.writes_coverage &&247!info->fs.can_discard &&248!info->fs.outputs_read;249250info->fs.reads_frag_coord =251(s->info.inputs_read & (1 << VARYING_SLOT_POS)) ||252BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD);253info->fs.reads_point_coord =254s->info.inputs_read & (1 << VARYING_SLOT_PNTC);255info->fs.reads_face =256(s->info.inputs_read & (1 << VARYING_SLOT_FACE)) ||257BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRONT_FACE);258info->fs.reads_sample_id =259BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID);260info->fs.reads_sample_pos =261BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS);262info->fs.reads_sample_mask_in =263BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_SAMPLE_MASK_IN);264info->fs.reads_helper_invocation =265BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_HELPER_INVOCATION);266collect_varyings(s, nir_var_shader_in, info->varyings.input,267&info->varyings.input_count, pan_is_bifrost(dev));268break;269case MESA_SHADER_COMPUTE:270info->wls_size = s->info.shared_size;271break;272default:273unreachable("Unknown shader state");274}275276info->outputs_written = s->info.outputs_written;277278/* Sysvals have dedicated UBO */279if (info->sysvals.sysval_count)280info->ubo_count = MAX2(s->info.num_ubos + 1, inputs->sysval_ubo + 1);281else282info->ubo_count = s->info.num_ubos;283284info->attribute_count += util_last_bit(s->info.images_used);285info->writes_global = s->info.writes_memory;286287info->sampler_count = info->texture_count = BITSET_LAST_BIT(s->info.textures_used);288289/* This is "redundant" information, but is needed in a draw-time hot path */290if (pan_is_bifrost(dev)) {291for (unsigned i = 0; i < ARRAY_SIZE(info->bifrost.blend); ++i) {292info->bifrost.blend[i].format =293bifrost_blend_type_from_nir(info->bifrost.blend[i].type);294}295}296}297298299