/* Path: blob/21.2-virgl/src/freedreno/ir3/ir3_compiler.c (4565 views) */
/*1* Copyright (C) 2015 Rob Clark <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors:23* Rob Clark <[email protected]>24*/2526#include "util/ralloc.h"2728#include "freedreno_dev_info.h"2930#include "ir3_compiler.h"3132static const struct debug_named_value shader_debug_options[] = {33/* clang-format off */34{"vs", IR3_DBG_SHADER_VS, "Print shader disasm for vertex shaders"},35{"tcs", IR3_DBG_SHADER_TCS, "Print shader disasm for tess ctrl shaders"},36{"tes", IR3_DBG_SHADER_TES, "Print shader disasm for tess eval shaders"},37{"gs", IR3_DBG_SHADER_GS, "Print shader disasm for geometry shaders"},38{"fs", IR3_DBG_SHADER_FS, "Print shader disasm for fragment shaders"},39{"cs", IR3_DBG_SHADER_CS, "Print shader disasm for compute shaders"},40{"disasm", IR3_DBG_DISASM, "Dump NIR and adreno shader disassembly"},41{"optmsgs", IR3_DBG_OPTMSGS, "Enable optimizer debug messages"},42{"forces2en", IR3_DBG_FORCES2EN, 
"Force s2en mode for tex sampler instructions"},43{"nouboopt", IR3_DBG_NOUBOOPT, "Disable lowering UBO to uniform"},44{"nofp16", IR3_DBG_NOFP16, "Don't lower mediump to fp16"},45{"nocache", IR3_DBG_NOCACHE, "Disable shader cache"},46#ifdef DEBUG47/* DEBUG-only options: */48{"schedmsgs", IR3_DBG_SCHEDMSGS, "Enable scheduler debug messages"},49{"ramsgs", IR3_DBG_RAMSGS, "Enable register-allocation debug messages"},50#endif51DEBUG_NAMED_VALUE_END52/* clang-format on */53};5455DEBUG_GET_ONCE_FLAGS_OPTION(ir3_shader_debug, "IR3_SHADER_DEBUG",56shader_debug_options, 0)57DEBUG_GET_ONCE_OPTION(ir3_shader_override_path, "IR3_SHADER_OVERRIDE_PATH",58NULL)5960enum ir3_shader_debug ir3_shader_debug = 0;61const char *ir3_shader_override_path = NULL;6263void64ir3_compiler_destroy(struct ir3_compiler *compiler)65{66disk_cache_destroy(compiler->disk_cache);67ralloc_free(compiler);68}6970struct ir3_compiler *71ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id,72bool robust_ubo_access)73{74struct ir3_compiler *compiler = rzalloc(NULL, struct ir3_compiler);7576ir3_shader_debug = debug_get_option_ir3_shader_debug();77ir3_shader_override_path =78!__check_suid() ? debug_get_option_ir3_shader_override_path() : NULL;7980if (ir3_shader_override_path) {81ir3_shader_debug |= IR3_DBG_NOCACHE;82}8384compiler->dev = dev;85compiler->gpu_id = gpu_id;86compiler->robust_ubo_access = robust_ubo_access;8788/* All known GPU's have 32k local memory (aka shared) */89compiler->local_mem_size = 32 * 1024;90/* TODO see if older GPU's were different here */91compiler->branchstack_size = 64;92compiler->wave_granularity = 2;93compiler->max_waves = 16;9495if (compiler->gpu_id >= 600) {96compiler->samgq_workaround = true;97/* a6xx split the pipeline state into geometry and fragment state, in98* order to let the VS run ahead of the FS. As a result there are now99* separate const files for the the fragment shader and everything100* else, and separate limits. 
There seems to be a shared limit, but101* it's higher than the vert or frag limits.102*103* TODO: The shared limit seems to be different on different on104* different models.105*/106compiler->max_const_pipeline = 640;107compiler->max_const_frag = 512;108compiler->max_const_geom = 512;109compiler->max_const_safe = 128;110111/* Compute shaders don't share a const file with the FS. Instead they112* have their own file, which is smaller than the FS one.113*114* TODO: is this true on earlier gen's?115*/116compiler->max_const_compute = 256;117118/* TODO: implement clip+cull distances on earlier gen's */119compiler->has_clip_cull = true;120121/* TODO: implement private memory on earlier gen's */122compiler->has_pvtmem = true;123124compiler->tess_use_shared =125fd_dev_info(compiler->gpu_id)->a6xx.tess_use_shared;126} else {127compiler->max_const_pipeline = 512;128compiler->max_const_geom = 512;129compiler->max_const_frag = 512;130compiler->max_const_compute = 512;131132/* Note: this will have to change if/when we support tess+GS on133* earlier gen's.134*/135compiler->max_const_safe = 256;136}137138if (compiler->gpu_id >= 600) {139compiler->reg_size_vec4 =140fd_dev_info(compiler->gpu_id)->a6xx.reg_size_vec4;141} else if (compiler->gpu_id >= 400) {142/* On a4xx-a5xx, using r24.x and above requires using the smallest143* threadsize.144*/145compiler->reg_size_vec4 = 48;146} else {147/* TODO: confirm this */148compiler->reg_size_vec4 = 96;149}150151if (compiler->gpu_id >= 600) {152compiler->threadsize_base = 64;153} else if (compiler->gpu_id >= 400) {154/* TODO: Confirm this for a4xx. 
For a5xx this is based on the Vulkan155* 1.1 subgroupSize which is 32.156*/157compiler->threadsize_base = 32;158} else {159compiler->threadsize_base = 8;160}161162if (compiler->gpu_id >= 400) {163/* need special handling for "flat" */164compiler->flat_bypass = true;165compiler->levels_add_one = false;166compiler->unminify_coords = false;167compiler->txf_ms_with_isaml = false;168compiler->array_index_add_half = true;169compiler->instr_align = 16;170compiler->const_upload_unit = 4;171} else {172/* no special handling for "flat" */173compiler->flat_bypass = false;174compiler->levels_add_one = true;175compiler->unminify_coords = true;176compiler->txf_ms_with_isaml = true;177compiler->array_index_add_half = false;178compiler->instr_align = 4;179compiler->const_upload_unit = 8;180}181182ir3_disk_cache_init(compiler);183184return compiler;185}186187188