/* Path: blob/21.2-virgl/src/gallium/drivers/freedreno/freedreno_screen.c */
/* 4570 views */
/*1* Copyright (C) 2012 Rob Clark <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors:23* Rob Clark <[email protected]>24*/2526#include "pipe/p_defines.h"27#include "pipe/p_screen.h"28#include "pipe/p_state.h"2930#include "util/format/u_format.h"31#include "util/format/u_format_s3tc.h"32#include "util/u_debug.h"33#include "util/u_inlines.h"34#include "util/u_memory.h"35#include "util/u_screen.h"36#include "util/u_string.h"3738#include "util/os_time.h"3940#include <errno.h>41#include <stdio.h>42#include <stdlib.h>43#include "drm-uapi/drm_fourcc.h"44#include <sys/sysinfo.h>4546#include "freedreno_fence.h"47#include "freedreno_perfetto.h"48#include "freedreno_query.h"49#include "freedreno_resource.h"50#include "freedreno_screen.h"51#include "freedreno_util.h"5253#include "a2xx/fd2_screen.h"54#include "a3xx/fd3_screen.h"55#include "a4xx/fd4_screen.h"56#include "a5xx/fd5_screen.h"57#include 
"a6xx/fd6_screen.h"5859/* for fd_get_driver/device_uuid() */60#include "common/freedreno_uuid.h"6162#include "a2xx/ir2.h"63#include "ir3/ir3_gallium.h"64#include "ir3/ir3_nir.h"6566/* clang-format off */67static const struct debug_named_value fd_debug_options[] = {68{"msgs", FD_DBG_MSGS, "Print debug messages"},69{"disasm", FD_DBG_DISASM, "Dump TGSI and adreno shader disassembly (a2xx only, see IR3_SHADER_DEBUG)"},70{"dclear", FD_DBG_DCLEAR, "Mark all state dirty after clear"},71{"ddraw", FD_DBG_DDRAW, "Mark all state dirty after draw"},72{"noscis", FD_DBG_NOSCIS, "Disable scissor optimization"},73{"direct", FD_DBG_DIRECT, "Force inline (SS_DIRECT) state loads"},74{"nobypass", FD_DBG_NOBYPASS, "Disable GMEM bypass"},75{"perf", FD_DBG_PERF, "Enable performance warnings"},76{"nobin", FD_DBG_NOBIN, "Disable hw binning"},77{"nogmem", FD_DBG_NOGMEM, "Disable GMEM rendering (bypass only)"},78{"serialc", FD_DBG_SERIALC,"Disable asynchronous shader compile"},79{"shaderdb", FD_DBG_SHADERDB, "Enable shaderdb output"},80{"flush", FD_DBG_FLUSH, "Force flush after every draw"},81{"deqp", FD_DBG_DEQP, "Enable dEQP hacks"},82{"inorder", FD_DBG_INORDER, "Disable reordering for draws/blits"},83{"bstat", FD_DBG_BSTAT, "Print batch stats at context destroy"},84{"nogrow", FD_DBG_NOGROW, "Disable \"growable\" cmdstream buffers, even if kernel supports it"},85{"lrz", FD_DBG_LRZ, "Enable experimental LRZ support (a5xx)"},86{"noindirect",FD_DBG_NOINDR, "Disable hw indirect draws (emulate on CPU)"},87{"noblit", FD_DBG_NOBLIT, "Disable blitter (fallback to generic blit path)"},88{"hiprio", FD_DBG_HIPRIO, "Force high-priority context"},89{"ttile", FD_DBG_TTILE, "Enable texture tiling (a2xx/a3xx/a5xx)"},90{"perfcntrs", FD_DBG_PERFC, "Expose performance counters"},91{"noubwc", FD_DBG_NOUBWC, "Disable UBWC for all internal buffers"},92{"nolrz", FD_DBG_NOLRZ, "Disable LRZ (a6xx)"},93{"notile", FD_DBG_NOTILE, "Disable tiling for all internal buffers"},94{"layout", FD_DBG_LAYOUT, "Dump resource 
layouts"},95{"nofp16", FD_DBG_NOFP16, "Disable mediump precision lowering"},96{"nohw", FD_DBG_NOHW, "Disable submitting commands to the HW"},97DEBUG_NAMED_VALUE_END98};99/* clang-format on */100101DEBUG_GET_ONCE_FLAGS_OPTION(fd_mesa_debug, "FD_MESA_DEBUG", fd_debug_options, 0)102103int fd_mesa_debug = 0;104bool fd_binning_enabled = true;105106static const char *107fd_screen_get_name(struct pipe_screen *pscreen)108{109return fd_dev_name(fd_screen(pscreen)->gpu_id);110}111112static const char *113fd_screen_get_vendor(struct pipe_screen *pscreen)114{115return "freedreno";116}117118static const char *119fd_screen_get_device_vendor(struct pipe_screen *pscreen)120{121return "Qualcomm";122}123124static uint64_t125fd_screen_get_timestamp(struct pipe_screen *pscreen)126{127struct fd_screen *screen = fd_screen(pscreen);128129if (screen->has_timestamp) {130uint64_t n;131fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &n);132debug_assert(screen->max_freq > 0);133return n * 1000000000 / screen->max_freq;134} else {135int64_t cpu_time = os_time_get() * 1000;136return cpu_time + screen->cpu_gpu_time_delta;137}138}139140static void141fd_screen_destroy(struct pipe_screen *pscreen)142{143struct fd_screen *screen = fd_screen(pscreen);144145if (screen->pipe)146fd_pipe_del(screen->pipe);147148if (screen->dev) {149fd_device_purge(screen->dev);150fd_device_del(screen->dev);151}152153if (screen->ro)154screen->ro->destroy(screen->ro);155156fd_bc_fini(&screen->batch_cache);157fd_gmem_screen_fini(pscreen);158159slab_destroy_parent(&screen->transfer_pool);160161simple_mtx_destroy(&screen->lock);162163util_idalloc_mt_fini(&screen->buffer_ids);164165u_transfer_helper_destroy(pscreen->transfer_helper);166167if (screen->compiler)168ir3_screen_fini(pscreen);169170free(screen->perfcntr_queries);171free(screen);172}173174/*175TODO either move caps to a2xx/a3xx specific code, or maybe have some176tables for things that differ if the delta is not too much..177*/178static int179fd_screen_get_param(struct 
pipe_screen *pscreen, enum pipe_cap param)180{181struct fd_screen *screen = fd_screen(pscreen);182183/* this is probably not totally correct.. but it's a start: */184switch (param) {185/* Supported features (boolean caps). */186case PIPE_CAP_NPOT_TEXTURES:187case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:188case PIPE_CAP_ANISOTROPIC_FILTER:189case PIPE_CAP_POINT_SPRITE:190case PIPE_CAP_BLEND_EQUATION_SEPARATE:191case PIPE_CAP_TEXTURE_SWIZZLE:192case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:193case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:194case PIPE_CAP_SEAMLESS_CUBE_MAP:195case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:196case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:197case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:198case PIPE_CAP_STRING_MARKER:199case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:200case PIPE_CAP_TEXTURE_BARRIER:201case PIPE_CAP_INVALIDATE_BUFFER:202case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND:203case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:204case PIPE_CAP_NIR_COMPACT_ARRAYS:205return 1;206207case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:208return is_a6xx(screen);209210case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:211case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:212case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:213return !is_a2xx(screen);214215case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:216return is_a2xx(screen);217case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:218return !is_a2xx(screen);219220case PIPE_CAP_PACKED_UNIFORMS:221return !is_a2xx(screen);222223case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:224case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:225return screen->has_robustness;226227case PIPE_CAP_VERTEXID_NOBASE:228return is_a3xx(screen) || is_a4xx(screen);229230case PIPE_CAP_COMPUTE:231return has_compute(screen);232233case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:234case PIPE_CAP_PCI_GROUP:235case PIPE_CAP_PCI_BUS:236case PIPE_CAP_PCI_DEVICE:237case PIPE_CAP_PCI_FUNCTION:238return 0;239240case 
PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:241case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:242case PIPE_CAP_VERTEX_SHADER_SATURATE:243case PIPE_CAP_PRIMITIVE_RESTART:244case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:245case PIPE_CAP_TGSI_INSTANCEID:246case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:247case PIPE_CAP_INDEP_BLEND_ENABLE:248case PIPE_CAP_INDEP_BLEND_FUNC:249case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:250case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:251case PIPE_CAP_CONDITIONAL_RENDER:252case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:253case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:254case PIPE_CAP_CLIP_HALFZ:255return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) ||256is_a6xx(screen);257258case PIPE_CAP_FAKE_SW_MSAA:259return !fd_screen_get_param(pscreen, PIPE_CAP_TEXTURE_MULTISAMPLE);260261case PIPE_CAP_TEXTURE_MULTISAMPLE:262return is_a5xx(screen) || is_a6xx(screen);263264case PIPE_CAP_SURFACE_SAMPLE_COUNT:265return is_a6xx(screen);266267case PIPE_CAP_DEPTH_CLIP_DISABLE:268return is_a3xx(screen) || is_a4xx(screen) || is_a6xx(screen);269270case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE:271return is_a6xx(screen);272273case PIPE_CAP_POLYGON_OFFSET_CLAMP:274return is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen);275276case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF:277return 0;278279case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:280if (is_a3xx(screen))281return 16;282if (is_a4xx(screen))283return 32;284if (is_a5xx(screen) || is_a6xx(screen))285return 64;286return 0;287case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:288/* We could possibly emulate more by pretending 2d/rect textures and289* splitting high bits of index into 2nd dimension..290*/291if (is_a3xx(screen))292return 8192;293if (is_a4xx(screen))294return 16384;295296/* Note that the Vulkan blob on a540 and 640 report a297* maxTexelBufferElements of just 65536 (the GLES3.2 and Vulkan298* minimum).299*/300if (is_a5xx(screen) || is_a6xx(screen))301return 1 << 27;302return 0;303304case PIPE_CAP_TEXTURE_FLOAT_LINEAR:305case 
PIPE_CAP_CUBE_MAP_ARRAY:306case PIPE_CAP_SAMPLER_VIEW_TARGET:307case PIPE_CAP_TEXTURE_QUERY_LOD:308return is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen);309310case PIPE_CAP_START_INSTANCE:311/* Note that a5xx can do this, it just can't (at least with312* current firmware) do draw_indirect with base_instance.313* Since draw_indirect is needed sooner (gles31 and gl40 vs314* gl42), hide base_instance on a5xx. :-/315*/316return is_a4xx(screen);317318case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:319return 64;320321case PIPE_CAP_GLSL_FEATURE_LEVEL:322case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:323if (is_a6xx(screen))324return 330;325else if (is_ir3(screen))326return 140;327else328return 120;329330case PIPE_CAP_ESSL_FEATURE_LEVEL:331/* we can probably enable 320 for a5xx too, but need to test: */332if (is_a6xx(screen))333return 320;334if (is_a5xx(screen))335return 310;336if (is_ir3(screen))337return 300;338return 120;339340case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:341if (is_a6xx(screen))342return 64;343if (is_a5xx(screen))344return 4;345return 0;346347case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:348if (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen))349return 4;350return 0;351352/* TODO if we need this, do it in nir/ir3 backend to avoid breaking353* precompile: */354case PIPE_CAP_FORCE_PERSAMPLE_INTERP:355return 0;356357case PIPE_CAP_FBFETCH:358if (fd_device_version(screen->dev) >= FD_VERSION_GMEM_BASE &&359is_a6xx(screen))360return 1;361return 0;362case PIPE_CAP_SAMPLE_SHADING:363if (is_a6xx(screen))364return 1;365return 0;366367case PIPE_CAP_CONTEXT_PRIORITY_MASK:368return screen->priority_mask;369370case PIPE_CAP_DRAW_INDIRECT:371if (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen))372return 1;373return 0;374375case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:376if (is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen))377return 1;378return 0;379380case PIPE_CAP_LOAD_CONSTBUF:381/* name is confusing, but this turns on std430 packing */382if 
(is_ir3(screen))383return 1;384return 0;385386case PIPE_CAP_NIR_IMAGES_AS_DEREF:387return 0;388389case PIPE_CAP_MAX_VIEWPORTS:390return 1;391392case PIPE_CAP_MAX_VARYINGS:393return is_a6xx(screen) ? 31 : 16;394395case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:396/* We don't really have a limit on this, it all goes into the main397* memory buffer. Needs to be at least 120 / 4 (minimum requirement398* for GL_MAX_TESS_PATCH_COMPONENTS).399*/400return 128;401402case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET:403return 64 * 1024 * 1024;404405case PIPE_CAP_SHAREABLE_SHADERS:406case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:407if (is_ir3(screen))408return 1;409return 0;410411/* Geometry shaders.. */412case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:413return 512;414case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:415return 2048;416case PIPE_CAP_MAX_GS_INVOCATIONS:417return 32;418419/* Only a2xx has the half-border clamp mode in HW, just have mesa/st lower420* it for later HW.421*/422case PIPE_CAP_GL_CLAMP:423return is_a2xx(screen);424425case PIPE_CAP_CLIP_PLANES:426/* On a3xx, there is HW support for GL user clip planes that427* occasionally has to fall back to shader key-based lowering to clip428* distances in the VS, and we don't support clip distances so that is429* always shader-based lowering in the FS.430*431* On a4xx, there is no HW support for clip planes, so they are432* always lowered to clip distances. We also lack SW support for the433* HW's clip distances in HW, so we do shader-based lowering in the FS434* in the driver backend.435*436* On a5xx-a6xx, we have the HW clip distances hooked up, so we just let437* mesa/st lower desktop GL's clip planes to clip distances in the last438* vertex shader stage.439*/440return !is_a5xx(screen) && !is_a6xx(screen);441442/* Stream output. 
*/443case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:444if (is_ir3(screen))445return PIPE_MAX_SO_BUFFERS;446return 0;447case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:448case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:449case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:450case PIPE_CAP_TGSI_TEXCOORD:451if (is_ir3(screen))452return 1;453return 0;454case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:455return 1;456case PIPE_CAP_TGSI_FS_POINT_IS_SYSVAL:457return is_a2xx(screen);458case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:459case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:460if (is_ir3(screen))461return 16 * 4; /* should only be shader out limit? */462return 0;463464/* Texturing. */465case PIPE_CAP_MAX_TEXTURE_2D_SIZE:466if (is_a6xx(screen) || is_a5xx(screen) || is_a4xx(screen))467return 16384;468else469return 8192;470case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:471if (is_a6xx(screen) || is_a5xx(screen) || is_a4xx(screen))472return 15;473else474return 14;475case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:476return 11;477478case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:479return (is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) ||480is_a6xx(screen))481? 256482: 0;483484/* Render targets. */485case PIPE_CAP_MAX_RENDER_TARGETS:486return screen->max_rts;487case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:488return (is_a3xx(screen) || is_a6xx(screen)) ? 1 : 0;489490/* Queries. 
*/491case PIPE_CAP_OCCLUSION_QUERY:492return is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) ||493is_a6xx(screen);494case PIPE_CAP_QUERY_TIMESTAMP:495case PIPE_CAP_QUERY_TIME_ELAPSED:496/* only a4xx, requires new enough kernel so we know max_freq: */497return (screen->max_freq > 0) &&498(is_a4xx(screen) || is_a5xx(screen) || is_a6xx(screen));499500case PIPE_CAP_VENDOR_ID:501return 0x5143;502case PIPE_CAP_DEVICE_ID:503return 0xFFFFFFFF;504case PIPE_CAP_ACCELERATED:505return 1;506case PIPE_CAP_VIDEO_MEMORY:507DBG("FINISHME: The value returned is incorrect\n");508return 10;509case PIPE_CAP_UMA:510return 1;511case PIPE_CAP_MEMOBJ:512return fd_device_version(screen->dev) >= FD_VERSION_MEMORY_FD;513case PIPE_CAP_NATIVE_FENCE_FD:514return fd_device_version(screen->dev) >= FD_VERSION_FENCE_FD;515case PIPE_CAP_FENCE_SIGNAL:516return screen->has_syncobj;517case PIPE_CAP_CULL_DISTANCE:518return is_a6xx(screen);519case PIPE_CAP_SHADER_STENCIL_EXPORT:520return is_a6xx(screen);521case PIPE_CAP_TWO_SIDED_COLOR:522return 0;523default:524return u_pipe_screen_get_param_defaults(pscreen, param);525}526}527528static float529fd_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)530{531switch (param) {532case PIPE_CAPF_MAX_LINE_WIDTH:533case PIPE_CAPF_MAX_LINE_WIDTH_AA:534/* NOTE: actual value is 127.0f, but this is working around a deqp535* bug.. 
dEQP-GLES3.functional.rasterization.primitives.lines_wide536* uses too small of a render target size, and gets confused when537* the lines start going offscreen.538*539* See: https://code.google.com/p/android/issues/detail?id=206513540*/541if (FD_DBG(DEQP))542return 48.0f;543return 127.0f;544case PIPE_CAPF_MAX_POINT_WIDTH:545case PIPE_CAPF_MAX_POINT_WIDTH_AA:546return 4092.0f;547case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:548return 16.0f;549case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:550return 15.0f;551case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:552case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:553case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:554return 0.0f;555}556mesa_loge("unknown paramf %d", param);557return 0;558}559560static int561fd_screen_get_shader_param(struct pipe_screen *pscreen,562enum pipe_shader_type shader,563enum pipe_shader_cap param)564{565struct fd_screen *screen = fd_screen(pscreen);566567switch (shader) {568case PIPE_SHADER_FRAGMENT:569case PIPE_SHADER_VERTEX:570break;571case PIPE_SHADER_TESS_CTRL:572case PIPE_SHADER_TESS_EVAL:573case PIPE_SHADER_GEOMETRY:574if (is_a6xx(screen))575break;576return 0;577case PIPE_SHADER_COMPUTE:578if (has_compute(screen))579break;580return 0;581default:582mesa_loge("unknown shader type %d", shader);583return 0;584}585586/* this is probably not totally correct.. but it's a start: */587switch (param) {588case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:589case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:590case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:591case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:592return 16384;593case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:594return 8; /* XXX */595case PIPE_SHADER_CAP_MAX_INPUTS:596if (shader == PIPE_SHADER_GEOMETRY && is_a6xx(screen))597return 16;598return is_a6xx(screen) ? 32 : 16;599case PIPE_SHADER_CAP_MAX_OUTPUTS:600return is_a6xx(screen) ? 32 : 16;601case PIPE_SHADER_CAP_MAX_TEMPS:602return 64; /* Max native temporaries. 
*/603case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:604/* NOTE: seems to be limit for a3xx is actually 512 but605* split between VS and FS. Use lower limit of 256 to606* avoid getting into impossible situations:607*/608return ((is_a3xx(screen) || is_a4xx(screen) || is_a5xx(screen) ||609is_a6xx(screen))610? 4096611: 64) *612sizeof(float[4]);613case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:614return is_ir3(screen) ? 16 : 1;615case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:616return 1;617case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:618case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:619case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:620case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:621/* a2xx compiler doesn't handle indirect: */622return is_ir3(screen) ? 1 : 0;623case PIPE_SHADER_CAP_SUBROUTINES:624case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:625case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:626case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:627case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:628case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:629case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:630case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:631case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:632case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:633return 0;634case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:635return 1;636case PIPE_SHADER_CAP_INTEGERS:637return is_ir3(screen) ? 
1 : 0;638case PIPE_SHADER_CAP_INT64_ATOMICS:639case PIPE_SHADER_CAP_FP16_DERIVATIVES:640case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:641case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:642return 0;643case PIPE_SHADER_CAP_INT16:644case PIPE_SHADER_CAP_FP16:645return (646(is_a5xx(screen) || is_a6xx(screen)) &&647(shader == PIPE_SHADER_COMPUTE || shader == PIPE_SHADER_FRAGMENT) &&648!FD_DBG(NOFP16));649case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:650case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:651return 16;652case PIPE_SHADER_CAP_PREFERRED_IR:653return PIPE_SHADER_IR_NIR;654case PIPE_SHADER_CAP_SUPPORTED_IRS:655return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI);656case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:657return 32;658case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:659case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:660if (is_a5xx(screen) || is_a6xx(screen)) {661/* a5xx (and a4xx for that matter) has one state-block662* for compute-shader SSBO's and another that is shared663* by VS/HS/DS/GS/FS.. so to simplify things for now664* just advertise SSBOs for FS and CS. We could possibly665* do what blob does, and partition the space for666* VS/HS/DS/GS/FS. 
The blob advertises:667*668* GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS: 4669* GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS: 4670* GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS: 4671* GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS: 4672* GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS: 4673* GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS: 24674* GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS: 24675*676* I think that way we could avoid having to patch shaders677* for actual SSBO indexes by using a static partitioning.678*679* Note same state block is used for images and buffers,680* but images also need texture state for read access681* (isam/isam.3d)682*/683switch (shader) {684case PIPE_SHADER_FRAGMENT:685case PIPE_SHADER_COMPUTE:686return 24;687default:688return 0;689}690}691return 0;692}693mesa_loge("unknown shader param %d", param);694return 0;695}696697/* TODO depending on how much the limits differ for a3xx/a4xx, maybe move this698* into per-generation backend?699*/700static int701fd_get_compute_param(struct pipe_screen *pscreen, enum pipe_shader_ir ir_type,702enum pipe_compute_cap param, void *ret)703{704struct fd_screen *screen = fd_screen(pscreen);705const char *const ir = "ir3";706707if (!has_compute(screen))708return 0;709710#define RET(x) \711do { \712if (ret) \713memcpy(ret, x, sizeof(x)); \714return sizeof(x); \715} while (0)716717switch (param) {718case PIPE_COMPUTE_CAP_ADDRESS_BITS:719// don't expose 64b pointer support yet, until ir3 supports 64b720// math, otherwise spir64 target is used and we get 64b pointer721// calculations that we can't do yet722// if (is_a5xx(screen))723// RET((uint32_t []){ 64 });724RET((uint32_t[]){32});725726case PIPE_COMPUTE_CAP_IR_TARGET:727if (ret)728sprintf(ret, "%s", ir);729return strlen(ir) * sizeof(char);730731case PIPE_COMPUTE_CAP_GRID_DIMENSION:732RET((uint64_t[]){3});733734case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:735RET(((uint64_t[]){65535, 65535, 65535}));736737case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:738RET(((uint64_t[]){1024, 1024, 64}));739740case 
PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:741RET((uint64_t[]){1024});742743case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE:744RET((uint64_t[]){screen->ram_size});745746case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE:747RET((uint64_t[]){32768});748749case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:750case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE:751RET((uint64_t[]){4096});752753case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:754RET((uint64_t[]){screen->ram_size});755756case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:757RET((uint32_t[]){screen->max_freq / 1000000});758759case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:760RET((uint32_t[]){9999}); // TODO761762case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:763RET((uint32_t[]){1});764765case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:766RET((uint32_t[]){32}); // TODO767768case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:769RET((uint64_t[]){1024}); // TODO770}771772return 0;773}774775static const void *776fd_get_compiler_options(struct pipe_screen *pscreen, enum pipe_shader_ir ir,777unsigned shader)778{779struct fd_screen *screen = fd_screen(pscreen);780781if (is_ir3(screen))782return ir3_get_compiler_options(screen->compiler);783784return ir2_get_compiler_options();785}786787static struct disk_cache *788fd_get_disk_shader_cache(struct pipe_screen *pscreen)789{790struct fd_screen *screen = fd_screen(pscreen);791792if (is_ir3(screen)) {793struct ir3_compiler *compiler = screen->compiler;794return compiler->disk_cache;795}796797return NULL;798}799800bool801fd_screen_bo_get_handle(struct pipe_screen *pscreen, struct fd_bo *bo,802struct renderonly_scanout *scanout, unsigned stride,803struct winsys_handle *whandle)804{805struct fd_screen *screen = fd_screen(pscreen);806807whandle->stride = stride;808809if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {810return fd_bo_get_name(bo, &whandle->handle) == 0;811} else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {812if (screen->ro) {813return renderonly_get_handle(scanout, whandle);814} else {815whandle->handle = fd_bo_handle(bo);816return true;817}818} else 
if (whandle->type == WINSYS_HANDLE_TYPE_FD) {819whandle->handle = fd_bo_dmabuf(bo);820return true;821} else {822return false;823}824}825826static void827fd_screen_query_dmabuf_modifiers(struct pipe_screen *pscreen,828enum pipe_format format, int max,829uint64_t *modifiers,830unsigned int *external_only, int *count)831{832struct fd_screen *screen = fd_screen(pscreen);833int i, num = 0;834835max = MIN2(max, screen->num_supported_modifiers);836837if (!max) {838max = screen->num_supported_modifiers;839external_only = NULL;840modifiers = NULL;841}842843for (i = 0; i < max; i++) {844if (modifiers)845modifiers[num] = screen->supported_modifiers[i];846847if (external_only)848external_only[num] = 0;849850num++;851}852853*count = num;854}855856static bool857fd_screen_is_dmabuf_modifier_supported(struct pipe_screen *pscreen,858uint64_t modifier,859enum pipe_format format,860bool *external_only)861{862struct fd_screen *screen = fd_screen(pscreen);863int i;864865for (i = 0; i < screen->num_supported_modifiers; i++) {866if (modifier == screen->supported_modifiers[i]) {867if (external_only)868*external_only = false;869870return true;871}872}873874return false;875}876877struct fd_bo *878fd_screen_bo_from_handle(struct pipe_screen *pscreen,879struct winsys_handle *whandle)880{881struct fd_screen *screen = fd_screen(pscreen);882struct fd_bo *bo;883884if (whandle->type == WINSYS_HANDLE_TYPE_SHARED) {885bo = fd_bo_from_name(screen->dev, whandle->handle);886} else if (whandle->type == WINSYS_HANDLE_TYPE_KMS) {887bo = fd_bo_from_handle(screen->dev, whandle->handle, 0);888} else if (whandle->type == WINSYS_HANDLE_TYPE_FD) {889bo = fd_bo_from_dmabuf(screen->dev, whandle->handle);890} else {891DBG("Attempt to import unsupported handle type %d", whandle->type);892return NULL;893}894895if (!bo) {896DBG("ref name 0x%08x failed", whandle->handle);897return NULL;898}899900return bo;901}902903static void904_fd_fence_ref(struct pipe_screen *pscreen, struct pipe_fence_handle **ptr,905struct 
pipe_fence_handle *pfence)906{907fd_fence_ref(ptr, pfence);908}909910static void911fd_screen_get_device_uuid(struct pipe_screen *pscreen, char *uuid)912{913struct fd_screen *screen = fd_screen(pscreen);914915fd_get_device_uuid(uuid, screen->gpu_id);916}917918static void919fd_screen_get_driver_uuid(struct pipe_screen *pscreen, char *uuid)920{921fd_get_driver_uuid(uuid);922}923924struct pipe_screen *925fd_screen_create(struct fd_device *dev, struct renderonly *ro)926{927struct fd_screen *screen = CALLOC_STRUCT(fd_screen);928struct pipe_screen *pscreen;929uint64_t val;930931fd_mesa_debug = debug_get_option_fd_mesa_debug();932933if (FD_DBG(NOBIN))934fd_binning_enabled = false;935936if (!screen)937return NULL;938939#ifdef HAVE_PERFETTO940fd_perfetto_init();941#endif942943pscreen = &screen->base;944945screen->dev = dev;946screen->ro = ro;947screen->refcnt = 1;948949// maybe this should be in context?950screen->pipe = fd_pipe_new(screen->dev, FD_PIPE_3D);951if (!screen->pipe) {952DBG("could not create 3d pipe");953goto fail;954}955956if (fd_pipe_get_param(screen->pipe, FD_GMEM_SIZE, &val)) {957DBG("could not get GMEM size");958goto fail;959}960screen->gmemsize_bytes = env_var_as_unsigned("FD_MESA_GMEM", val);961962if (fd_device_version(dev) >= FD_VERSION_GMEM_BASE) {963fd_pipe_get_param(screen->pipe, FD_GMEM_BASE, &screen->gmem_base);964}965966if (fd_pipe_get_param(screen->pipe, FD_DEVICE_ID, &val)) {967DBG("could not get device-id");968goto fail;969}970screen->device_id = val;971972if (fd_pipe_get_param(screen->pipe, FD_MAX_FREQ, &val)) {973DBG("could not get gpu freq");974/* this limits what performance related queries are975* supported but is not fatal976*/977screen->max_freq = 0;978} else {979screen->max_freq = val;980if (fd_pipe_get_param(screen->pipe, FD_TIMESTAMP, &val) == 0)981screen->has_timestamp = true;982}983984if (fd_pipe_get_param(screen->pipe, FD_GPU_ID, &val)) {985DBG("could not get gpu-id");986goto fail;987}988screen->gpu_id = val;989990if 
(fd_pipe_get_param(screen->pipe, FD_CHIP_ID, &val)) {991DBG("could not get chip-id");992/* older kernels may not have this property: */993unsigned core = screen->gpu_id / 100;994unsigned major = (screen->gpu_id % 100) / 10;995unsigned minor = screen->gpu_id % 10;996unsigned patch = 0; /* assume the worst */997val = (patch & 0xff) | ((minor & 0xff) << 8) | ((major & 0xff) << 16) |998((core & 0xff) << 24);999}1000screen->chip_id = val;10011002if (fd_pipe_get_param(screen->pipe, FD_NR_RINGS, &val)) {1003DBG("could not get # of rings");1004screen->priority_mask = 0;1005} else {1006/* # of rings equates to number of unique priority values: */1007screen->priority_mask = (1 << val) - 1;1008}10091010if (fd_device_version(dev) >= FD_VERSION_ROBUSTNESS)1011screen->has_robustness = true;10121013screen->has_syncobj = fd_has_syncobj(screen->dev);10141015struct sysinfo si;1016sysinfo(&si);1017screen->ram_size = si.totalram;10181019DBG("Pipe Info:");1020DBG(" GPU-id: %d", screen->gpu_id);1021DBG(" Chip-id: 0x%08x", screen->chip_id);1022DBG(" GMEM size: 0x%08x", screen->gmemsize_bytes);10231024const struct fd_dev_info *info = fd_dev_info(screen->gpu_id);1025if (!info) {1026mesa_loge("unsupported GPU: a%03d", screen->gpu_id);1027goto fail;1028}10291030/* explicitly checking for GPU revisions that are known to work. This1031* may be overly conservative for a3xx, where spoofing the gpu_id with1032* the blob driver seems to generate identical cmdstream dumps. But1033* on a2xx, there seem to be small differences between the GPU revs1034* so it is probably better to actually test first on real hardware1035* before enabling:1036*1037* If you have a different adreno version, feel free to add it to one1038* of the cases below and see what happens. 
And if it works, please1039* send a patch ;-)1040*/1041switch (screen->gpu_id / 100) {1042case 2:1043fd2_screen_init(pscreen);1044break;1045case 3:1046fd3_screen_init(pscreen);1047break;1048case 4:1049fd4_screen_init(pscreen);1050break;1051case 5:1052fd5_screen_init(pscreen);1053break;1054case 6:1055fd6_screen_init(pscreen);1056break;1057default:1058mesa_loge("unsupported GPU: a%03d", screen->gpu_id);1059goto fail;1060}10611062screen->info = info;10631064if (is_a6xx(screen)) {1065screen->ccu_offset_bypass = screen->info->num_ccu * A6XX_CCU_DEPTH_SIZE;1066screen->ccu_offset_gmem = (screen->gmemsize_bytes -1067screen->info->num_ccu * A6XX_CCU_GMEM_COLOR_SIZE);1068}10691070if (FD_DBG(PERFC)) {1071screen->perfcntr_groups =1072fd_perfcntrs(screen->gpu_id, &screen->num_perfcntr_groups);1073}10741075/* NOTE: don't enable if we have too old of a kernel to support1076* growable cmdstream buffers, since memory requirement for cmdstream1077* buffers would be too much otherwise.1078*/1079if (fd_device_version(dev) >= FD_VERSION_UNLIMITED_CMDS)1080screen->reorder = !FD_DBG(INORDER);10811082fd_bc_init(&screen->batch_cache);10831084list_inithead(&screen->context_list);10851086util_idalloc_mt_init_tc(&screen->buffer_ids);10871088(void)simple_mtx_init(&screen->lock, mtx_plain);10891090pscreen->destroy = fd_screen_destroy;1091pscreen->get_param = fd_screen_get_param;1092pscreen->get_paramf = fd_screen_get_paramf;1093pscreen->get_shader_param = fd_screen_get_shader_param;1094pscreen->get_compute_param = fd_get_compute_param;1095pscreen->get_compiler_options = fd_get_compiler_options;1096pscreen->get_disk_shader_cache = fd_get_disk_shader_cache;10971098fd_resource_screen_init(pscreen);1099fd_query_screen_init(pscreen);1100fd_gmem_screen_init(pscreen);11011102pscreen->get_name = fd_screen_get_name;1103pscreen->get_vendor = fd_screen_get_vendor;1104pscreen->get_device_vendor = fd_screen_get_device_vendor;11051106pscreen->get_timestamp = 
fd_screen_get_timestamp;11071108pscreen->fence_reference = _fd_fence_ref;1109pscreen->fence_finish = fd_fence_finish;1110pscreen->fence_get_fd = fd_fence_get_fd;11111112pscreen->query_dmabuf_modifiers = fd_screen_query_dmabuf_modifiers;1113pscreen->is_dmabuf_modifier_supported =1114fd_screen_is_dmabuf_modifier_supported;11151116pscreen->get_device_uuid = fd_screen_get_device_uuid;1117pscreen->get_driver_uuid = fd_screen_get_driver_uuid;11181119slab_create_parent(&screen->transfer_pool, sizeof(struct fd_transfer), 16);11201121return pscreen;11221123fail:1124fd_screen_destroy(pscreen);1125return NULL;1126}112711281129