Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nv50/nv50_screen.c
4574 views
/*1* Copyright 2010 Christoph Bumiller2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*/2122#include <errno.h>23#include <xf86drm.h>24#include <nouveau_drm.h>25#include "util/format/u_format.h"26#include "util/format/u_format_s3tc.h"27#include "util/u_screen.h"28#include "pipe/p_screen.h"29#include "compiler/nir/nir.h"3031#include "nv50/nv50_context.h"32#include "nv50/nv50_screen.h"3334#include "nouveau_vp3_video.h"3536#include "nv_object.xml.h"3738/* affected by LOCAL_WARPS_LOG_ALLOC / LOCAL_WARPS_NO_CLAMP */39#define LOCAL_WARPS_ALLOC 3240/* affected by STACK_WARPS_LOG_ALLOC / STACK_WARPS_NO_CLAMP */41#define STACK_WARPS_ALLOC 324243#define THREADS_IN_WARP 324445static bool46nv50_screen_is_format_supported(struct pipe_screen *pscreen,47enum pipe_format format,48enum pipe_texture_target target,49unsigned sample_count,50unsigned storage_sample_count,51unsigned bindings)52{53if (sample_count > 8)54return false;55if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */56return false;57if (sample_count == 8 && util_format_get_blocksizebits(format) >= 128)58return false;5960if (MAX2(1, sample_count) != MAX2(1, storage_sample_count))61return false;6263/* Short-circuit the rest of the logic -- this is used by the gallium frontend64* to determine valid MS levels in a no-attachments scenario.65*/66if (format == PIPE_FORMAT_NONE && bindings & PIPE_BIND_RENDER_TARGET)67return true;6869switch (format) {70case PIPE_FORMAT_Z16_UNORM:71if (nv50_screen(pscreen)->tesla->oclass < NVA0_3D_CLASS)72return false;73break;74default:75break;76}7778if (bindings & PIPE_BIND_LINEAR)79if (util_format_is_depth_or_stencil(format) ||80(target != PIPE_TEXTURE_1D &&81target != PIPE_TEXTURE_2D &&82target != PIPE_TEXTURE_RECT) ||83sample_count > 1)84return false;8586/* shared is always supported */87bindings &= ~(PIPE_BIND_LINEAR |88PIPE_BIND_SHARED);8990if (bindings & PIPE_BIND_INDEX_BUFFER) {91if (format != PIPE_FORMAT_R8_UINT &&92format != PIPE_FORMAT_R16_UINT &&93format != PIPE_FORMAT_R32_UINT)94return false;95bindings &= ~PIPE_BIND_INDEX_BUFFER;96}9798return (( nv50_format_table[format].usage |99nv50_vertex_format[format].usage) & bindings) == bindings;100}101102static int103nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)104{105const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;106struct nouveau_device *dev = nouveau_screen(pscreen)->device;107static bool debug_cap_printed[PIPE_CAP_LAST] = {};108109switch (param) {110/* non-boolean caps */111case PIPE_CAP_MAX_TEXTURE_2D_SIZE:112return 8192;113case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:114return 12;115case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:116return 14;117case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:118return 512;119case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:120case PIPE_CAP_MIN_TEXEL_OFFSET:121return -8;122case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:123case PIPE_CAP_MAX_TEXEL_OFFSET:124return 7;125case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:126return 128 * 1024 * 1024;127case PIPE_CAP_GLSL_FEATURE_LEVEL:128return 330;129case PIPE_CAP_GLSL_FEATURE_LEVEL_COMPATIBILITY:130return 330;131case PIPE_CAP_ESSL_FEATURE_LEVEL:132return class_3d >= NVA3_3D_CLASS ? 310 : 300;133case PIPE_CAP_MAX_RENDER_TARGETS:134return 8;135case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:136return 1;137case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:138return NV50_MAX_GLOBALS - 1;139case PIPE_CAP_VIEWPORT_SUBPIXEL_BITS:140case PIPE_CAP_RASTERIZER_SUBPIXEL_BITS:141return 8;142case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:143return 4;144case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:145return 64;146case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:147return 4;148case PIPE_CAP_MAX_GEOMETRY_OUTPUT_VERTICES:149case PIPE_CAP_MAX_GEOMETRY_TOTAL_OUTPUT_COMPONENTS:150return 1024;151case PIPE_CAP_MAX_VERTEX_STREAMS:152return 1;153case PIPE_CAP_MAX_GS_INVOCATIONS:154return 0;155case PIPE_CAP_MAX_SHADER_BUFFER_SIZE:156return 1 << 27;157case PIPE_CAP_MAX_VERTEX_ATTRIB_STRIDE:158return 2048;159case PIPE_CAP_MAX_VERTEX_ELEMENT_SRC_OFFSET:160return 2047;161case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:162return 256;163case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:164return 16; /* 256 for binding as RT, but that's not possible in GL */165case PIPE_CAP_SHADER_BUFFER_OFFSET_ALIGNMENT:166return 256; /* the access limit is aligned to 256 */167case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:168return NOUVEAU_MIN_BUFFER_MAP_ALIGN;169case PIPE_CAP_MAX_VIEWPORTS:170return NV50_MAX_VIEWPORTS;171case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:172return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50;173case PIPE_CAP_ENDIANNESS:174return PIPE_ENDIAN_LITTLE;175case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:176return (class_3d >= NVA3_3D_CLASS) ? 4 : 0;177case PIPE_CAP_MAX_WINDOW_RECTANGLES:178return NV50_MAX_WINDOW_RECTANGLES;179case PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET:180return 16 * 1024 * 1024;181case PIPE_CAP_MAX_VARYINGS:182return 15;183case PIPE_CAP_MAX_VERTEX_BUFFERS:184return 16;185case PIPE_CAP_GL_BEGIN_END_BUFFER_SIZE:186return 512 * 1024; /* TODO: Investigate tuning this */187case PIPE_CAP_MAX_TEXTURE_MB:188return 0; /* TODO: use 1/2 of VRAM for this? */189190case PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART:191case PIPE_CAP_SUPPORTED_PRIM_MODES:192return BITFIELD_MASK(PIPE_PRIM_MAX);193194/* supported caps */195case PIPE_CAP_TEXTURE_MIRROR_CLAMP:196case PIPE_CAP_TEXTURE_MIRROR_CLAMP_TO_EDGE:197case PIPE_CAP_TEXTURE_SWIZZLE:198case PIPE_CAP_TEXTURE_SHADOW_MAP:199case PIPE_CAP_NPOT_TEXTURES:200case PIPE_CAP_MIXED_FRAMEBUFFER_SIZES:201case PIPE_CAP_MIXED_COLOR_DEPTH_BITS:202case PIPE_CAP_ANISOTROPIC_FILTER:203case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:204case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:205case PIPE_CAP_DEPTH_CLIP_DISABLE:206case PIPE_CAP_POINT_SPRITE:207case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:208case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:209case PIPE_CAP_VERTEX_SHADER_SATURATE:210case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:211case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:212case PIPE_CAP_VERTEX_COLOR_CLAMPED:213case PIPE_CAP_QUERY_TIMESTAMP:214case PIPE_CAP_QUERY_TIME_ELAPSED:215case PIPE_CAP_OCCLUSION_QUERY:216case PIPE_CAP_BLEND_EQUATION_SEPARATE:217case PIPE_CAP_INDEP_BLEND_ENABLE:218case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:219case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:220case PIPE_CAP_PRIMITIVE_RESTART:221case PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX:222case PIPE_CAP_TGSI_INSTANCEID:223case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:224case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:225case PIPE_CAP_CONDITIONAL_RENDER:226case PIPE_CAP_TEXTURE_BARRIER:227case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:228case PIPE_CAP_START_INSTANCE:229case PIPE_CAP_USER_VERTEX_BUFFERS:230case PIPE_CAP_TEXTURE_MULTISAMPLE:231case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:232case PIPE_CAP_TGSI_FS_FINE_DERIVATIVE:233case PIPE_CAP_SAMPLER_VIEW_TARGET:234case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:235case PIPE_CAP_CLIP_HALFZ:236case PIPE_CAP_POLYGON_OFFSET_CLAMP:237case PIPE_CAP_QUERY_PIPELINE_STATISTICS:238case PIPE_CAP_TEXTURE_FLOAT_LINEAR:239case PIPE_CAP_TEXTURE_HALF_FLOAT_LINEAR:240case PIPE_CAP_DEPTH_BOUNDS_TEST:241case PIPE_CAP_TGSI_TXQS:242case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS:243case PIPE_CAP_CLEAR_TEXTURE:244case PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL:245case PIPE_CAP_INVALIDATE_BUFFER:246case PIPE_CAP_STRING_MARKER:247case PIPE_CAP_CULL_DISTANCE:248case PIPE_CAP_TGSI_ARRAY_COMPONENTS:249case PIPE_CAP_TGSI_MUL_ZERO_WINS:250case PIPE_CAP_TGSI_TEX_TXF_LZ:251case PIPE_CAP_TGSI_CLOCK:252case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:253case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:254case PIPE_CAP_DEST_SURFACE_SRGB_CONTROL:255case PIPE_CAP_TGSI_DIV:256case PIPE_CAP_PREFER_IMM_ARRAYS_AS_CONSTBUF:257case PIPE_CAP_FLATSHADE:258case PIPE_CAP_ALPHA_TEST:259case PIPE_CAP_POINT_SIZE_FIXED:260case PIPE_CAP_TWO_SIDED_COLOR:261case PIPE_CAP_CLIP_PLANES:262case PIPE_CAP_PACKED_STREAM_OUTPUT:263case PIPE_CAP_CLEAR_SCISSORED:264case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT:265case PIPE_CAP_COMPUTE:266return 1;267case PIPE_CAP_SEAMLESS_CUBE_MAP:268return 1; /* class_3d >= NVA0_3D_CLASS; */269/* supported on nva0+ */270case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:271return class_3d >= NVA0_3D_CLASS;272/* supported on nva3+ */273case PIPE_CAP_CUBE_MAP_ARRAY:274case PIPE_CAP_INDEP_BLEND_FUNC:275case PIPE_CAP_TEXTURE_QUERY_LOD:276case PIPE_CAP_SAMPLE_SHADING:277case PIPE_CAP_FORCE_PERSAMPLE_INTERP:278return class_3d >= NVA3_3D_CLASS;279280/* unsupported caps */281case PIPE_CAP_EMULATE_NONFIXED_PRIMITIVE_RESTART:282case PIPE_CAP_DEPTH_CLIP_DISABLE_SEPARATE:283case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:284case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:285case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:286case PIPE_CAP_SHADER_STENCIL_EXPORT:287case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:288case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:289case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:290case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:291case PIPE_CAP_TGSI_TEXCOORD:292case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:293case PIPE_CAP_TEXTURE_GATHER_SM5:294case PIPE_CAP_FAKE_SW_MSAA:295case PIPE_CAP_TEXTURE_GATHER_OFFSETS:296case PIPE_CAP_TGSI_VS_WINDOW_SPACE_POSITION:297case PIPE_CAP_DRAW_INDIRECT:298case PIPE_CAP_MULTI_DRAW_INDIRECT:299case PIPE_CAP_MULTI_DRAW_INDIRECT_PARAMS:300case PIPE_CAP_VERTEXID_NOBASE:301case PIPE_CAP_MULTISAMPLE_Z_RESOLVE: /* potentially supported on some hw */302case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:303case PIPE_CAP_DEVICE_RESET_STATUS_QUERY:304case PIPE_CAP_MAX_SHADER_PATCH_VARYINGS:305case PIPE_CAP_DRAW_PARAMETERS:306case PIPE_CAP_TGSI_PACK_HALF_FLOAT:307case PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL:308case PIPE_CAP_TGSI_FS_POINT_IS_SYSVAL:309case PIPE_CAP_GENERATE_MIPMAP:310case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:311case PIPE_CAP_SURFACE_REINTERPRET_BLOCKS:312case PIPE_CAP_QUERY_BUFFER_OBJECT:313case PIPE_CAP_QUERY_MEMORY_INFO:314case PIPE_CAP_PCI_GROUP:315case PIPE_CAP_PCI_BUS:316case PIPE_CAP_PCI_DEVICE:317case PIPE_CAP_PCI_FUNCTION:318case PIPE_CAP_ROBUST_BUFFER_ACCESS_BEHAVIOR:319case PIPE_CAP_TGSI_VOTE:320case PIPE_CAP_POLYGON_OFFSET_UNITS_UNSCALED:321case PIPE_CAP_STREAM_OUTPUT_INTERLEAVE_BUFFERS:322case PIPE_CAP_TGSI_CAN_READ_OUTPUTS:323case PIPE_CAP_NATIVE_FENCE_FD:324case PIPE_CAP_GLSL_OPTIMIZE_CONSERVATIVELY:325case PIPE_CAP_FBFETCH:326case PIPE_CAP_DOUBLES:327case PIPE_CAP_INT64:328case PIPE_CAP_INT64_DIVMOD:329case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:330case PIPE_CAP_SPARSE_BUFFER_PAGE_SIZE:331case PIPE_CAP_TGSI_BALLOT:332case PIPE_CAP_TGSI_TES_LAYER_VIEWPORT:333case PIPE_CAP_POST_DEPTH_COVERAGE:334case PIPE_CAP_BINDLESS_TEXTURE:335case PIPE_CAP_NIR_SAMPLERS_AS_DEREF:336case PIPE_CAP_QUERY_SO_OVERFLOW:337case PIPE_CAP_MEMOBJ:338case PIPE_CAP_LOAD_CONSTBUF:339case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:340case PIPE_CAP_TILE_RASTER_ORDER:341case PIPE_CAP_FRAMEBUFFER_MSAA_CONSTRAINTS:342case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:343case PIPE_CAP_CONTEXT_PRIORITY_MASK:344case PIPE_CAP_FENCE_SIGNAL:345case PIPE_CAP_CONSTBUF0_FLAGS:346case PIPE_CAP_PACKED_UNIFORMS:347case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES:348case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES:349case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES:350case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES:351case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE:352case PIPE_CAP_MAX_CONSERVATIVE_RASTER_SUBPIXEL_PRECISION_BIAS:353case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:354case PIPE_CAP_MAX_COMBINED_SHADER_BUFFERS:355case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTERS:356case PIPE_CAP_MAX_COMBINED_HW_ATOMIC_COUNTER_BUFFERS:357case PIPE_CAP_SURFACE_SAMPLE_COUNT:358case PIPE_CAP_TGSI_ATOMFADD:359case PIPE_CAP_QUERY_PIPELINE_STATISTICS_SINGLE:360case PIPE_CAP_RGB_OVERRIDE_DST_ALPHA_BLEND:361case PIPE_CAP_GLSL_TESS_LEVELS_AS_INPUTS:362case PIPE_CAP_NIR_COMPACT_ARRAYS:363case PIPE_CAP_IMAGE_LOAD_FORMATTED:364case PIPE_CAP_COMPUTE_SHADER_DERIVATIVES:365case PIPE_CAP_ATOMIC_FLOAT_MINMAX:366case PIPE_CAP_CONSERVATIVE_RASTER_INNER_COVERAGE:367case PIPE_CAP_FRAGMENT_SHADER_INTERLOCK:368case PIPE_CAP_CS_DERIVED_SYSTEM_VALUES_SUPPORTED:369case PIPE_CAP_FBFETCH_COHERENT:370case PIPE_CAP_TGSI_SKIP_SHRINK_IO_ARRAYS:371case PIPE_CAP_TGSI_ATOMINC_WRAP:372case PIPE_CAP_DEMOTE_TO_HELPER_INVOCATION:373case PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE:374case PIPE_CAP_OPENCL_INTEGER_FUNCTIONS:375case PIPE_CAP_INTEGER_MULTIPLY_32X16: /* could be done */376case PIPE_CAP_FRONTEND_NOOP:377case PIPE_CAP_GL_SPIRV:378case PIPE_CAP_SHADER_SAMPLES_IDENTICAL:379case PIPE_CAP_TEXTURE_SHADOW_LOD:380case PIPE_CAP_VIEWPORT_TRANSFORM_LOWERED:381case PIPE_CAP_PSIZ_CLAMPED:382case PIPE_CAP_VIEWPORT_SWIZZLE:383case PIPE_CAP_VIEWPORT_MASK:384case PIPE_CAP_TEXTURE_BUFFER_SAMPLER:385case PIPE_CAP_PREFER_REAL_BUFFER_IN_CONSTBUF0:386case PIPE_CAP_MAP_UNSYNCHRONIZED_THREAD_SAFE: /* when we fix MT stuff */387case PIPE_CAP_ALPHA_TO_COVERAGE_DITHER_CONTROL:388case PIPE_CAP_SHADER_ATOMIC_INT64:389case PIPE_CAP_GLSL_ZERO_INIT:390case PIPE_CAP_BLEND_EQUATION_ADVANCED:391case PIPE_CAP_NO_CLIP_ON_COPY_TEX:392case PIPE_CAP_DEVICE_PROTECTED_CONTENT:393case PIPE_CAP_NIR_IMAGES_AS_DEREF:394return 0;395396case PIPE_CAP_VENDOR_ID:397return 0x10de;398case PIPE_CAP_DEVICE_ID: {399uint64_t device_id;400if (nouveau_getparam(dev, NOUVEAU_GETPARAM_PCI_DEVICE, &device_id)) {401NOUVEAU_ERR("NOUVEAU_GETPARAM_PCI_DEVICE failed.\n");402return -1;403}404return device_id;405}406case PIPE_CAP_ACCELERATED:407return 1;408case PIPE_CAP_VIDEO_MEMORY:409return dev->vram_size >> 20;410case PIPE_CAP_UMA:411return 0;412413default:414if (!debug_cap_printed[param]) {415debug_printf("%s: unhandled cap %d\n", __func__, param);416debug_cap_printed[param] = true;417}418FALLTHROUGH;419/* caps where we want the default value */420case PIPE_CAP_DMABUF:421case PIPE_CAP_THROTTLE:422return u_pipe_screen_get_param_defaults(pscreen, param);423}424}425426static int427nv50_screen_get_shader_param(struct pipe_screen *pscreen,428enum pipe_shader_type shader,429enum pipe_shader_cap param)430{431const struct nouveau_screen *screen = nouveau_screen(pscreen);432433switch (shader) {434case PIPE_SHADER_VERTEX:435case PIPE_SHADER_GEOMETRY:436case PIPE_SHADER_FRAGMENT:437case PIPE_SHADER_COMPUTE:438break;439default:440return 0;441}442443switch (param) {444case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:445case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:446case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:447case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:448return 16384;449case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:450return 4;451case PIPE_SHADER_CAP_MAX_INPUTS:452if (shader == PIPE_SHADER_VERTEX)453return 32;454return 15;455case PIPE_SHADER_CAP_MAX_OUTPUTS:456return 16;457case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:458return 65536;459case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:460return NV50_MAX_PIPE_CONSTBUFS;461case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:462return shader != PIPE_SHADER_FRAGMENT;463case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:464case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:465case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:466return 1;467case PIPE_SHADER_CAP_MAX_TEMPS:468return nv50_screen(pscreen)->max_tls_space / ONE_TEMP_SIZE;469case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:470return 1;471case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:472return 1;473case PIPE_SHADER_CAP_INT64_ATOMICS:474case PIPE_SHADER_CAP_FP16:475case PIPE_SHADER_CAP_FP16_DERIVATIVES:476case PIPE_SHADER_CAP_FP16_CONST_BUFFERS:477case PIPE_SHADER_CAP_INT16:478case PIPE_SHADER_CAP_GLSL_16BIT_CONSTS:479case PIPE_SHADER_CAP_SUBROUTINES:480return 0; /* please inline, or provide function declarations */481case PIPE_SHADER_CAP_INTEGERS:482return 1;483case PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS:484return 1;485case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:486/* The chip could handle more sampler views than samplers */487case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:488return MIN2(16, PIPE_MAX_SAMPLERS);489case PIPE_SHADER_CAP_MAX_SHADER_BUFFERS:490return shader == PIPE_SHADER_COMPUTE ? NV50_MAX_GLOBALS - 1 : 0;491case PIPE_SHADER_CAP_MAX_SHADER_IMAGES:492return shader == PIPE_SHADER_COMPUTE ? NV50_MAX_GLOBALS - 1 : 0;493case PIPE_SHADER_CAP_PREFERRED_IR:494return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;495case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:496return 32;497case PIPE_SHADER_CAP_SUPPORTED_IRS:498return (1 << PIPE_SHADER_IR_TGSI) | (1 << PIPE_SHADER_IR_NIR);499case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:500case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:501case PIPE_SHADER_CAP_TGSI_LDEXP_SUPPORTED:502case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:503case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:504case PIPE_SHADER_CAP_LOWER_IF_THRESHOLD:505case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS:506case PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTER_BUFFERS:507return 0;508default:509NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);510return 0;511}512}513514static float515nv50_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)516{517switch (param) {518case PIPE_CAPF_MAX_LINE_WIDTH:519case PIPE_CAPF_MAX_LINE_WIDTH_AA:520return 10.0f;521case PIPE_CAPF_MAX_POINT_WIDTH:522case PIPE_CAPF_MAX_POINT_WIDTH_AA:523return 64.0f;524case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:525return 16.0f;526case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:527return 15.0f;528case PIPE_CAPF_MIN_CONSERVATIVE_RASTER_DILATE:529case PIPE_CAPF_MAX_CONSERVATIVE_RASTER_DILATE:530case PIPE_CAPF_CONSERVATIVE_RASTER_DILATE_GRANULARITY:531return 0.0f;532}533534NOUVEAU_ERR("unknown PIPE_CAPF %d\n", param);535return 0.0f;536}537538static int539nv50_screen_get_compute_param(struct pipe_screen *pscreen,540enum pipe_shader_ir ir_type,541enum pipe_compute_cap param, void *data)542{543struct nv50_screen *screen = nv50_screen(pscreen);544545#define RET(x) do { \546if (data) \547memcpy(data, x, sizeof(x)); \548return sizeof(x); \549} while (0)550551switch (param) {552case PIPE_COMPUTE_CAP_GRID_DIMENSION:553RET((uint64_t []) { 3 });554case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:555RET(((uint64_t []) { 65535, 65535, 65535 }));556case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:557RET(((uint64_t []) { 512, 512, 64 }));558case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:559RET((uint64_t []) { 512 });560case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g0-15[] */561RET((uint64_t []) { 1ULL << 32 });562case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */563RET((uint64_t []) { 16 << 10 });564case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */565RET((uint64_t []) { 16 << 10 });566case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */567RET((uint64_t []) { 4096 });568case PIPE_COMPUTE_CAP_SUBGROUP_SIZE:569RET((uint32_t []) { 32 });570case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE:571RET((uint64_t []) { 1ULL << 40 });572case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED:573RET((uint32_t []) { 0 });574case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS:575RET((uint32_t []) { screen->mp_count });576case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY:577RET((uint32_t []) { 512 }); /* FIXME: arbitrary limit */578case PIPE_COMPUTE_CAP_ADDRESS_BITS:579RET((uint32_t []) { 32 });580case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK:581RET((uint64_t []) { 0 });582default:583return 0;584}585586#undef RET587}588589static void590nv50_screen_destroy(struct pipe_screen *pscreen)591{592struct nv50_screen *screen = nv50_screen(pscreen);593594if (!nouveau_drm_screen_unref(&screen->base))595return;596597nouveau_fence_cleanup(&screen->base);598599if (screen->base.pushbuf)600screen->base.pushbuf->user_priv = NULL;601602if (screen->blitter)603nv50_blitter_destroy(screen);604if (screen->pm.prog) {605screen->pm.prog->code = NULL; /* hardcoded, don't FREE */606nv50_program_destroy(NULL, screen->pm.prog);607FREE(screen->pm.prog);608}609610nouveau_bo_ref(NULL, &screen->code);611nouveau_bo_ref(NULL, &screen->tls_bo);612nouveau_bo_ref(NULL, &screen->stack_bo);613nouveau_bo_ref(NULL, &screen->txc);614nouveau_bo_ref(NULL, &screen->uniforms);615nouveau_bo_ref(NULL, &screen->fence.bo);616617nouveau_heap_destroy(&screen->vp_code_heap);618nouveau_heap_destroy(&screen->gp_code_heap);619nouveau_heap_destroy(&screen->fp_code_heap);620621FREE(screen->tic.entries);622623nouveau_object_del(&screen->tesla);624nouveau_object_del(&screen->eng2d);625nouveau_object_del(&screen->m2mf);626nouveau_object_del(&screen->compute);627nouveau_object_del(&screen->sync);628629nouveau_screen_fini(&screen->base);630631FREE(screen);632}633634static void635nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)636{637struct nv50_screen *screen = nv50_screen(pscreen);638struct nouveau_pushbuf *push = screen->base.pushbuf;639640/* we need to do it after possible flush in MARK_RING */641*sequence = ++screen->base.fence.sequence;642643assert(PUSH_AVAIL(push) + push->rsvd_kick >= 5);644PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));645PUSH_DATAh(push, screen->fence.bo->offset);646PUSH_DATA (push, screen->fence.bo->offset);647PUSH_DATA (push, *sequence);648PUSH_DATA (push, NV50_3D_QUERY_GET_MODE_WRITE_UNK0 |649NV50_3D_QUERY_GET_UNK4 |650NV50_3D_QUERY_GET_UNIT_CROP |651NV50_3D_QUERY_GET_TYPE_QUERY |652NV50_3D_QUERY_GET_QUERY_SELECT_ZERO |653NV50_3D_QUERY_GET_SHORT);654}655656static u32657nv50_screen_fence_update(struct pipe_screen *pscreen)658{659return nv50_screen(pscreen)->fence.map[0];660}661662static void663nv50_screen_init_hwctx(struct nv50_screen *screen)664{665struct nouveau_pushbuf *push = screen->base.pushbuf;666struct nv04_fifo *fifo;667unsigned i;668669fifo = (struct nv04_fifo *)screen->base.channel->data;670671BEGIN_NV04(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1);672PUSH_DATA (push, screen->m2mf->handle);673BEGIN_NV04(push, SUBC_M2MF(NV03_M2MF_DMA_NOTIFY), 3);674PUSH_DATA (push, screen->sync->handle);675PUSH_DATA (push, fifo->vram);676PUSH_DATA (push, fifo->vram);677678BEGIN_NV04(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1);679PUSH_DATA (push, screen->eng2d->handle);680BEGIN_NV04(push, NV50_2D(DMA_NOTIFY), 4);681PUSH_DATA (push, screen->sync->handle);682PUSH_DATA (push, fifo->vram);683PUSH_DATA (push, fifo->vram);684PUSH_DATA (push, fifo->vram);685BEGIN_NV04(push, NV50_2D(OPERATION), 1);686PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY);687BEGIN_NV04(push, NV50_2D(CLIP_ENABLE), 1);688PUSH_DATA (push, 0);689BEGIN_NV04(push, NV50_2D(COLOR_KEY_ENABLE), 1);690PUSH_DATA (push, 0);691BEGIN_NV04(push, NV50_2D(SET_PIXELS_FROM_MEMORY_SAFE_OVERLAP), 1);692PUSH_DATA (push, 1);693BEGIN_NV04(push, NV50_2D(COND_MODE), 1);694PUSH_DATA (push, NV50_2D_COND_MODE_ALWAYS);695696BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);697PUSH_DATA (push, screen->tesla->handle);698699BEGIN_NV04(push, NV50_3D(COND_MODE), 1);700PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);701702BEGIN_NV04(push, NV50_3D(DMA_NOTIFY), 1);703PUSH_DATA (push, screen->sync->handle);704BEGIN_NV04(push, NV50_3D(DMA_ZETA), 11);705for (i = 0; i < 11; ++i)706PUSH_DATA(push, fifo->vram);707BEGIN_NV04(push, NV50_3D(DMA_COLOR(0)), NV50_3D_DMA_COLOR__LEN);708for (i = 0; i < NV50_3D_DMA_COLOR__LEN; ++i)709PUSH_DATA(push, fifo->vram);710711BEGIN_NV04(push, NV50_3D(REG_MODE), 1);712PUSH_DATA (push, NV50_3D_REG_MODE_STRIPED);713BEGIN_NV04(push, NV50_3D(UNK1400_LANES), 1);714PUSH_DATA (push, 0xf);715716if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", true)) {717BEGIN_NV04(push, NV50_3D(WATCHDOG_TIMER), 1);718PUSH_DATA (push, 0x18);719}720721BEGIN_NV04(push, NV50_3D(ZETA_COMP_ENABLE), 1);722PUSH_DATA(push, screen->base.drm->version >= 0x01000101);723724BEGIN_NV04(push, NV50_3D(RT_COMP_ENABLE(0)), 8);725for (i = 0; i < 8; ++i)726PUSH_DATA(push, screen->base.drm->version >= 0x01000101);727728BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);729PUSH_DATA (push, 1);730731BEGIN_NV04(push, NV50_3D(CSAA_ENABLE), 1);732PUSH_DATA (push, 0);733BEGIN_NV04(push, NV50_3D(MULTISAMPLE_ENABLE), 1);734PUSH_DATA (push, 0);735BEGIN_NV04(push, NV50_3D(MULTISAMPLE_MODE), 1);736PUSH_DATA (push, NV50_3D_MULTISAMPLE_MODE_MS1);737BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1);738PUSH_DATA (push, 0);739BEGIN_NV04(push, NV50_3D(PRIM_RESTART_WITH_DRAW_ARRAYS), 1);740PUSH_DATA (push, 1);741BEGIN_NV04(push, NV50_3D(BLEND_SEPARATE_ALPHA), 1);742PUSH_DATA (push, 1);743744if (screen->tesla->oclass >= NVA0_3D_CLASS) {745BEGIN_NV04(push, SUBC_3D(NVA0_3D_TEX_MISC), 1);746PUSH_DATA (push, 0);747}748749BEGIN_NV04(push, NV50_3D(SCREEN_Y_CONTROL), 1);750PUSH_DATA (push, 0);751BEGIN_NV04(push, NV50_3D(WINDOW_OFFSET_X), 2);752PUSH_DATA (push, 0);753PUSH_DATA (push, 0);754BEGIN_NV04(push, NV50_3D(ZCULL_REGION), 1);755PUSH_DATA (push, 0x3f);756757BEGIN_NV04(push, NV50_3D(VP_ADDRESS_HIGH), 2);758PUSH_DATAh(push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2));759PUSH_DATA (push, screen->code->offset + (0 << NV50_CODE_BO_SIZE_LOG2));760761BEGIN_NV04(push, NV50_3D(FP_ADDRESS_HIGH), 2);762PUSH_DATAh(push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2));763PUSH_DATA (push, screen->code->offset + (1 << NV50_CODE_BO_SIZE_LOG2));764765BEGIN_NV04(push, NV50_3D(GP_ADDRESS_HIGH), 2);766PUSH_DATAh(push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2));767PUSH_DATA (push, screen->code->offset + (2 << NV50_CODE_BO_SIZE_LOG2));768769BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);770PUSH_DATAh(push, screen->tls_bo->offset);771PUSH_DATA (push, screen->tls_bo->offset);772PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));773774BEGIN_NV04(push, NV50_3D(STACK_ADDRESS_HIGH), 3);775PUSH_DATAh(push, screen->stack_bo->offset);776PUSH_DATA (push, screen->stack_bo->offset);777PUSH_DATA (push, 4);778779BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);780PUSH_DATAh(push, screen->uniforms->offset + (0 << 16));781PUSH_DATA (push, screen->uniforms->offset + (0 << 16));782PUSH_DATA (push, (NV50_CB_PVP << 16) | 0x0000);783784BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);785PUSH_DATAh(push, screen->uniforms->offset + (1 << 16));786PUSH_DATA (push, screen->uniforms->offset + (1 << 16));787PUSH_DATA (push, (NV50_CB_PGP << 16) | 0x0000);788789BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);790PUSH_DATAh(push, screen->uniforms->offset + (2 << 16));791PUSH_DATA (push, screen->uniforms->offset + (2 << 16));792PUSH_DATA (push, (NV50_CB_PFP << 16) | 0x0000);793794BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);795PUSH_DATAh(push, screen->uniforms->offset + (4 << 16));796PUSH_DATA (push, screen->uniforms->offset + (4 << 16));797PUSH_DATA (push, (NV50_CB_AUX << 16) | (NV50_CB_AUX_SIZE & 0xffff));798799BEGIN_NI04(push, NV50_3D(SET_PROGRAM_CB), 3);800PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf01);801PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf21);802PUSH_DATA (push, (NV50_CB_AUX << 12) | 0xf31);803804/* return { 0.0, 0.0, 0.0, 0.0 } on out-of-bounds vtxbuf access */805BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);806PUSH_DATA (push, (NV50_CB_AUX_RUNOUT_OFFSET << (8 - 2)) | NV50_CB_AUX);807BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 4);808PUSH_DATAf(push, 0.0f);809PUSH_DATAf(push, 0.0f);810PUSH_DATAf(push, 0.0f);811PUSH_DATAf(push, 0.0f);812BEGIN_NV04(push, NV50_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);813PUSH_DATAh(push, screen->uniforms->offset + (4 << 16) + NV50_CB_AUX_RUNOUT_OFFSET);814PUSH_DATA (push, screen->uniforms->offset + (4 << 16) + NV50_CB_AUX_RUNOUT_OFFSET);815816/* set the membar offset */817BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);818PUSH_DATA (push, (NV50_CB_AUX_MEMBAR_OFFSET << (8 - 2)) | NV50_CB_AUX);819BEGIN_NI04(push, NV50_3D(CB_DATA(0)), 1);820PUSH_DATA (push, screen->uniforms->offset + (4 << 16) + NV50_CB_AUX_MEMBAR_OFFSET);821822nv50_upload_ms_info(push);823824/* max TIC (bits 4:8) & TSC bindings, per program type */825for (i = 0; i < NV50_MAX_3D_SHADER_STAGES; ++i) {826BEGIN_NV04(push, NV50_3D(TEX_LIMITS(i)), 1);827PUSH_DATA (push, 0x54);828}829830BEGIN_NV04(push, NV50_3D(TIC_ADDRESS_HIGH), 3);831PUSH_DATAh(push, screen->txc->offset);832PUSH_DATA (push, screen->txc->offset);833PUSH_DATA (push, NV50_TIC_MAX_ENTRIES - 1);834835BEGIN_NV04(push, NV50_3D(TSC_ADDRESS_HIGH), 3);836PUSH_DATAh(push, screen->txc->offset + 65536);837PUSH_DATA (push, screen->txc->offset + 65536);838PUSH_DATA (push, NV50_TSC_MAX_ENTRIES - 1);839840BEGIN_NV04(push, NV50_3D(LINKED_TSC), 1);841PUSH_DATA (push, 0);842843BEGIN_NV04(push, NV50_3D(CLIP_RECTS_EN), 1);844PUSH_DATA (push, 0);845BEGIN_NV04(push, NV50_3D(CLIP_RECTS_MODE), 1);846PUSH_DATA (push, NV50_3D_CLIP_RECTS_MODE_INSIDE_ANY);847BEGIN_NV04(push, NV50_3D(CLIP_RECT_HORIZ(0)), 8 * 2);848for (i = 0; i < 8 * 2; ++i)849PUSH_DATA(push, 0);850BEGIN_NV04(push, NV50_3D(CLIPID_ENABLE), 1);851PUSH_DATA (push, 0);852853BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);854PUSH_DATA (push, 1);855for (i = 0; i < NV50_MAX_VIEWPORTS; i++) {856BEGIN_NV04(push, NV50_3D(DEPTH_RANGE_NEAR(i)), 2);857PUSH_DATAf(push, 0.0f);858PUSH_DATAf(push, 1.0f);859BEGIN_NV04(push, NV50_3D(VIEWPORT_HORIZ(i)), 2);860PUSH_DATA (push, 8192 << 16);861PUSH_DATA (push, 8192 << 16);862}863864BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1);865#ifdef NV50_SCISSORS_CLIPPING866PUSH_DATA (push, 0x0000);867#else868PUSH_DATA (push, 0x1080);869#endif870871BEGIN_NV04(push, NV50_3D(CLEAR_FLAGS), 1);872PUSH_DATA (push, NV50_3D_CLEAR_FLAGS_CLEAR_RECT_VIEWPORT);873874/* We use scissors instead of exact view volume clipping,875* so they're always enabled.876*/877for (i = 0; i < NV50_MAX_VIEWPORTS; i++) {878BEGIN_NV04(push, NV50_3D(SCISSOR_ENABLE(i)), 3);879PUSH_DATA (push, 1);880PUSH_DATA (push, 8192 << 16);881PUSH_DATA (push, 8192 << 16);882}883884BEGIN_NV04(push, NV50_3D(RASTERIZE_ENABLE), 1);885PUSH_DATA (push, 1);886BEGIN_NV04(push, NV50_3D(POINT_RASTER_RULES), 1);887PUSH_DATA (push, NV50_3D_POINT_RASTER_RULES_OGL);888BEGIN_NV04(push, NV50_3D(FRAG_COLOR_CLAMP_EN), 1);889PUSH_DATA (push, 0x11111111);890BEGIN_NV04(push, NV50_3D(EDGEFLAG), 1);891PUSH_DATA (push, 1);892893BEGIN_NV04(push, NV50_3D(VB_ELEMENT_BASE), 1);894PUSH_DATA (push, 0);895if (screen->base.class_3d >= NV84_3D_CLASS) {896BEGIN_NV04(push, NV84_3D(VERTEX_ID_BASE), 1);897PUSH_DATA (push, 0);898}899900BEGIN_NV04(push, NV50_3D(UNK0FDC), 1);901PUSH_DATA (push, 1);902BEGIN_NV04(push, NV50_3D(UNK19C0), 1);903PUSH_DATA (push, 1);904905PUSH_KICK (push);906}907908static int nv50_tls_alloc(struct nv50_screen *screen, unsigned tls_space,909uint64_t *tls_size)910{911struct nouveau_device *dev = screen->base.device;912int ret;913914screen->cur_tls_space = util_next_power_of_two(tls_space / ONE_TEMP_SIZE) *915ONE_TEMP_SIZE;916if (nouveau_mesa_debug)917debug_printf("allocating space for %u temps\n",918util_next_power_of_two(tls_space / ONE_TEMP_SIZE));919*tls_size = screen->cur_tls_space * util_next_power_of_two(screen->TPs) *920screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP;921922ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,923*tls_size, NULL, &screen->tls_bo);924if (ret) {925NOUVEAU_ERR("Failed to allocate local bo: %d\n", ret);926return ret;927}928929return 0;930}931932int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space)933{934struct nouveau_pushbuf *push = screen->base.pushbuf;935int ret;936uint64_t tls_size;937938if (tls_space < screen->cur_tls_space)939return 0;940if (tls_space > screen->max_tls_space) {941/* fixable by limiting number of warps (LOCAL_WARPS_LOG_ALLOC /942* LOCAL_WARPS_NO_CLAMP) */943NOUVEAU_ERR("Unsupported number of temporaries (%u > %u). Fixable if someone cares.\n",944(unsigned)(tls_space / ONE_TEMP_SIZE),945(unsigned)(screen->max_tls_space / ONE_TEMP_SIZE));946return -ENOMEM;947}948949nouveau_bo_ref(NULL, &screen->tls_bo);950ret = nv50_tls_alloc(screen, tls_space, &tls_size);951if (ret)952return ret;953954BEGIN_NV04(push, NV50_3D(LOCAL_ADDRESS_HIGH), 3);955PUSH_DATAh(push, screen->tls_bo->offset);956PUSH_DATA (push, screen->tls_bo->offset);957PUSH_DATA (push, util_logbase2(screen->cur_tls_space / 8));958959return 1;960}961962static const nir_shader_compiler_options nir_options = {963.fuse_ffma16 = false, /* nir doesn't track mad vs fma */964.fuse_ffma32 = false, /* nir doesn't track mad vs fma */965.fuse_ffma64 = false, /* nir doesn't track mad vs fma */966.lower_flrp32 = true,967.lower_flrp64 = true,968.lower_fpow = false,969.lower_uadd_carry = true,970.lower_usub_borrow = true,971.lower_ffract = true,972.lower_pack_half_2x16 = true,973.lower_pack_unorm_2x16 = true,974.lower_pack_snorm_2x16 = true,975.lower_pack_unorm_4x8 = true,976.lower_pack_snorm_4x8 = true,977.lower_unpack_half_2x16 = true,978.lower_unpack_unorm_2x16 = true,979.lower_unpack_snorm_2x16 = true,980.lower_unpack_unorm_4x8 = true,981.lower_unpack_snorm_4x8 = true,982.lower_extract_byte = true,983.lower_extract_word = true,984.lower_insert_byte = true,985.lower_insert_word = true,986.lower_all_io_to_temps = false,987.lower_cs_local_index_from_id = true,988.lower_rotate = true,989.lower_to_scalar = true,990.use_interpolated_input_intrinsics = true,991.max_unroll_iterations = 32,992};993994static const void *995nv50_screen_get_compiler_options(struct pipe_screen *pscreen,996enum pipe_shader_ir ir,997enum pipe_shader_type shader)998{999if (ir == PIPE_SHADER_IR_NIR)1000return &nir_options;1001return NULL;1002}10031004struct nouveau_screen *1005nv50_screen_create(struct nouveau_device *dev)1006{1007struct nv50_screen *screen;1008struct pipe_screen *pscreen;1009struct nouveau_object *chan;1010uint64_t value;1011uint32_t tesla_class;1012unsigned stack_size;1013int ret;10141015screen = CALLOC_STRUCT(nv50_screen);1016if (!screen)1017return NULL;1018pscreen = &screen->base.base;1019pscreen->destroy = nv50_screen_destroy;10201021ret = nouveau_screen_init(&screen->base, dev);1022if (ret) {1023NOUVEAU_ERR("nouveau_screen_init failed: %d\n", ret);1024goto fail;1025}10261027/* TODO: Prevent FIFO prefetch before transfer of index buffers and1028* admit them to VRAM.1029*/1030screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |1031PIPE_BIND_VERTEX_BUFFER;1032screen->base.sysmem_bindings |=1033PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;10341035screen->base.pushbuf->user_priv = screen;1036screen->base.pushbuf->rsvd_kick = 5;10371038chan = screen->base.channel;10391040pscreen->context_create = nv50_create;1041pscreen->is_format_supported = nv50_screen_is_format_supported;1042pscreen->get_param = nv50_screen_get_param;1043pscreen->get_shader_param = nv50_screen_get_shader_param;1044pscreen->get_paramf = nv50_screen_get_paramf;1045pscreen->get_compute_param = nv50_screen_get_compute_param;1046pscreen->get_driver_query_info = nv50_screen_get_driver_query_info;1047pscreen->get_driver_query_group_info = nv50_screen_get_driver_query_group_info;10481049/* nir stuff */1050pscreen->get_compiler_options = nv50_screen_get_compiler_options;10511052nv50_screen_init_resource_functions(pscreen);10531054if (screen->base.device->chipset < 0x84 ||1055debug_get_bool_option("NOUVEAU_PMPEG", false)) {1056/* PMPEG */1057nouveau_screen_init_vdec(&screen->base);1058} else if (screen->base.device->chipset < 0x98 ||1059screen->base.device->chipset == 0xa0) {1060/* VP2 */1061screen->base.base.get_video_param = nv84_screen_get_video_param;1062screen->base.base.is_video_format_supported = nv84_screen_video_supported;1063} else {1064/* VP3/4 */1065screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param;1066screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported;1067}10681069ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096,1070NULL, &screen->fence.bo);1071if (ret) {1072NOUVEAU_ERR("Failed to allocate fence bo: %d\n", ret);1073goto fail;1074}10751076nouveau_bo_map(screen->fence.bo, 0, NULL);1077screen->fence.map = screen->fence.bo->map;1078screen->base.fence.emit = nv50_screen_fence_emit;1079screen->base.fence.update = nv50_screen_fence_update;10801081ret = nouveau_object_new(chan, 0xbeef0301, NOUVEAU_NOTIFIER_CLASS,1082&(struct nv04_notify){ .length = 32 },1083sizeof(struct nv04_notify), &screen->sync);1084if (ret) {1085NOUVEAU_ERR("Failed to allocate notifier: %d\n", ret);1086goto fail;1087}10881089ret = nouveau_object_new(chan, 0xbeef5039, NV50_M2MF_CLASS,1090NULL, 0, &screen->m2mf);1091if (ret) {1092NOUVEAU_ERR("Failed to allocate PGRAPH context for M2MF: %d\n", ret);1093goto fail;1094}10951096ret = nouveau_object_new(chan, 0xbeef502d, NV50_2D_CLASS,1097NULL, 0, &screen->eng2d);1098if (ret) {1099NOUVEAU_ERR("Failed to allocate PGRAPH context for 2D: %d\n", ret);1100goto fail;1101}11021103switch (dev->chipset & 0xf0) {1104case 0x50:1105tesla_class = NV50_3D_CLASS;1106break;1107case 0x80:1108case 0x90:1109tesla_class = NV84_3D_CLASS;1110break;1111case 0xa0:1112switch (dev->chipset) {1113case 0xa0:1114case 0xaa:1115case 0xac:1116tesla_class = NVA0_3D_CLASS;1117break;1118case 0xaf:1119tesla_class = NVAF_3D_CLASS;1120break;1121default:1122tesla_class = NVA3_3D_CLASS;1123break;1124}1125break;1126default:1127NOUVEAU_ERR("Not a known NV50 chipset: NV%02x\n", dev->chipset);1128goto fail;1129}1130screen->base.class_3d = tesla_class;11311132ret = nouveau_object_new(chan, 0xbeef5097, tesla_class,1133NULL, 0, &screen->tesla);1134if (ret) {1135NOUVEAU_ERR("Failed to allocate PGRAPH context for 3D: %d\n", ret);1136goto fail;1137}11381139/* This over-allocates by a page. The GP, which would execute at the end of1140* the last page, would trigger faults. The going theory is that it1141* prefetches up to a certain amount.1142*/1143ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16,1144(3 << NV50_CODE_BO_SIZE_LOG2) + 0x1000,1145NULL, &screen->code);1146if (ret) {1147NOUVEAU_ERR("Failed to allocate code bo: %d\n", ret);1148goto fail;1149}11501151nouveau_heap_init(&screen->vp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);1152nouveau_heap_init(&screen->gp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);1153nouveau_heap_init(&screen->fp_code_heap, 0, 1 << NV50_CODE_BO_SIZE_LOG2);11541155nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);11561157screen->TPs = util_bitcount(value & 0xffff);1158screen->MPsInTP = util_bitcount(value & 0x0f000000);11591160screen->mp_count = screen->TPs * screen->MPsInTP;11611162stack_size = util_next_power_of_two(screen->TPs) * screen->MPsInTP *1163STACK_WARPS_ALLOC * 64 * 8;11641165ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, stack_size, NULL,1166&screen->stack_bo);1167if (ret) {1168NOUVEAU_ERR("Failed to allocate stack bo: %d\n", ret);1169goto fail;1170}11711172uint64_t size_of_one_temp = util_next_power_of_two(screen->TPs) *1173screen->MPsInTP * LOCAL_WARPS_ALLOC * THREADS_IN_WARP *1174ONE_TEMP_SIZE;1175screen->max_tls_space = dev->vram_size / size_of_one_temp * ONE_TEMP_SIZE;1176screen->max_tls_space /= 2; /* half of vram */11771178/* hw can address max 64 KiB */1179screen->max_tls_space = MIN2(screen->max_tls_space, 64 << 10);11801181uint64_t tls_size;1182unsigned tls_space = 4/*temps*/ * ONE_TEMP_SIZE;1183ret = nv50_tls_alloc(screen, tls_space, &tls_size);1184if (ret)1185goto fail;11861187if (nouveau_mesa_debug)1188debug_printf("TPs = %u, MPsInTP = %u, VRAM = %"PRIu64" MiB, tls_size = %"PRIu64" KiB\n",1189screen->TPs, screen->MPsInTP, dev->vram_size >> 20, tls_size >> 10);11901191ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 5 << 16, NULL,1192&screen->uniforms);1193if (ret) {1194NOUVEAU_ERR("Failed to allocate uniforms bo: %d\n", ret);1195goto fail;1196}11971198ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 16, 3 << 16, NULL,1199&screen->txc);1200if (ret) {1201NOUVEAU_ERR("Failed to allocate TIC/TSC bo: %d\n", ret);1202goto fail;1203}12041205screen->tic.entries = CALLOC(4096, sizeof(void *));1206screen->tsc.entries = screen->tic.entries + 2048;12071208if (!nv50_blitter_create(screen))1209goto fail;12101211nv50_screen_init_hwctx(screen);12121213ret = nv50_screen_compute_setup(screen, screen->base.pushbuf);1214if (ret) {1215NOUVEAU_ERR("Failed to init compute context: %d\n", ret);1216goto fail;1217}12181219nouveau_fence_new(&screen->base, &screen->base.fence.current);12201221return &screen->base;12221223fail:1224screen->base.base.context_create = NULL;1225return &screen->base;1226}12271228int1229nv50_screen_tic_alloc(struct nv50_screen *screen, void *entry)1230{1231int i = screen->tic.next;12321233while (screen->tic.lock[i / 32] & (1 << (i % 32)))1234i = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);12351236screen->tic.next = (i + 1) & (NV50_TIC_MAX_ENTRIES - 1);12371238if (screen->tic.entries[i])1239nv50_tic_entry(screen->tic.entries[i])->id = -1;12401241screen->tic.entries[i] = entry;1242return i;1243}12441245int1246nv50_screen_tsc_alloc(struct nv50_screen *screen, void *entry)1247{1248int i = screen->tsc.next;12491250while (screen->tsc.lock[i / 32] & (1 << (i % 32)))1251i = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);12521253screen->tsc.next = (i + 1) & (NV50_TSC_MAX_ENTRIES - 1);12541255if (screen->tsc.entries[i])1256nv50_tsc_entry(screen->tsc.entries[i])->id = -1;12571258screen->tsc.entries[i] = entry;1259return i;1260}126112621263