/* Path: blob/21.2-virgl/src/gallium/drivers/vc4/kernel/vc4_validate.c */
/* 4574 views */
/*1* Copyright © 2014 Broadcom2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223/**24* Command list validator for VC4.25*26* The VC4 has no IOMMU between it and system memory. So, a user with27* access to execute command lists could escalate privilege by28* overwriting system memory (drawing to it as a framebuffer) or29* reading system memory it shouldn't (reading it as a texture, or30* uniform data, or vertex data).31*32* This validates command lists to ensure that all accesses are within33* the bounds of the GEM objects referenced. 
It explicitly whitelists34* packets, and looks at the offsets in any address fields to make35* sure they're constrained within the BOs they reference.36*37* Note that because of the validation that's happening anyway, this38* is where GEM relocation processing happens.39*/4041#include "vc4_drv.h"42#include "vc4_packet.h"4344#define VALIDATE_ARGS \45struct vc4_exec_info *exec, \46void *validated, \47void *untrusted4849/** Return the width in pixels of a 64-byte microtile. */50static uint32_t51utile_width(int cpp)52{53switch (cpp) {54case 1:55case 2:56return 8;57case 4:58return 4;59case 8:60return 2;61default:62DRM_ERROR("unknown cpp: %d\n", cpp);63return 1;64}65}6667/** Return the height in pixels of a 64-byte microtile. */68static uint32_t69utile_height(int cpp)70{71switch (cpp) {72case 1:73return 8;74case 2:75case 4:76case 8:77return 4;78default:79DRM_ERROR("unknown cpp: %d\n", cpp);80return 1;81}82}8384/**85* The texture unit decides what tiling format a particular miplevel is using86* this function, so we lay out our miptrees accordingly.87*/88static bool89size_is_lt(uint32_t width, uint32_t height, int cpp)90{91return (width <= 4 * utile_width(cpp) ||92height <= 4 * utile_height(cpp));93}9495struct drm_gem_cma_object *96vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)97{98struct drm_gem_cma_object *obj;99struct drm_vc4_bo *bo;100101if (hindex >= exec->bo_count) {102DRM_ERROR("BO index %d greater than BO count %d\n",103hindex, exec->bo_count);104return NULL;105}106obj = exec->bo[hindex];107bo = to_vc4_bo(&obj->base);108109if (bo->validated_shader) {110DRM_ERROR("Trying to use shader BO as something other than "111"a shader\n");112return NULL;113}114115return obj;116}117118static struct drm_gem_cma_object *119vc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index)120{121return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]);122}123124static bool125validate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t 
pos)126{127/* Note that the untrusted pointer passed to these functions is128* incremented past the packet byte.129*/130return (untrusted - 1 == exec->bin_u + pos);131}132133static uint32_t134gl_shader_rec_size(uint32_t pointer_bits)135{136uint32_t attribute_count = pointer_bits & 7;137bool extended = pointer_bits & 8;138139if (attribute_count == 0)140attribute_count = 8;141142if (extended)143return 100 + attribute_count * 4;144else145return 36 + attribute_count * 8;146}147148bool149vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,150uint32_t offset, uint8_t tiling_format,151uint32_t width, uint32_t height, uint8_t cpp)152{153uint32_t aligned_width, aligned_height, stride, size;154uint32_t utile_w = utile_width(cpp);155uint32_t utile_h = utile_height(cpp);156157/* The shaded vertex format stores signed 12.4 fixed point158* (-2048,2047) offsets from the viewport center, so we should159* never have a render target larger than 4096. The texture160* unit can only sample from 2048x2048, so it's even more161* restricted. 
This lets us avoid worrying about overflow in162* our math.163*/164if (width > 4096 || height > 4096) {165DRM_ERROR("Surface dimesions (%d,%d) too large", width, height);166return false;167}168169switch (tiling_format) {170case VC4_TILING_FORMAT_LINEAR:171aligned_width = round_up(width, utile_w);172aligned_height = height;173break;174case VC4_TILING_FORMAT_T:175aligned_width = round_up(width, utile_w * 8);176aligned_height = round_up(height, utile_h * 8);177break;178case VC4_TILING_FORMAT_LT:179aligned_width = round_up(width, utile_w);180aligned_height = round_up(height, utile_h);181break;182default:183DRM_ERROR("buffer tiling %d unsupported\n", tiling_format);184return false;185}186187stride = aligned_width * cpp;188size = stride * aligned_height;189190if (size + offset < size ||191size + offset > fbo->base.size) {192DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",193width, height,194aligned_width, aligned_height,195size, offset, fbo->base.size);196return false;197}198199return true;200}201202static int203validate_flush(VALIDATE_ARGS)204{205if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) {206DRM_ERROR("Bin CL must end with VC4_PACKET_FLUSH\n");207return -EINVAL;208}209exec->found_flush = true;210211return 0;212}213214static int215validate_start_tile_binning(VALIDATE_ARGS)216{217if (exec->found_start_tile_binning_packet) {218DRM_ERROR("Duplicate VC4_PACKET_START_TILE_BINNING\n");219return -EINVAL;220}221exec->found_start_tile_binning_packet = true;222223if (!exec->found_tile_binning_mode_config_packet) {224DRM_ERROR("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");225return -EINVAL;226}227228return 0;229}230231static int232validate_increment_semaphore(VALIDATE_ARGS)233{234if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) {235DRM_ERROR("Bin CL must end with "236"VC4_PACKET_INCREMENT_SEMAPHORE\n");237return -EINVAL;238}239exec->found_increment_semaphore_packet = true;240241return 0;242}243244static 
int245validate_indexed_prim_list(VALIDATE_ARGS)246{247struct drm_gem_cma_object *ib;248uint32_t length = *(uint32_t *)(untrusted + 1);249uint32_t offset = *(uint32_t *)(untrusted + 5);250uint32_t max_index = *(uint32_t *)(untrusted + 9);251uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1;252struct vc4_shader_state *shader_state;253254/* Check overflow condition */255if (exec->shader_state_count == 0) {256DRM_ERROR("shader state must precede primitives\n");257return -EINVAL;258}259shader_state = &exec->shader_state[exec->shader_state_count - 1];260261if (max_index > shader_state->max_index)262shader_state->max_index = max_index;263264ib = vc4_use_handle(exec, 0);265if (!ib)266return -EINVAL;267268if (offset > ib->base.size ||269(ib->base.size - offset) / index_size < length) {270DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n",271offset, length, index_size, ib->base.size);272return -EINVAL;273}274275*(uint32_t *)(validated + 5) = ib->paddr + offset;276277return 0;278}279280static int281validate_gl_array_primitive(VALIDATE_ARGS)282{283uint32_t length = *(uint32_t *)(untrusted + 1);284uint32_t base_index = *(uint32_t *)(untrusted + 5);285uint32_t max_index;286struct vc4_shader_state *shader_state;287288/* Check overflow condition */289if (exec->shader_state_count == 0) {290DRM_ERROR("shader state must precede primitives\n");291return -EINVAL;292}293shader_state = &exec->shader_state[exec->shader_state_count - 1];294295if (length + base_index < length) {296DRM_ERROR("primitive vertex count overflow\n");297return -EINVAL;298}299max_index = length + base_index - 1;300301if (max_index > shader_state->max_index)302shader_state->max_index = max_index;303304return 0;305}306307static int308validate_gl_shader_state(VALIDATE_ARGS)309{310uint32_t i = exec->shader_state_count++;311312if (i >= exec->shader_state_size) {313DRM_ERROR("More requests for shader states than declared\n");314return -EINVAL;315}316317exec->shader_state[i].addr = *(uint32_t 
*)untrusted;318exec->shader_state[i].max_index = 0;319320if (exec->shader_state[i].addr & ~0xf) {321DRM_ERROR("high bits set in GL shader rec reference\n");322return -EINVAL;323}324325*(uint32_t *)validated = (exec->shader_rec_p +326exec->shader_state[i].addr);327328exec->shader_rec_p +=329roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16);330331return 0;332}333334static int335validate_tile_binning_config(VALIDATE_ARGS)336{337struct drm_device *dev = exec->exec_bo->base.dev;338uint8_t flags;339uint32_t tile_state_size, tile_alloc_size;340uint32_t tile_count;341342if (exec->found_tile_binning_mode_config_packet) {343DRM_ERROR("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n");344return -EINVAL;345}346exec->found_tile_binning_mode_config_packet = true;347348exec->bin_tiles_x = *(uint8_t *)(untrusted + 12);349exec->bin_tiles_y = *(uint8_t *)(untrusted + 13);350tile_count = exec->bin_tiles_x * exec->bin_tiles_y;351flags = *(uint8_t *)(untrusted + 14);352353if (exec->bin_tiles_x == 0 ||354exec->bin_tiles_y == 0) {355DRM_ERROR("Tile binning config of %dx%d too small\n",356exec->bin_tiles_x, exec->bin_tiles_y);357return -EINVAL;358}359360if (flags & (VC4_BIN_CONFIG_DB_NON_MS |361VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) {362DRM_ERROR("unsupported binning config flags 0x%02x\n", flags);363return -EINVAL;364}365366/* The tile state data array is 48 bytes per tile, and we put it at367* the start of a BO containing both it and the tile alloc.368*/369tile_state_size = 48 * tile_count;370371/* Since the tile alloc array will follow us, align. 
*/372exec->tile_alloc_offset = roundup(tile_state_size, 4096);373374*(uint8_t *)(validated + 14) =375((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK |376VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) |377VC4_BIN_CONFIG_AUTO_INIT_TSDA |378VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32,379VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) |380VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128,381VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE));382383/* Initial block size. */384tile_alloc_size = 32 * tile_count;385386/*387* The initial allocation gets rounded to the next 256 bytes before388* the hardware starts fulfilling further allocations.389*/390tile_alloc_size = roundup(tile_alloc_size, 256);391392/* Add space for the extra allocations. This is what gets used first,393* before overflow memory. It must have at least 4096 bytes, but we394* want to avoid overflow memory usage if possible.395*/396tile_alloc_size += 1024 * 1024;397398exec->tile_bo = drm_gem_cma_create(dev, exec->tile_alloc_offset +399tile_alloc_size);400if (!exec->tile_bo)401return -ENOMEM;402list_addtail(&to_vc4_bo(&exec->tile_bo->base)->unref_head,403&exec->unref_list);404405/* tile alloc address. */406*(uint32_t *)(validated + 0) = (exec->tile_bo->paddr +407exec->tile_alloc_offset);408/* tile alloc size. */409*(uint32_t *)(validated + 4) = tile_alloc_size;410/* tile state address. 
*/411*(uint32_t *)(validated + 8) = exec->tile_bo->paddr;412413return 0;414}415416static int417validate_gem_handles(VALIDATE_ARGS)418{419memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index));420return 0;421}422423#define VC4_DEFINE_PACKET(packet, func) \424[packet] = { packet ## _SIZE, #packet, func }425426static const struct cmd_info {427uint16_t len;428const char *name;429int (*func)(struct vc4_exec_info *exec, void *validated,430void *untrusted);431} cmd_info[] = {432VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),433VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),434VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush),435VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL),436VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,437validate_start_tile_binning),438VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,439validate_increment_semaphore),440441VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,442validate_indexed_prim_list),443VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,444validate_gl_array_primitive),445446VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),447448VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),449450VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),451VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),452VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),453VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),454VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),455VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),456VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),457VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),458VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),459/* Note: The docs say this was also 105, but it was 106 in the460* initial userland code drop.461*/462VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),463464VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,465validate_tile_binning_config),466467VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),468};469470int471vc4_validate_bin_cl(struct 
drm_device *dev,472void *validated,473void *unvalidated,474struct vc4_exec_info *exec)475{476uint32_t len = exec->args->bin_cl_size;477uint32_t dst_offset = 0;478uint32_t src_offset = 0;479480while (src_offset < len) {481void *dst_pkt = validated + dst_offset;482void *src_pkt = unvalidated + src_offset;483u8 cmd = *(uint8_t *)src_pkt;484const struct cmd_info *info;485486if (cmd >= ARRAY_SIZE(cmd_info)) {487DRM_ERROR("0x%08x: packet %d out of bounds\n",488src_offset, cmd);489return -EINVAL;490}491492info = &cmd_info[cmd];493if (!info->name) {494DRM_ERROR("0x%08x: packet %d invalid\n",495src_offset, cmd);496return -EINVAL;497}498499if (src_offset + info->len > len) {500DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "501"exceeds bounds (0x%08x)\n",502src_offset, cmd, info->name, info->len,503src_offset + len);504return -EINVAL;505}506507if (cmd != VC4_PACKET_GEM_HANDLES)508memcpy(dst_pkt, src_pkt, info->len);509510if (info->func && info->func(exec,511dst_pkt + 1,512src_pkt + 1)) {513DRM_ERROR("0x%08x: packet %d (%s) failed to validate\n",514src_offset, cmd, info->name);515return -EINVAL;516}517518src_offset += info->len;519/* GEM handle loading doesn't produce HW packets. */520if (cmd != VC4_PACKET_GEM_HANDLES)521dst_offset += info->len;522523/* When the CL hits halt, it'll stop reading anything else. */524if (cmd == VC4_PACKET_HALT)525break;526}527528exec->ct0ea = exec->ct0ca + dst_offset;529530if (!exec->found_start_tile_binning_packet) {531DRM_ERROR("Bin CL missing VC4_PACKET_START_TILE_BINNING\n");532return -EINVAL;533}534535/* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH. 
The536* semaphore is used to trigger the render CL to start up, and the537* FLUSH is what caps the bin lists with538* VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main539* render CL when they get called to) and actually triggers the queued540* semaphore increment.541*/542if (!exec->found_increment_semaphore_packet || !exec->found_flush) {543DRM_ERROR("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + "544"VC4_PACKET_FLUSH\n");545return -EINVAL;546}547548return 0;549}550551static bool552reloc_tex(struct vc4_exec_info *exec,553void *uniform_data_u,554struct vc4_texture_sample_info *sample,555uint32_t texture_handle_index)556557{558struct drm_gem_cma_object *tex;559uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]);560uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]);561uint32_t p2 = (sample->p_offset[2] != ~0 ?562*(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0);563uint32_t p3 = (sample->p_offset[3] != ~0 ?564*(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0);565uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0];566uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK;567uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS);568uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH);569uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT);570uint32_t cpp, tiling_format, utile_w, utile_h;571uint32_t i;572uint32_t cube_map_stride = 0;573enum vc4_texture_data_type type;574575tex = vc4_use_bo(exec, texture_handle_index);576if (!tex)577return false;578579if (sample->is_direct) {580uint32_t remaining_size = tex->base.size - p0;581582if (p0 > tex->base.size - 4) {583DRM_ERROR("UBO offset greater than UBO size\n");584goto fail;585}586if (p1 > remaining_size - 4) {587DRM_ERROR("UBO clamp would allow reads "588"outside of UBO\n");589goto fail;590}591*validated_p0 = tex->paddr + p0;592return true;593}594595if (width == 0)596width = 2048;597if (height == 0)598height = 2048;599600if (p0 & VC4_TEX_P0_CMMODE_MASK) {601if 
(VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) ==602VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE)603cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK;604if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) ==605VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) {606if (cube_map_stride) {607DRM_ERROR("Cube map stride set twice\n");608goto fail;609}610611cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK;612}613if (!cube_map_stride) {614DRM_ERROR("Cube map stride not set\n");615goto fail;616}617}618619type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) |620(VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4));621622switch (type) {623case VC4_TEXTURE_TYPE_RGBA8888:624case VC4_TEXTURE_TYPE_RGBX8888:625case VC4_TEXTURE_TYPE_RGBA32R:626cpp = 4;627break;628case VC4_TEXTURE_TYPE_RGBA4444:629case VC4_TEXTURE_TYPE_RGBA5551:630case VC4_TEXTURE_TYPE_RGB565:631case VC4_TEXTURE_TYPE_LUMALPHA:632case VC4_TEXTURE_TYPE_S16F:633case VC4_TEXTURE_TYPE_S16:634cpp = 2;635break;636case VC4_TEXTURE_TYPE_LUMINANCE:637case VC4_TEXTURE_TYPE_ALPHA:638case VC4_TEXTURE_TYPE_S8:639cpp = 1;640break;641case VC4_TEXTURE_TYPE_ETC1:642/* ETC1 is arranged as 64-bit blocks, where each block is 4x4643* pixels.644*/645cpp = 8;646width = (width + 3) >> 2;647height = (height + 3) >> 2;648break;649case VC4_TEXTURE_TYPE_BW1:650case VC4_TEXTURE_TYPE_A4:651case VC4_TEXTURE_TYPE_A1:652case VC4_TEXTURE_TYPE_RGBA64:653case VC4_TEXTURE_TYPE_YUV422R:654default:655DRM_ERROR("Texture format %d unsupported\n", type);656goto fail;657}658utile_w = utile_width(cpp);659utile_h = utile_height(cpp);660661if (type == VC4_TEXTURE_TYPE_RGBA32R) {662tiling_format = VC4_TILING_FORMAT_LINEAR;663} else {664if (size_is_lt(width, height, cpp))665tiling_format = VC4_TILING_FORMAT_LT;666else667tiling_format = VC4_TILING_FORMAT_T;668}669670if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5,671tiling_format, width, height, cpp)) {672goto fail;673}674675/* The mipmap levels are stored before the base of the texture. 
Make676* sure there is actually space in the BO.677*/678for (i = 1; i <= miplevels; i++) {679uint32_t level_width = max(width >> i, 1u);680uint32_t level_height = max(height >> i, 1u);681uint32_t aligned_width, aligned_height;682uint32_t level_size;683684/* Once the levels get small enough, they drop from T to LT. */685if (tiling_format == VC4_TILING_FORMAT_T &&686size_is_lt(level_width, level_height, cpp)) {687tiling_format = VC4_TILING_FORMAT_LT;688}689690switch (tiling_format) {691case VC4_TILING_FORMAT_T:692aligned_width = round_up(level_width, utile_w * 8);693aligned_height = round_up(level_height, utile_h * 8);694break;695case VC4_TILING_FORMAT_LT:696aligned_width = round_up(level_width, utile_w);697aligned_height = round_up(level_height, utile_h);698break;699default:700aligned_width = round_up(level_width, utile_w);701aligned_height = level_height;702break;703}704705level_size = aligned_width * cpp * aligned_height;706707if (offset < level_size) {708DRM_ERROR("Level %d (%dx%d -> %dx%d) size %db "709"overflowed buffer bounds (offset %d)\n",710i, level_width, level_height,711aligned_width, aligned_height,712level_size, offset);713goto fail;714}715716offset -= level_size;717}718719*validated_p0 = tex->paddr + p0;720721return true;722fail:723DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0);724DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1);725DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2);726DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3);727return false;728}729730static int731validate_gl_shader_rec(struct drm_device *dev,732struct vc4_exec_info *exec,733struct vc4_shader_state *state)734{735uint32_t *src_handles;736void *pkt_u, *pkt_v;737static const uint32_t shader_reloc_offsets[] = {7384, /* fs */73916, /* vs */74028, /* cs */741};742uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets);743struct drm_gem_cma_object *bo[shader_reloc_count + 8];744uint32_t nr_attributes, nr_relocs, 
packet_size;745int i;746747nr_attributes = state->addr & 0x7;748if (nr_attributes == 0)749nr_attributes = 8;750packet_size = gl_shader_rec_size(state->addr);751752nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes;753if (nr_relocs * 4 > exec->shader_rec_size) {754DRM_ERROR("overflowed shader recs reading %d handles "755"from %d bytes left\n",756nr_relocs, exec->shader_rec_size);757return -EINVAL;758}759src_handles = exec->shader_rec_u;760exec->shader_rec_u += nr_relocs * 4;761exec->shader_rec_size -= nr_relocs * 4;762763if (packet_size > exec->shader_rec_size) {764DRM_ERROR("overflowed shader recs copying %db packet "765"from %d bytes left\n",766packet_size, exec->shader_rec_size);767return -EINVAL;768}769pkt_u = exec->shader_rec_u;770pkt_v = exec->shader_rec_v;771memcpy(pkt_v, pkt_u, packet_size);772exec->shader_rec_u += packet_size;773/* Shader recs have to be aligned to 16 bytes (due to the attribute774* flags being in the low bytes), so round the next validated shader775* rec address up. 
This should be safe, since we've got so many776* relocations in a shader rec packet.777*/778BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4);779exec->shader_rec_v += roundup(packet_size, 16);780exec->shader_rec_size -= packet_size;781782for (i = 0; i < shader_reloc_count; i++) {783if (src_handles[i] > exec->bo_count) {784DRM_ERROR("Shader handle %d too big\n", src_handles[i]);785return -EINVAL;786}787788bo[i] = exec->bo[src_handles[i]];789if (!bo[i])790return -EINVAL;791}792for (i = shader_reloc_count; i < nr_relocs; i++) {793bo[i] = vc4_use_bo(exec, src_handles[i]);794if (!bo[i])795return -EINVAL;796}797798if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) !=799to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) {800DRM_ERROR("Thread mode of CL and FS do not match\n");801return -EINVAL;802}803804if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded ||805to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) {806DRM_ERROR("cs and vs cannot be threaded\n");807return -EINVAL;808}809810for (i = 0; i < shader_reloc_count; i++) {811struct vc4_validated_shader_info *validated_shader;812uint32_t o = shader_reloc_offsets[i];813uint32_t src_offset = *(uint32_t *)(pkt_u + o);814uint32_t *texture_handles_u;815void *uniform_data_u;816uint32_t tex, uni;817818*(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset;819820if (src_offset != 0) {821DRM_ERROR("Shaders must be at offset 0 of "822"the BO.\n");823return -EINVAL;824}825826validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader;827if (!validated_shader)828return -EINVAL;829830if (validated_shader->uniforms_src_size >831exec->uniforms_size) {832DRM_ERROR("Uniforms src buffer overflow\n");833return -EINVAL;834}835836texture_handles_u = exec->uniforms_u;837uniform_data_u = (texture_handles_u +838validated_shader->num_texture_samples);839840memcpy(exec->uniforms_v, uniform_data_u,841validated_shader->uniforms_size);842843for (tex = 0;844tex < 
validated_shader->num_texture_samples;845tex++) {846if (!reloc_tex(exec,847uniform_data_u,848&validated_shader->texture_samples[tex],849texture_handles_u[tex])) {850return -EINVAL;851}852}853854/* Fill in the uniform slots that need this shader's855* start-of-uniforms address (used for resetting the uniform856* stream in the presence of control flow).857*/858for (uni = 0;859uni < validated_shader->num_uniform_addr_offsets;860uni++) {861uint32_t o = validated_shader->uniform_addr_offsets[uni];862((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p;863}864865*(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p;866867exec->uniforms_u += validated_shader->uniforms_src_size;868exec->uniforms_v += validated_shader->uniforms_size;869exec->uniforms_p += validated_shader->uniforms_size;870}871872for (i = 0; i < nr_attributes; i++) {873struct drm_gem_cma_object *vbo =874bo[ARRAY_SIZE(shader_reloc_offsets) + i];875uint32_t o = 36 + i * 8;876uint32_t offset = *(uint32_t *)(pkt_u + o + 0);877uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1;878uint32_t stride = *(uint8_t *)(pkt_u + o + 5);879uint32_t max_index;880881if (state->addr & 0x8)882stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff;883884if (vbo->base.size < offset ||885vbo->base.size - offset < attr_size) {886DRM_ERROR("BO offset overflow (%d + %d > %zd)\n",887offset, attr_size, vbo->base.size);888return -EINVAL;889}890891if (stride != 0) {892max_index = ((vbo->base.size - offset - attr_size) /893stride);894if (state->max_index > max_index) {895DRM_ERROR("primitives use index %d out of "896"supplied %d\n",897state->max_index, max_index);898return -EINVAL;899}900}901902*(uint32_t *)(pkt_v + o) = vbo->paddr + offset;903}904905return 0;906}907908int909vc4_validate_shader_recs(struct drm_device *dev,910struct vc4_exec_info *exec)911{912uint32_t i;913int ret = 0;914915for (i = 0; i < exec->shader_state_count; i++) {916ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);917if (ret)918return 
ret;919}920921return ret;922}923924925