Path: blob/21.2-virgl/src/gallium/drivers/vc4/kernel/vc4_render_cl.c
4574 views
/*1* Copyright © 2014-2015 Broadcom2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223/**24* DOC: Render command list generation25*26* In the VC4 driver, render command list generation is performed by the27* kernel instead of userspace. We do this because validating a28* user-submitted command list is hard to get right and has high CPU overhead,29* while the number of valid configurations for render command lists is30* actually fairly low.31*/3233#include "vc4_drv.h"34#include "vc4_packet.h"3536struct vc4_rcl_setup {37struct drm_gem_cma_object *color_read;38struct drm_gem_cma_object *color_write;39struct drm_gem_cma_object *zs_read;40struct drm_gem_cma_object *zs_write;41struct drm_gem_cma_object *msaa_color_write;42struct drm_gem_cma_object *msaa_zs_write;4344struct drm_gem_cma_object *rcl;45u32 next_offset;46};4748static inline void rcl_u8(struct vc4_rcl_setup *setup, u8 val)49{50*(u8 *)(setup->rcl->vaddr + setup->next_offset) = val;51setup->next_offset += 1;52}5354static inline void rcl_u16(struct vc4_rcl_setup *setup, u16 val)55{56*(u16 *)(setup->rcl->vaddr + setup->next_offset) = val;57setup->next_offset += 2;58}5960static inline void rcl_u32(struct vc4_rcl_setup *setup, u32 val)61{62*(u32 *)(setup->rcl->vaddr + setup->next_offset) = val;63setup->next_offset += 4;64}6566/*67* Emits a no-op STORE_TILE_BUFFER_GENERAL.68*69* If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of70* some sort before another load is triggered.71*/72static void vc4_store_before_load(struct vc4_rcl_setup *setup)73{74rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);75rcl_u16(setup,76VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_NONE,77VC4_LOADSTORE_TILE_BUFFER_BUFFER) |78VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR |79VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR |80VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR);81rcl_u32(setup, 0); /* no address, since we're in None mode */82}8384/*85* Calculates the physical address of the start of a tile in a RCL surface.86*87* Unlike the other load/store packets,88* VC4_PACKET_LOAD/STORE_FULL_RES_TILE_BUFFER don't look at the tile89* coordinates packet, and instead just store to the address given.90*/91static uint32_t vc4_full_res_offset(struct vc4_exec_info *exec,92struct drm_gem_cma_object *bo,93struct drm_vc4_submit_rcl_surface *surf,94uint8_t x, uint8_t y)95{96return bo->paddr + surf->offset + VC4_TILE_BUFFER_SIZE *97(DIV_ROUND_UP(exec->args->width, exec->tile_width) * y + x);98}99100/*101* Emits a PACKET_TILE_COORDINATES if one isn't already pending.102*103* The tile coordinates packet triggers a pending load if there is one, are104* used for clipping during rendering, and determine where loads/stores happen105* relative to their base address.106*/107static void vc4_tile_coordinates(struct vc4_rcl_setup *setup,108uint32_t x, uint32_t y)109{110rcl_u8(setup, VC4_PACKET_TILE_COORDINATES);111rcl_u8(setup, x);112rcl_u8(setup, y);113}114115static void emit_tile(struct vc4_exec_info *exec,116struct vc4_rcl_setup *setup,117uint8_t x, uint8_t y, bool first, bool last)118{119struct drm_vc4_submit_cl *args = exec->args;120bool has_bin = args->bin_cl_size != 0;121122/* Note that the load doesn't actually occur until the123* tile coords packet is processed, and only one load124* may be outstanding at a time.125*/126if (setup->color_read) {127if (args->color_read.flags &128VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {129rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER);130rcl_u32(setup,131vc4_full_res_offset(exec, setup->color_read,132&args->color_read, x, y) |133VC4_LOADSTORE_FULL_RES_DISABLE_ZS);134} else {135rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);136rcl_u16(setup, args->color_read.bits);137rcl_u32(setup, setup->color_read->paddr +138args->color_read.offset);139}140}141142if (setup->zs_read) {143if (setup->color_read) {144/* Exec previous load. */145vc4_tile_coordinates(setup, x, y);146vc4_store_before_load(setup);147}148149if (args->zs_read.flags &150VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {151rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER);152rcl_u32(setup,153vc4_full_res_offset(exec, setup->zs_read,154&args->zs_read, x, y) |155VC4_LOADSTORE_FULL_RES_DISABLE_COLOR);156} else {157rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);158rcl_u16(setup, args->zs_read.bits);159rcl_u32(setup, setup->zs_read->paddr +160args->zs_read.offset);161}162}163164/* Clipping depends on tile coordinates having been165* emitted, so we always need one here.166*/167vc4_tile_coordinates(setup, x, y);168169/* Wait for the binner before jumping to the first170* tile's lists.171*/172if (first && has_bin)173rcl_u8(setup, VC4_PACKET_WAIT_ON_SEMAPHORE);174175if (has_bin) {176rcl_u8(setup, VC4_PACKET_BRANCH_TO_SUB_LIST);177rcl_u32(setup, (exec->tile_bo->paddr +178exec->tile_alloc_offset +179(y * exec->bin_tiles_x + x) * 32));180}181182if (setup->msaa_color_write) {183bool last_tile_write = (!setup->msaa_zs_write &&184!setup->zs_write &&185!setup->color_write);186uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_ZS;187188if (!last_tile_write)189bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL;190else if (last)191bits |= VC4_LOADSTORE_FULL_RES_EOF;192rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER);193rcl_u32(setup,194vc4_full_res_offset(exec, setup->msaa_color_write,195&args->msaa_color_write, x, y) |196bits);197}198199if (setup->msaa_zs_write) {200bool last_tile_write = (!setup->zs_write &&201!setup->color_write);202uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_COLOR;203204if (setup->msaa_color_write)205vc4_tile_coordinates(setup, x, y);206if (!last_tile_write)207bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL;208else if (last)209bits |= VC4_LOADSTORE_FULL_RES_EOF;210rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER);211rcl_u32(setup,212vc4_full_res_offset(exec, setup->msaa_zs_write,213&args->msaa_zs_write, x, y) |214bits);215}216217if (setup->zs_write) {218bool last_tile_write = !setup->color_write;219220if (setup->msaa_color_write || setup->msaa_zs_write)221vc4_tile_coordinates(setup, x, y);222223rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);224rcl_u16(setup, args->zs_write.bits |225(last_tile_write ?2260 : VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR));227rcl_u32(setup,228(setup->zs_write->paddr + args->zs_write.offset) |229((last && last_tile_write) ?230VC4_LOADSTORE_TILE_BUFFER_EOF : 0));231}232233if (setup->color_write) {234if (setup->msaa_color_write || setup->msaa_zs_write ||235setup->zs_write) {236vc4_tile_coordinates(setup, x, y);237}238239if (last)240rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);241else242rcl_u8(setup, VC4_PACKET_STORE_MS_TILE_BUFFER);243}244}245246static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,247struct vc4_rcl_setup *setup)248{249struct drm_vc4_submit_cl *args = exec->args;250bool has_bin = args->bin_cl_size != 0;251uint8_t min_x_tile = args->min_x_tile;252uint8_t min_y_tile = args->min_y_tile;253uint8_t max_x_tile = args->max_x_tile;254uint8_t max_y_tile = args->max_y_tile;255uint8_t xtiles = max_x_tile - min_x_tile + 1;256uint8_t ytiles = max_y_tile - min_y_tile + 1;257uint8_t xi, yi;258uint32_t size, loop_body_size;259bool positive_x = true;260bool positive_y = true;261262if (args->flags & VC4_SUBMIT_CL_FIXED_RCL_ORDER) {263if (!(args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_X))264positive_x = false;265if (!(args->flags & VC4_SUBMIT_CL_RCL_ORDER_INCREASING_Y))266positive_y = false;267}268269size = VC4_PACKET_TILE_RENDERING_MODE_CONFIG_SIZE;270loop_body_size = VC4_PACKET_TILE_COORDINATES_SIZE;271272if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {273size += VC4_PACKET_CLEAR_COLORS_SIZE +274VC4_PACKET_TILE_COORDINATES_SIZE +275VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;276}277278if (setup->color_read) {279if (args->color_read.flags &280VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {281loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;282} else {283loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;284}285}286if (setup->zs_read) {287if (setup->color_read) {288loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;289loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;290}291292if (args->zs_read.flags &293VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {294loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;295} else {296loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;297}298}299300if (has_bin) {301size += VC4_PACKET_WAIT_ON_SEMAPHORE_SIZE;302loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE;303}304305if (setup->msaa_color_write)306loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE;307if (setup->msaa_zs_write)308loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE;309310if (setup->zs_write)311loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;312if (setup->color_write)313loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE;314315/* We need a VC4_PACKET_TILE_COORDINATES in between each store. */316loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE *317((setup->msaa_color_write != NULL) +318(setup->msaa_zs_write != NULL) +319(setup->color_write != NULL) +320(setup->zs_write != NULL) - 1);321322size += xtiles * ytiles * loop_body_size;323324setup->rcl = drm_gem_cma_create(dev, size);325if (!setup->rcl)326return -ENOMEM;327list_addtail(&to_vc4_bo(&setup->rcl->base)->unref_head,328&exec->unref_list);329330rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);331rcl_u32(setup,332(setup->color_write ? (setup->color_write->paddr +333args->color_write.offset) :3340));335rcl_u16(setup, args->width);336rcl_u16(setup, args->height);337rcl_u16(setup, args->color_write.bits);338339/* The tile buffer gets cleared when the previous tile is stored. If340* the clear values changed between frames, then the tile buffer has341* stale clear values in it, so we have to do a store in None mode (no342* writes) so that we trigger the tile buffer clear.343*/344if (args->flags & VC4_SUBMIT_CL_USE_CLEAR_COLOR) {345rcl_u8(setup, VC4_PACKET_CLEAR_COLORS);346rcl_u32(setup, args->clear_color[0]);347rcl_u32(setup, args->clear_color[1]);348rcl_u32(setup, args->clear_z);349rcl_u8(setup, args->clear_s);350351vc4_tile_coordinates(setup, 0, 0);352353rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);354rcl_u16(setup, VC4_LOADSTORE_TILE_BUFFER_NONE);355rcl_u32(setup, 0); /* no address, since we're in None mode */356}357358for (yi = 0; yi < ytiles; yi++) {359int y = positive_y ? min_y_tile + yi : max_y_tile - yi;360for (xi = 0; xi < xtiles; xi++) {361int x = positive_x ? min_x_tile + xi : max_x_tile - xi;362bool first = (xi == 0 && yi == 0);363bool last = (xi == xtiles - 1 && yi == ytiles - 1);364365emit_tile(exec, setup, x, y, first, last);366}367}368369BUG_ON(setup->next_offset != size);370exec->ct1ca = setup->rcl->paddr;371exec->ct1ea = setup->rcl->paddr + setup->next_offset;372373return 0;374}375376static int vc4_full_res_bounds_check(struct vc4_exec_info *exec,377struct drm_gem_cma_object *obj,378struct drm_vc4_submit_rcl_surface *surf)379{380struct drm_vc4_submit_cl *args = exec->args;381u32 render_tiles_stride = DIV_ROUND_UP(exec->args->width,382exec->tile_width);383384if (surf->offset > obj->base.size) {385DRM_ERROR("surface offset %d > BO size %zd\n",386surf->offset, obj->base.size);387return -EINVAL;388}389390if ((obj->base.size - surf->offset) / VC4_TILE_BUFFER_SIZE <391render_tiles_stride * args->max_y_tile + args->max_x_tile) {392DRM_ERROR("MSAA tile %d, %d out of bounds "393"(bo size %zd, offset %d).\n",394args->max_x_tile, args->max_y_tile,395obj->base.size,396surf->offset);397return -EINVAL;398}399400return 0;401}402403static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info *exec,404struct drm_gem_cma_object **obj,405struct drm_vc4_submit_rcl_surface *surf)406{407if (surf->flags != 0 || surf->bits != 0) {408DRM_ERROR("MSAA surface had nonzero flags/bits\n");409return -EINVAL;410}411412if (surf->hindex == ~0)413return 0;414415*obj = vc4_use_bo(exec, surf->hindex);416if (!*obj)417return -EINVAL;418419if (surf->offset & 0xf) {420DRM_ERROR("MSAA write must be 16b aligned.\n");421return -EINVAL;422}423424return vc4_full_res_bounds_check(exec, *obj, surf);425}426427static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,428struct drm_gem_cma_object **obj,429struct drm_vc4_submit_rcl_surface *surf)430{431uint8_t tiling = VC4_GET_FIELD(surf->bits,432VC4_LOADSTORE_TILE_BUFFER_TILING);433uint8_t buffer = VC4_GET_FIELD(surf->bits,434VC4_LOADSTORE_TILE_BUFFER_BUFFER);435uint8_t format = VC4_GET_FIELD(surf->bits,436VC4_LOADSTORE_TILE_BUFFER_FORMAT);437int cpp;438int ret;439440if (surf->flags & ~VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {441DRM_ERROR("Extra flags set\n");442return -EINVAL;443}444445if (surf->hindex == ~0)446return 0;447448*obj = vc4_use_bo(exec, surf->hindex);449if (!*obj)450return -EINVAL;451452if (surf->flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {453if (surf == &exec->args->zs_write) {454DRM_ERROR("general zs write may not be a full-res.\n");455return -EINVAL;456}457458if (surf->bits != 0) {459DRM_ERROR("load/store general bits set with "460"full res load/store.\n");461return -EINVAL;462}463464ret = vc4_full_res_bounds_check(exec, *obj, surf);465if (!ret)466return ret;467468return 0;469}470471if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK |472VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK |473VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK)) {474DRM_ERROR("Unknown bits in load/store: 0x%04x\n",475surf->bits);476return -EINVAL;477}478479if (tiling > VC4_TILING_FORMAT_LT) {480DRM_ERROR("Bad tiling format\n");481return -EINVAL;482}483484if (buffer == VC4_LOADSTORE_TILE_BUFFER_ZS) {485if (format != 0) {486DRM_ERROR("No color format should be set for ZS\n");487return -EINVAL;488}489cpp = 4;490} else if (buffer == VC4_LOADSTORE_TILE_BUFFER_COLOR) {491switch (format) {492case VC4_LOADSTORE_TILE_BUFFER_BGR565:493case VC4_LOADSTORE_TILE_BUFFER_BGR565_DITHER:494cpp = 2;495break;496case VC4_LOADSTORE_TILE_BUFFER_RGBA8888:497cpp = 4;498break;499default:500DRM_ERROR("Bad tile buffer format\n");501return -EINVAL;502}503} else {504DRM_ERROR("Bad load/store buffer %d.\n", buffer);505return -EINVAL;506}507508if (surf->offset & 0xf) {509DRM_ERROR("load/store buffer must be 16b aligned.\n");510return -EINVAL;511}512513if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling,514exec->args->width, exec->args->height, cpp)) {515return -EINVAL;516}517518return 0;519}520521static int522vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec,523struct vc4_rcl_setup *setup,524struct drm_gem_cma_object **obj,525struct drm_vc4_submit_rcl_surface *surf)526{527uint8_t tiling = VC4_GET_FIELD(surf->bits,528VC4_RENDER_CONFIG_MEMORY_FORMAT);529uint8_t format = VC4_GET_FIELD(surf->bits,530VC4_RENDER_CONFIG_FORMAT);531int cpp;532533if (surf->flags != 0) {534DRM_ERROR("No flags supported on render config.\n");535return -EINVAL;536}537538if (surf->bits & ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK |539VC4_RENDER_CONFIG_FORMAT_MASK |540VC4_RENDER_CONFIG_MS_MODE_4X |541VC4_RENDER_CONFIG_DECIMATE_MODE_4X)) {542DRM_ERROR("Unknown bits in render config: 0x%04x\n",543surf->bits);544return -EINVAL;545}546547if (surf->hindex == ~0)548return 0;549550*obj = vc4_use_bo(exec, surf->hindex);551if (!*obj)552return -EINVAL;553554if (tiling > VC4_TILING_FORMAT_LT) {555DRM_ERROR("Bad tiling format\n");556return -EINVAL;557}558559switch (format) {560case VC4_RENDER_CONFIG_FORMAT_BGR565_DITHERED:561case VC4_RENDER_CONFIG_FORMAT_BGR565:562cpp = 2;563break;564case VC4_RENDER_CONFIG_FORMAT_RGBA8888:565cpp = 4;566break;567default:568DRM_ERROR("Bad tile buffer format\n");569return -EINVAL;570}571572if (!vc4_check_tex_size(exec, *obj, surf->offset, tiling,573exec->args->width, exec->args->height, cpp)) {574return -EINVAL;575}576577return 0;578}579580int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec)581{582struct vc4_rcl_setup setup = {0};583struct drm_vc4_submit_cl *args = exec->args;584bool has_bin = args->bin_cl_size != 0;585int ret;586587if (args->min_x_tile > args->max_x_tile ||588args->min_y_tile > args->max_y_tile) {589DRM_ERROR("Bad render tile set (%d,%d)-(%d,%d)\n",590args->min_x_tile, args->min_y_tile,591args->max_x_tile, args->max_y_tile);592return -EINVAL;593}594595if (has_bin &&596(args->max_x_tile > exec->bin_tiles_x ||597args->max_y_tile > exec->bin_tiles_y)) {598DRM_ERROR("Render tiles (%d,%d) outside of bin config "599"(%d,%d)\n",600args->max_x_tile, args->max_y_tile,601exec->bin_tiles_x, exec->bin_tiles_y);602return -EINVAL;603}604605ret = vc4_rcl_render_config_surface_setup(exec, &setup,606&setup.color_write,607&args->color_write);608if (ret)609return ret;610611ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read);612if (ret)613return ret;614615ret = vc4_rcl_surface_setup(exec, &setup.zs_read, &args->zs_read);616if (ret)617return ret;618619ret = vc4_rcl_surface_setup(exec, &setup.zs_write, &args->zs_write);620if (ret)621return ret;622623ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_color_write,624&args->msaa_color_write);625if (ret)626return ret;627628ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_zs_write,629&args->msaa_zs_write);630if (ret)631return ret;632633/* We shouldn't even have the job submitted to us if there's no634* surface to write out.635*/636if (!setup.color_write && !setup.zs_write &&637!setup.msaa_color_write && !setup.msaa_zs_write) {638DRM_ERROR("RCL requires color or Z/S write\n");639return -EINVAL;640}641642return vc4_create_rcl_bo(dev, exec, &setup);643}644645646