Path: blob/21.2-virgl/src/gallium/drivers/crocus/crocus_blorp.c
4570 views
/*1* Copyright © 2018 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included11* in all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS14* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER17* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING18* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER19* DEALINGS IN THE SOFTWARE.20*/2122/**23* @file crocus_blorp.c24*25* ============================= GENXML CODE =============================26* [This file is compiled once per generation.]27* =======================================================================28*29* GenX specific code for working with BLORP (blitting, resolves, clears30* on the 3D engine). This provides the driver-specific hooks needed to31* implement the BLORP API.32*33* See crocus_blit.c, crocus_clear.c, and so on.34*/3536#include <assert.h>3738#include "crocus_batch.h"39#include "crocus_resource.h"40#include "crocus_context.h"4142#include "util/u_upload_mgr.h"43#include "intel/common/intel_l3_config.h"4445#include "blorp/blorp_genX_exec.h"4647#if GFX_VER <= 548#include "gen4_blorp_exec.h"49#endif5051static uint32_t *52stream_state(struct crocus_batch *batch,53unsigned size,54unsigned alignment,55uint32_t *out_offset,56struct crocus_bo **out_bo)57{58uint32_t offset = ALIGN(batch->state.used, alignment);5960if (offset + size >= STATE_SZ && !batch->no_wrap) {61crocus_batch_flush(batch);62offset = ALIGN(batch->state.used, alignment);63} else if (offset + size >= batch->state.bo->size) {64const unsigned new_size =65MIN2(batch->state.bo->size + batch->state.bo->size / 2,66MAX_STATE_SIZE);67crocus_grow_buffer(batch, true, batch->state.used, new_size);68assert(offset + size < batch->state.bo->size);69}7071crocus_record_state_size(batch->state_sizes, offset, size);7273batch->state.used = offset + size;74*out_offset = offset;7576/* If the caller has asked for a BO, we leave them the responsibility of77* adding bo->gtt_offset (say, by handing an address to genxml). If not,78* we assume they want the offset from a base address.79*/80if (out_bo)81*out_bo = batch->state.bo;8283return (uint32_t *)batch->state.map + (offset >> 2);84}8586static void *87blorp_emit_dwords(struct blorp_batch *blorp_batch, unsigned n)88{89struct crocus_batch *batch = blorp_batch->driver_batch;90return crocus_get_command_space(batch, n * sizeof(uint32_t));91}9293static uint64_t94blorp_emit_reloc(struct blorp_batch *blorp_batch, UNUSED void *location,95struct blorp_address addr, uint32_t delta)96{97struct crocus_batch *batch = blorp_batch->driver_batch;98uint32_t offset;99100if (GFX_VER < 6 && crocus_ptr_in_state_buffer(batch, location)) {101offset = (char *)location - (char *)batch->state.map;102return crocus_state_reloc(batch, offset,103addr.buffer, addr.offset + delta,104addr.reloc_flags);105}106107assert(!crocus_ptr_in_state_buffer(batch, location));108109offset = (char *)location - (char *)batch->command.map;110return crocus_command_reloc(batch, offset,111addr.buffer, addr.offset + delta,112addr.reloc_flags);113}114115static void116blorp_surface_reloc(struct blorp_batch *blorp_batch, uint32_t ss_offset,117struct blorp_address addr, uint32_t delta)118{119struct crocus_batch *batch = blorp_batch->driver_batch;120struct crocus_bo *bo = addr.buffer;121122uint64_t reloc_val =123crocus_state_reloc(batch, ss_offset, bo, addr.offset + delta,124addr.reloc_flags);125126void *reloc_ptr = (void *)batch->state.map + ss_offset;127*(uint32_t *)reloc_ptr = reloc_val;128}129130static uint64_t131blorp_get_surface_address(struct blorp_batch *blorp_batch,132struct blorp_address addr)133{134/* We'll let blorp_surface_reloc write the address. */135return 0ull;136}137138#if GFX_VER >= 7139static struct blorp_address140blorp_get_surface_base_address(struct blorp_batch *blorp_batch)141{142struct crocus_batch *batch = blorp_batch->driver_batch;143return (struct blorp_address) {144.buffer = batch->state.bo,145.offset = 0146};147}148#endif149150static void *151blorp_alloc_dynamic_state(struct blorp_batch *blorp_batch,152uint32_t size,153uint32_t alignment,154uint32_t *offset)155{156struct crocus_batch *batch = blorp_batch->driver_batch;157158return stream_state(batch, size, alignment, offset, NULL);159}160161static void162blorp_alloc_binding_table(struct blorp_batch *blorp_batch,163unsigned num_entries,164unsigned state_size,165unsigned state_alignment,166uint32_t *bt_offset,167uint32_t *surface_offsets,168void **surface_maps)169{170struct crocus_batch *batch = blorp_batch->driver_batch;171uint32_t *bt_map = stream_state(batch, num_entries * sizeof(uint32_t), 32,172bt_offset, NULL);173174for (unsigned i = 0; i < num_entries; i++) {175surface_maps[i] = stream_state(batch,176state_size, state_alignment,177&(surface_offsets)[i], NULL);178bt_map[i] = surface_offsets[i];179}180}181182static void *183blorp_alloc_vertex_buffer(struct blorp_batch *blorp_batch,184uint32_t size,185struct blorp_address *addr)186{187struct crocus_batch *batch = blorp_batch->driver_batch;188struct crocus_bo *bo;189uint32_t offset;190191void *map = stream_state(batch, size, 64,192&offset, &bo);193194*addr = (struct blorp_address) {195.buffer = bo,196.offset = offset,197.reloc_flags = RELOC_32BIT,198#if GFX_VER >= 7199.mocs = crocus_mocs(bo, &batch->screen->isl_dev),200#endif201};202203return map;204}205206/**207*/208static void209blorp_vf_invalidate_for_vb_48b_transitions(struct blorp_batch *blorp_batch,210const struct blorp_address *addrs,211UNUSED uint32_t *sizes,212unsigned num_vbs)213{214}215216static struct blorp_address217blorp_get_workaround_address(struct blorp_batch *blorp_batch)218{219struct crocus_batch *batch = blorp_batch->driver_batch;220221return (struct blorp_address) {222.buffer = batch->ice->workaround_bo,223.offset = batch->ice->workaround_offset,224};225}226227static void228blorp_flush_range(UNUSED struct blorp_batch *blorp_batch,229UNUSED void *start,230UNUSED size_t size)231{232/* All allocated states come from the batch which we will flush before we233* submit it. There's nothing for us to do here.234*/235}236237#if GFX_VER >= 7238static const struct intel_l3_config *239blorp_get_l3_config(struct blorp_batch *blorp_batch)240{241struct crocus_batch *batch = blorp_batch->driver_batch;242return batch->screen->l3_config_3d;243}244#else /* GFX_VER < 7 */245static void246blorp_emit_urb_config(struct blorp_batch *blorp_batch,247unsigned vs_entry_size,248UNUSED unsigned sf_entry_size)249{250struct crocus_batch *batch = blorp_batch->driver_batch;251#if GFX_VER <= 5252batch->screen->vtbl.calculate_urb_fence(batch, 0, vs_entry_size, sf_entry_size);253#else254genX(crocus_upload_urb)(batch, vs_entry_size, false, vs_entry_size);255#endif256}257#endif258259static void260crocus_blorp_exec(struct blorp_batch *blorp_batch,261const struct blorp_params *params)262{263struct crocus_context *ice = blorp_batch->blorp->driver_ctx;264struct crocus_batch *batch = blorp_batch->driver_batch;265266/* Flush the sampler and render caches. We definitely need to flush the267* sampler cache so that we get updated contents from the render cache for268* the glBlitFramebuffer() source. Also, we are sometimes warned in the269* docs to flush the cache between reinterpretations of the same surface270* data with different formats, which blorp does for stencil and depth271* data.272*/273if (params->src.enabled)274crocus_cache_flush_for_read(batch, params->src.addr.buffer);275if (params->dst.enabled) {276crocus_cache_flush_for_render(batch, params->dst.addr.buffer,277params->dst.view.format,278params->dst.aux_usage);279}280if (params->depth.enabled)281crocus_cache_flush_for_depth(batch, params->depth.addr.buffer);282if (params->stencil.enabled)283crocus_cache_flush_for_depth(batch, params->stencil.addr.buffer);284285crocus_require_command_space(batch, 1400);286crocus_require_statebuffer_space(batch, 600);287batch->no_wrap = true;288289#if GFX_VER == 8290genX(crocus_update_pma_fix)(ice, batch, false);291#endif292293#if GFX_VER == 6294/* Emit workaround flushes when we switch from drawing to blorping. */295crocus_emit_post_sync_nonzero_flush(batch);296#endif297298#if GFX_VER >= 6299crocus_emit_depth_stall_flushes(batch);300#endif301302blorp_emit(blorp_batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {303rect.ClippedDrawingRectangleXMax = MAX2(params->x1, params->x0) - 1;304rect.ClippedDrawingRectangleYMax = MAX2(params->y1, params->y0) - 1;305}306307batch->screen->vtbl.update_surface_base_address(batch);308crocus_handle_always_flush_cache(batch);309310batch->contains_draw = true;311blorp_exec(blorp_batch, params);312313batch->no_wrap = false;314crocus_handle_always_flush_cache(batch);315316/* We've smashed all state compared to what the normal 3D pipeline317* rendering tracks for GL.318*/319320uint64_t skip_bits = (CROCUS_DIRTY_POLYGON_STIPPLE |321CROCUS_DIRTY_GEN7_SO_BUFFERS |322CROCUS_DIRTY_SO_DECL_LIST |323CROCUS_DIRTY_LINE_STIPPLE |324CROCUS_ALL_DIRTY_FOR_COMPUTE |325CROCUS_DIRTY_GEN6_SCISSOR_RECT |326CROCUS_DIRTY_GEN75_VF |327CROCUS_DIRTY_SF_CL_VIEWPORT);328329uint64_t skip_stage_bits = (CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE |330CROCUS_STAGE_DIRTY_UNCOMPILED_VS |331CROCUS_STAGE_DIRTY_UNCOMPILED_TCS |332CROCUS_STAGE_DIRTY_UNCOMPILED_TES |333CROCUS_STAGE_DIRTY_UNCOMPILED_GS |334CROCUS_STAGE_DIRTY_UNCOMPILED_FS |335CROCUS_STAGE_DIRTY_SAMPLER_STATES_VS |336CROCUS_STAGE_DIRTY_SAMPLER_STATES_TCS |337CROCUS_STAGE_DIRTY_SAMPLER_STATES_TES |338CROCUS_STAGE_DIRTY_SAMPLER_STATES_GS);339340if (!ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]) {341/* BLORP disabled tessellation, that's fine for the next draw */342skip_stage_bits |= CROCUS_STAGE_DIRTY_TCS |343CROCUS_STAGE_DIRTY_TES |344CROCUS_STAGE_DIRTY_CONSTANTS_TCS |345CROCUS_STAGE_DIRTY_CONSTANTS_TES |346CROCUS_STAGE_DIRTY_BINDINGS_TCS |347CROCUS_STAGE_DIRTY_BINDINGS_TES;348}349350if (!ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]) {351/* BLORP disabled geometry shaders, that's fine for the next draw */352skip_stage_bits |= CROCUS_STAGE_DIRTY_GS |353CROCUS_STAGE_DIRTY_CONSTANTS_GS |354CROCUS_STAGE_DIRTY_BINDINGS_GS;355}356357/* we can skip flagging CROCUS_DIRTY_DEPTH_BUFFER, if358* BLORP_BATCH_NO_EMIT_DEPTH_STENCIL is set.359*/360if (blorp_batch->flags & BLORP_BATCH_NO_EMIT_DEPTH_STENCIL)361skip_bits |= CROCUS_DIRTY_DEPTH_BUFFER;362363if (!params->wm_prog_data)364skip_bits |= CROCUS_DIRTY_GEN6_BLEND_STATE;365366ice->state.dirty |= ~skip_bits;367ice->state.stage_dirty |= ~skip_stage_bits;368369ice->urb.vsize = 0;370ice->urb.gs_present = false;371ice->urb.gsize = 0;372ice->urb.tess_present = false;373ice->urb.hsize = 0;374ice->urb.dsize = 0;375376if (params->dst.enabled) {377crocus_render_cache_add_bo(batch, params->dst.addr.buffer,378params->dst.view.format,379params->dst.aux_usage);380}381if (params->depth.enabled)382crocus_depth_cache_add_bo(batch, params->depth.addr.buffer);383if (params->stencil.enabled)384crocus_depth_cache_add_bo(batch, params->stencil.addr.buffer);385}386387static void388blorp_measure_start(struct blorp_batch *blorp_batch,389const struct blorp_params *params)390{391}392393void394genX(crocus_init_blorp)(struct crocus_context *ice)395{396struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;397398blorp_init(&ice->blorp, ice, &screen->isl_dev);399ice->blorp.compiler = screen->compiler;400ice->blorp.lookup_shader = crocus_blorp_lookup_shader;401ice->blorp.upload_shader = crocus_blorp_upload_shader;402ice->blorp.exec = crocus_blorp_exec;403}404405406