Path: blob/21.2-virgl/src/gallium/drivers/crocus/crocus_draw.c
4570 views
/*1* Copyright © 2017 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included11* in all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS14* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER17* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING18* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER19* DEALINGS IN THE SOFTWARE.20*/2122/**23* @file crocus_draw.c24*25* The main driver hooks for drawing and launching compute shaders.26*/2728#include <stdio.h>29#include <errno.h>30#include "pipe/p_defines.h"31#include "pipe/p_state.h"32#include "pipe/p_context.h"33#include "pipe/p_screen.h"34#include "util/u_draw.h"35#include "util/u_inlines.h"36#include "util/u_transfer.h"37#include "util/u_upload_mgr.h"38#include "intel/compiler/brw_compiler.h"39#include "intel/compiler/brw_eu_defines.h"40#include "crocus_context.h"41#include "crocus_defines.h"42#include "util/u_prim_restart.h"43#include "indices/u_primconvert.h"44#include "util/u_prim.h"4546static bool47prim_is_points_or_lines(enum pipe_prim_type mode)48{49/* We don't need to worry about adjacency - it can only be used with50* geometry shaders, and we don't care about this info when GS is on.51*/52return mode == PIPE_PRIM_POINTS ||53mode == PIPE_PRIM_LINES ||54mode == PIPE_PRIM_LINE_LOOP ||55mode == PIPE_PRIM_LINE_STRIP;56}5758static bool59can_cut_index_handle_restart_index(struct crocus_context *ice,60const struct pipe_draw_info *draw)61{62switch (draw->index_size) {63case 1:64return draw->restart_index == 0xff;65case 2:66return draw->restart_index == 0xffff;67case 4:68return draw->restart_index == 0xffffffff;69default:70unreachable("illegal index size\n");71}7273return false;74}7576static bool77can_cut_index_handle_prim(struct crocus_context *ice,78const struct pipe_draw_info *draw)79{80struct crocus_screen *screen = (struct crocus_screen*)ice->ctx.screen;81const struct intel_device_info *devinfo = &screen->devinfo;8283/* Haswell can do it all. */84if (devinfo->verx10 >= 75)85return true;8687if (!can_cut_index_handle_restart_index(ice, draw))88return false;8990switch (draw->mode) {91case PIPE_PRIM_POINTS:92case PIPE_PRIM_LINES:93case PIPE_PRIM_LINE_STRIP:94case PIPE_PRIM_TRIANGLES:95case PIPE_PRIM_TRIANGLE_STRIP:96case PIPE_PRIM_LINES_ADJACENCY:97case PIPE_PRIM_LINE_STRIP_ADJACENCY:98case PIPE_PRIM_TRIANGLES_ADJACENCY:99case PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY:100return true;101default:102break;103}104return false;105}106107/**108* Record the current primitive mode and restart information, flagging109* related packets as dirty if necessary.110*111* This must be called before updating compiled shaders, because the patch112* information informs the TCS key.113*/114static void115crocus_update_draw_info(struct crocus_context *ice,116const struct pipe_draw_info *info,117const struct pipe_draw_start_count_bias *draw)118{119struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;120enum pipe_prim_type mode = info->mode;121122if (screen->devinfo.ver < 6) {123/* Slight optimization to avoid the GS program when not needed:124*/125struct pipe_rasterizer_state *rs_state = crocus_get_rast_state(ice);126if (mode == PIPE_PRIM_QUAD_STRIP && !rs_state->flatshade &&127rs_state->fill_front == PIPE_POLYGON_MODE_FILL &&128rs_state->fill_back == PIPE_POLYGON_MODE_FILL)129mode = PIPE_PRIM_TRIANGLE_STRIP;130if (mode == PIPE_PRIM_QUADS &&131draw->count == 4 &&132!rs_state->flatshade &&133rs_state->fill_front == PIPE_POLYGON_MODE_FILL &&134rs_state->fill_back == PIPE_POLYGON_MODE_FILL)135mode = PIPE_PRIM_TRIANGLE_FAN;136}137138if (ice->state.prim_mode != mode) {139ice->state.prim_mode = mode;140141if (screen->devinfo.ver == 8)142ice->state.dirty |= CROCUS_DIRTY_GEN8_VF_TOPOLOGY;143144if (screen->devinfo.ver < 6)145ice->state.dirty |= CROCUS_DIRTY_GEN4_CLIP_PROG | CROCUS_DIRTY_GEN4_SF_PROG;146if (screen->devinfo.ver <= 6)147ice->state.dirty |= CROCUS_DIRTY_GEN4_FF_GS_PROG;148149if (screen->devinfo.ver >= 7)150ice->state.dirty |= CROCUS_DIRTY_GEN7_SBE;151152/* For XY Clip enables */153bool points_or_lines = prim_is_points_or_lines(mode);154if (points_or_lines != ice->state.prim_is_points_or_lines) {155ice->state.prim_is_points_or_lines = points_or_lines;156ice->state.dirty |= CROCUS_DIRTY_CLIP;157}158}159160if (info->mode == PIPE_PRIM_PATCHES &&161ice->state.vertices_per_patch != info->vertices_per_patch) {162ice->state.vertices_per_patch = info->vertices_per_patch;163164if (screen->devinfo.ver == 8)165ice->state.dirty |= CROCUS_DIRTY_GEN8_VF_TOPOLOGY;166/* This is needed for key->input_vertices */167ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_UNCOMPILED_TCS;168169/* Flag constants dirty for gl_PatchVerticesIn if needed. */170const struct shader_info *tcs_info =171crocus_get_shader_info(ice, MESA_SHADER_TESS_CTRL);172if (tcs_info &&173BITSET_TEST(tcs_info->system_values_read, SYSTEM_VALUE_VERTICES_IN)) {174ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_TCS;175ice->state.shaders[MESA_SHADER_TESS_CTRL].sysvals_need_upload = true;176}177}178179const unsigned cut_index = info->primitive_restart ? info->restart_index :180ice->state.cut_index;181if (ice->state.primitive_restart != info->primitive_restart ||182ice->state.cut_index != cut_index) {183if (screen->devinfo.verx10 >= 75)184ice->state.dirty |= CROCUS_DIRTY_GEN75_VF;185ice->state.primitive_restart = info->primitive_restart;186ice->state.cut_index = info->restart_index;187}188}189190/**191* Update shader draw parameters, flagging VF packets as dirty if necessary.192*/193static void194crocus_update_draw_parameters(struct crocus_context *ice,195const struct pipe_draw_info *info,196unsigned drawid_offset,197const struct pipe_draw_indirect_info *indirect,198const struct pipe_draw_start_count_bias *draw)199{200bool changed = false;201202if (ice->state.vs_uses_draw_params) {203struct crocus_state_ref *draw_params = &ice->draw.draw_params;204205if (indirect && indirect->buffer) {206pipe_resource_reference(&draw_params->res, indirect->buffer);207draw_params->offset =208indirect->offset + (info->index_size ? 12 : 8);209210changed = true;211ice->draw.params_valid = false;212} else {213int firstvertex = info->index_size ? draw->index_bias : draw->start;214215if (!ice->draw.params_valid ||216ice->draw.params.firstvertex != firstvertex ||217ice->draw.params.baseinstance != info->start_instance) {218219changed = true;220ice->draw.params.firstvertex = firstvertex;221ice->draw.params.baseinstance = info->start_instance;222ice->draw.params_valid = true;223224u_upload_data(ice->ctx.stream_uploader, 0,225sizeof(ice->draw.params), 4, &ice->draw.params,226&draw_params->offset, &draw_params->res);227}228}229}230231if (ice->state.vs_uses_derived_draw_params) {232struct crocus_state_ref *derived_params = &ice->draw.derived_draw_params;233int is_indexed_draw = info->index_size ? -1 : 0;234235if (ice->draw.derived_params.drawid != drawid_offset ||236ice->draw.derived_params.is_indexed_draw != is_indexed_draw) {237238changed = true;239ice->draw.derived_params.drawid = drawid_offset;240ice->draw.derived_params.is_indexed_draw = is_indexed_draw;241242u_upload_data(ice->ctx.stream_uploader, 0,243sizeof(ice->draw.derived_params), 4,244&ice->draw.derived_params, &derived_params->offset,245&derived_params->res);246}247}248249if (changed) {250struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;251ice->state.dirty |= CROCUS_DIRTY_VERTEX_BUFFERS |252CROCUS_DIRTY_VERTEX_ELEMENTS;253if (screen->devinfo.ver == 8)254ice->state.dirty |= CROCUS_DIRTY_GEN8_VF_SGVS;255}256}257258static void259crocus_indirect_draw_vbo(struct crocus_context *ice,260const struct pipe_draw_info *dinfo,261unsigned drawid_offset,262const struct pipe_draw_indirect_info *dindirect,263const struct pipe_draw_start_count_bias *draws)264{265struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];266struct crocus_screen *screen = batch->screen;267struct pipe_draw_info info = *dinfo;268struct pipe_draw_indirect_info indirect = *dindirect;269const struct intel_device_info *devinfo = &batch->screen->devinfo;270271if (devinfo->verx10 >= 75 && indirect.indirect_draw_count &&272ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) {273/* Upload MI_PREDICATE_RESULT to GPR15.*/274screen->vtbl.load_register_reg64(batch, CS_GPR(15), MI_PREDICATE_RESULT);275}276277uint64_t orig_dirty = ice->state.dirty;278uint64_t orig_stage_dirty = ice->state.stage_dirty;279280for (int i = 0; i < indirect.draw_count; i++) {281crocus_batch_maybe_flush(batch, 1500);282crocus_require_statebuffer_space(batch, 2400);283284if (ice->state.vs_uses_draw_params ||285ice->state.vs_uses_derived_draw_params)286crocus_update_draw_parameters(ice, &info, drawid_offset + i, &indirect, draws);287288screen->vtbl.upload_render_state(ice, batch, &info, drawid_offset + i, &indirect, draws);289290ice->state.dirty &= ~CROCUS_ALL_DIRTY_FOR_RENDER;291ice->state.stage_dirty &= ~CROCUS_ALL_STAGE_DIRTY_FOR_RENDER;292293indirect.offset += indirect.stride;294}295296if (devinfo->verx10 >= 75 && indirect.indirect_draw_count &&297ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT) {298/* Restore MI_PREDICATE_RESULT. */299screen->vtbl.load_register_reg64(batch, MI_PREDICATE_RESULT, CS_GPR(15));300}301302/* Put this back for post-draw resolves, we'll clear it again after. */303ice->state.dirty = orig_dirty;304ice->state.stage_dirty = orig_stage_dirty;305}306307static void308crocus_simple_draw_vbo(struct crocus_context *ice,309const struct pipe_draw_info *draw,310unsigned drawid_offset,311const struct pipe_draw_indirect_info *indirect,312const struct pipe_draw_start_count_bias *sc)313{314struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];315struct crocus_screen *screen = batch->screen;316317crocus_batch_maybe_flush(batch, 1500);318crocus_require_statebuffer_space(batch, 2400);319320if (ice->state.vs_uses_draw_params ||321ice->state.vs_uses_derived_draw_params)322crocus_update_draw_parameters(ice, draw, drawid_offset, indirect, sc);323324screen->vtbl.upload_render_state(ice, batch, draw, drawid_offset, indirect, sc);325}326327static void328crocus_draw_vbo_get_vertex_count(struct pipe_context *ctx,329const struct pipe_draw_info *info_in,330unsigned drawid_offset,331const struct pipe_draw_indirect_info *indirect)332{333struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;334struct pipe_draw_info info = *info_in;335struct pipe_draw_start_count_bias draw;336337uint32_t val = screen->vtbl.get_so_offset(indirect->count_from_stream_output);338339draw.start = 0;340draw.count = val;341ctx->draw_vbo(ctx, &info, drawid_offset, NULL, &draw, 1);342}343344/**345* The pipe->draw_vbo() driver hook. Performs a draw on the GPU.346*/347void348crocus_draw_vbo(struct pipe_context *ctx,349const struct pipe_draw_info *info,350unsigned drawid_offset,351const struct pipe_draw_indirect_info *indirect,352const struct pipe_draw_start_count_bias *draws,353unsigned num_draws)354{355if (num_draws > 1) {356util_draw_multi(ctx, info, drawid_offset, indirect, draws, num_draws);357return;358}359360if (!indirect && (!draws[0].count || !info->instance_count))361return;362363struct crocus_context *ice = (struct crocus_context *) ctx;364struct crocus_screen *screen = (struct crocus_screen*)ice->ctx.screen;365struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];366367if (!crocus_check_conditional_render(ice))368return;369370if (info->primitive_restart && !can_cut_index_handle_prim(ice, info)) {371util_draw_vbo_without_prim_restart(ctx, info, drawid_offset,372indirect, draws);373return;374}375376if (screen->devinfo.verx10 < 75 &&377indirect && indirect->count_from_stream_output) {378crocus_draw_vbo_get_vertex_count(ctx, info, drawid_offset, indirect);379return;380}381382/**383* The hardware is capable of removing dangling vertices on its own; however,384* prior to Gen6, we sometimes convert quads into trifans (and quad strips385* into tristrips), since pre-Gen6 hardware requires a GS to render quads.386* This function manually trims dangling vertices from a draw call involving387* quads so that those dangling vertices won't get drawn when we convert to388* trifans/tristrips.389*/390if (screen->devinfo.ver < 6) {391if (info->mode == PIPE_PRIM_QUADS || info->mode == PIPE_PRIM_QUAD_STRIP) {392bool trim = u_trim_pipe_prim(info->mode, (unsigned *)&draws[0].count);393if (!trim)394return;395}396}397398/* We can't safely re-emit 3DSTATE_SO_BUFFERS because it may zero the399* write offsets, changing the behavior.400*/401if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {402ice->state.dirty |= CROCUS_ALL_DIRTY_FOR_RENDER & ~CROCUS_DIRTY_GEN7_SO_BUFFERS;403ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_FOR_RENDER;404}405406/* Emit Sandybridge workaround flushes on every primitive, for safety. */407if (screen->devinfo.ver == 6)408crocus_emit_post_sync_nonzero_flush(batch);409410crocus_update_draw_info(ice, info, draws);411412if (!crocus_update_compiled_shaders(ice))413return;414415if (ice->state.dirty & CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES) {416bool draw_aux_buffer_disabled[BRW_MAX_DRAW_BUFFERS] = { };417for (gl_shader_stage stage = 0; stage < MESA_SHADER_COMPUTE; stage++) {418if (ice->shaders.prog[stage])419crocus_predraw_resolve_inputs(ice, batch, draw_aux_buffer_disabled,420stage, true);421}422crocus_predraw_resolve_framebuffer(ice, batch, draw_aux_buffer_disabled);423}424425crocus_handle_always_flush_cache(batch);426427if (indirect && indirect->buffer)428crocus_indirect_draw_vbo(ice, info, drawid_offset, indirect, draws);429else430crocus_simple_draw_vbo(ice, info, drawid_offset, indirect, draws);431432crocus_handle_always_flush_cache(batch);433434crocus_postdraw_update_resolve_tracking(ice, batch);435436ice->state.dirty &= ~CROCUS_ALL_DIRTY_FOR_RENDER;437ice->state.stage_dirty &= ~CROCUS_ALL_STAGE_DIRTY_FOR_RENDER;438}439440static void441crocus_update_grid_size_resource(struct crocus_context *ice,442const struct pipe_grid_info *grid)443{444struct crocus_state_ref *grid_ref = &ice->state.grid_size;445const struct crocus_compiled_shader *shader = ice->shaders.prog[MESA_SHADER_COMPUTE];446bool grid_needs_surface = shader->bt.used_mask[CROCUS_SURFACE_GROUP_CS_WORK_GROUPS];447448if (grid->indirect) {449pipe_resource_reference(&grid_ref->res, grid->indirect);450grid_ref->offset = grid->indirect_offset;451452/* Zero out the grid size so that the next non-indirect grid launch will453* re-upload it properly.454*/455memset(ice->state.last_grid, 0, sizeof(ice->state.last_grid));456} else if (memcmp(ice->state.last_grid, grid->grid, sizeof(grid->grid)) != 0) {457memcpy(ice->state.last_grid, grid->grid, sizeof(grid->grid));458u_upload_data(ice->ctx.const_uploader, 0, sizeof(grid->grid), 4,459grid->grid, &grid_ref->offset, &grid_ref->res);460}461462/* Skip surface upload if we don't need it or we already have one */463if (!grid_needs_surface)464return;465466ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_BINDINGS_CS;467}468469470void471crocus_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *grid)472{473struct crocus_context *ice = (struct crocus_context *) ctx;474struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_COMPUTE];475struct crocus_screen *screen = batch->screen;476477if (!crocus_check_conditional_render(ice))478return;479480if (unlikely(INTEL_DEBUG & DEBUG_REEMIT)) {481ice->state.dirty |= CROCUS_ALL_DIRTY_FOR_COMPUTE;482ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE;483}484485/* We can't do resolves on the compute engine, so awkwardly, we have to486* do them on the render batch...487*/488if (ice->state.dirty & CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES) {489crocus_predraw_resolve_inputs(ice, &ice->batches[CROCUS_BATCH_RENDER], NULL,490MESA_SHADER_COMPUTE, false);491}492493crocus_batch_maybe_flush(batch, 1500);494crocus_require_statebuffer_space(batch, 2500);495crocus_update_compiled_compute_shader(ice);496497if (memcmp(ice->state.last_block, grid->block, sizeof(grid->block)) != 0) {498memcpy(ice->state.last_block, grid->block, sizeof(grid->block));499ice->state.stage_dirty |= CROCUS_STAGE_DIRTY_CONSTANTS_CS;500ice->state.shaders[MESA_SHADER_COMPUTE].sysvals_need_upload = true;501}502503crocus_update_grid_size_resource(ice, grid);504505if (ice->state.compute_predicate) {506screen->vtbl.emit_compute_predicate(batch);507ice->state.compute_predicate = NULL;508}509510crocus_handle_always_flush_cache(batch);511512screen->vtbl.upload_compute_state(ice, batch, grid);513514crocus_handle_always_flush_cache(batch);515516ice->state.dirty &= ~CROCUS_ALL_DIRTY_FOR_COMPUTE;517ice->state.stage_dirty &= ~CROCUS_ALL_STAGE_DIRTY_FOR_COMPUTE;518519/* Note: since compute shaders can't access the framebuffer, there's520* no need to call crocus_postdraw_update_resolve_tracking.521*/522}523524525