Path: blob/21.2-virgl/src/gallium/drivers/swr/swr_draw.cpp
4570 views
/****************************************************************************1* Copyright (C) 2015 Intel Corporation. All Rights Reserved.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21***************************************************************************/2223#include "swr_screen.h"24#include "swr_context.h"25#include "swr_resource.h"26#include "swr_fence.h"27#include "swr_query.h"28#include "jit_api.h"2930#include "util/u_draw.h"31#include "util/u_prim.h"3233#include <algorithm>34#include <iostream>35/*36* Draw vertex arrays, with optional indexing, optional instancing.37*/38static void39swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info,40unsigned drawid_offset,41const struct pipe_draw_indirect_info *indirect,42const struct pipe_draw_start_count_bias *draws,43unsigned num_draws)44{45if (num_draws > 1) {46struct pipe_draw_info tmp_info = *info;47unsigned drawid = drawid_offset;4849for (unsigned i = 0; i < num_draws; i++) {50swr_draw_vbo(pipe, &tmp_info, drawid, indirect, &draws[i], 1);51if (tmp_info.increment_draw_id)52drawid++;53}54return;55}5657if (!indirect && (!draws[0].count || !info->instance_count))58return;5960struct swr_context *ctx = swr_context(pipe);6162if (!indirect &&63!info->primitive_restart &&64!u_trim_pipe_prim(info->mode, (unsigned*)&draws[0].count))65return;6667if (!swr_check_render_cond(pipe))68return;6970if (indirect && indirect->buffer) {71util_draw_indirect(pipe, info, indirect);72return;73}7475/* If indexed draw, force vertex validation since index buffer comes76* from draw info. */77if (info->index_size)78ctx->dirty |= SWR_NEW_VERTEX;7980/* Update derived state, pass draw info to update function. */81swr_update_derived(pipe, info, draws);8283swr_update_draw_context(ctx);8485struct pipe_draw_info resolved_info;86struct pipe_draw_start_count_bias resolved_draw;87/* DrawTransformFeedback */88if (indirect && indirect->count_from_stream_output) {89// trick copied from softpipe to modify const struct *info90memcpy(&resolved_info, (void*)info, sizeof(struct pipe_draw_info));91resolved_draw.start = draws[0].start;92resolved_draw.count = ctx->so_primCounter * resolved_info.vertices_per_patch;93resolved_info.max_index = resolved_draw.count - 1;94info = &resolved_info;95indirect = NULL;96draws = &resolved_draw;97}9899if (ctx->vs->pipe.stream_output.num_outputs) {100if (!ctx->vs->soFunc[info->mode]) {101STREAMOUT_COMPILE_STATE state = {0};102struct pipe_stream_output_info *so = &ctx->vs->pipe.stream_output;103104state.numVertsPerPrim = u_vertices_per_prim(info->mode);105106uint32_t offsets[MAX_SO_STREAMS] = {0};107uint32_t num = 0;108109for (uint32_t i = 0; i < so->num_outputs; i++) {110assert(so->output[i].stream == 0); // @todo111uint32_t output_buffer = so->output[i].output_buffer;112if (so->output[i].dst_offset != offsets[output_buffer]) {113// hole - need to fill114state.stream.decl[num].bufferIndex = output_buffer;115state.stream.decl[num].hole = true;116state.stream.decl[num].componentMask =117(1 << (so->output[i].dst_offset - offsets[output_buffer]))118- 1;119num++;120offsets[output_buffer] = so->output[i].dst_offset;121}122123unsigned attrib_slot = so->output[i].register_index;124attrib_slot = swr_so_adjust_attrib(attrib_slot, ctx->vs);125126state.stream.decl[num].bufferIndex = output_buffer;127state.stream.decl[num].attribSlot = attrib_slot;128state.stream.decl[num].componentMask =129((1 << so->output[i].num_components) - 1)130<< so->output[i].start_component;131state.stream.decl[num].hole = false;132num++;133134offsets[output_buffer] += so->output[i].num_components;135}136137state.stream.numDecls = num;138139HANDLE hJitMgr = swr_screen(pipe->screen)->hJitMgr;140ctx->vs->soFunc[info->mode] = JitCompileStreamout(hJitMgr, state);141debug_printf("so shader %p\n", ctx->vs->soFunc[info->mode]);142assert(ctx->vs->soFunc[info->mode] && "Error: SoShader = NULL");143}144145ctx->api.pfnSwrSetSoFunc(ctx->swrContext, ctx->vs->soFunc[info->mode], 0);146}147148struct swr_vertex_element_state *velems = ctx->velems;149if (info->primitive_restart)150velems->fsState.cutIndex = info->restart_index;151else152velems->fsState.cutIndex = 0;153velems->fsState.bEnableCutIndex = info->primitive_restart;154velems->fsState.bPartialVertexBuffer = (info->index_bounds_valid && info->min_index > 0);155156swr_jit_fetch_key key;157swr_generate_fetch_key(key, velems);158auto search = velems->map.find(key);159if (search != velems->map.end()) {160velems->fsFunc = search->second;161} else {162HANDLE hJitMgr = swr_screen(ctx->pipe.screen)->hJitMgr;163velems->fsFunc = JitCompileFetch(hJitMgr, velems->fsState);164165debug_printf("fetch shader %p\n", velems->fsFunc);166assert(velems->fsFunc && "Error: FetchShader = NULL");167168velems->map.insert(std::make_pair(key, velems->fsFunc));169}170171ctx->api.pfnSwrSetFetchFunc(ctx->swrContext, velems->fsFunc);172173/* Set up frontend state174* XXX setup provokingVertex & topologyProvokingVertex */175SWR_FRONTEND_STATE feState = {0};176177// feState.vsVertexSize seeds the PA size that is used as an interface178// between all the shader stages, so it has to be large enough to179// incorporate all interfaces between stages180181// max of frontend shaders num_outputs182feState.vsVertexSize = ctx->vs->info.base.num_outputs;183if (ctx->gs) {184feState.vsVertexSize = std::max(feState.vsVertexSize, (uint32_t)ctx->gs->info.base.num_outputs);185}186if (ctx->tcs) {187feState.vsVertexSize = std::max(feState.vsVertexSize, (uint32_t)ctx->tcs->info.base.num_outputs);188}189if (ctx->tes) {190feState.vsVertexSize = std::max(feState.vsVertexSize, (uint32_t)ctx->tes->info.base.num_outputs);191}192193194if (ctx->vs->info.base.num_outputs) {195// gs does not adjust for position in SGV slot at input from vs196if (!ctx->gs && !ctx->tcs && !ctx->tes)197feState.vsVertexSize--;198}199200// other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT201feState.vsVertexSize += VERTEX_ATTRIB_START_SLOT;202203// The PA in the clipper does not handle BE vertex sizes204// different from FE. Increase vertexsize only for the cases that needed it205206// primid needs a slot207if (ctx->fs->info.base.uses_primid)208feState.vsVertexSize++;209// sprite coord enable210if (ctx->rasterizer->sprite_coord_enable)211feState.vsVertexSize++;212213if (ctx->rasterizer->flatshade_first) {214feState.provokingVertex = {1, 0, 0};215} else {216feState.provokingVertex = {2, 1, 2};217}218219enum pipe_prim_type topology;220if (ctx->gs)221topology = (pipe_prim_type)ctx->gs->info.base.properties[TGSI_PROPERTY_GS_OUTPUT_PRIM];222else223topology = info->mode;224225switch (topology) {226case PIPE_PRIM_TRIANGLE_FAN:227feState.topologyProvokingVertex = feState.provokingVertex.triFan;228break;229case PIPE_PRIM_TRIANGLE_STRIP:230case PIPE_PRIM_TRIANGLES:231feState.topologyProvokingVertex = feState.provokingVertex.triStripList;232break;233case PIPE_PRIM_QUAD_STRIP:234case PIPE_PRIM_QUADS:235if (ctx->rasterizer->flatshade_first)236feState.topologyProvokingVertex = 0;237else238feState.topologyProvokingVertex = 3;239break;240case PIPE_PRIM_LINES:241case PIPE_PRIM_LINE_LOOP:242case PIPE_PRIM_LINE_STRIP:243feState.topologyProvokingVertex = feState.provokingVertex.lineStripList;244break;245default:246feState.topologyProvokingVertex = 0;247}248249feState.bEnableCutIndex = info->primitive_restart;250ctx->api.pfnSwrSetFrontendState(ctx->swrContext, &feState);251252if (info->index_size)253ctx->api.pfnSwrDrawIndexedInstanced(ctx->swrContext,254swr_convert_prim_topology(info->mode, info->vertices_per_patch),255draws[0].count,256info->instance_count,257draws[0].start,258draws->index_bias,259info->start_instance);260else261ctx->api.pfnSwrDrawInstanced(ctx->swrContext,262swr_convert_prim_topology(info->mode, info->vertices_per_patch),263draws[0].count,264info->instance_count,265draws[0].start,266info->start_instance);267268/* On client-buffer draw, we used client buffer directly, without269* copy. Block until draw is finished.270* VMD is an example application that benefits from this. */271if (ctx->dirty & SWR_BLOCK_CLIENT_DRAW) {272struct swr_screen *screen = swr_screen(pipe->screen);273swr_fence_submit(ctx, screen->flush_fence);274swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);275}276}277278279static void280swr_flush(struct pipe_context *pipe,281struct pipe_fence_handle **fence,282unsigned flags)283{284struct swr_context *ctx = swr_context(pipe);285struct swr_screen *screen = swr_screen(pipe->screen);286287for (int i=0; i < ctx->framebuffer.nr_cbufs; i++) {288struct pipe_surface *cb = ctx->framebuffer.cbufs[i];289if (cb) {290swr_store_dirty_resource(pipe, cb->texture, SWR_TILE_RESOLVED);291}292}293if (ctx->framebuffer.zsbuf) {294swr_store_dirty_resource(pipe, ctx->framebuffer.zsbuf->texture,295SWR_TILE_RESOLVED);296}297298if (fence)299swr_fence_reference(pipe->screen, fence, screen->flush_fence);300}301302void303swr_finish(struct pipe_context *pipe)304{305struct pipe_fence_handle *fence = nullptr;306307swr_flush(pipe, &fence, 0);308swr_fence_finish(pipe->screen, NULL, fence, 0);309swr_fence_reference(pipe->screen, &fence, NULL);310}311312/*313* Invalidate tiles so they can be reloaded back when needed314*/315void316swr_invalidate_render_target(struct pipe_context *pipe,317uint32_t attachment,318uint16_t width, uint16_t height)319{320struct swr_context *ctx = swr_context(pipe);321322/* grab the rect from the passed in arguments */323swr_update_draw_context(ctx);324SWR_RECT full_rect =325{0, 0, (int32_t)width, (int32_t)height};326ctx->api.pfnSwrInvalidateTiles(ctx->swrContext,3271 << attachment,328full_rect);329}330331332/*333* Store SWR HotTiles back to renderTarget surface.334*/335void336swr_store_render_target(struct pipe_context *pipe,337uint32_t attachment,338enum SWR_TILE_STATE post_tile_state)339{340struct swr_context *ctx = swr_context(pipe);341struct swr_draw_context *pDC = &ctx->swrDC;342struct SWR_SURFACE_STATE *renderTarget = &pDC->renderTargets[attachment];343344/* Only proceed if there's a valid surface to store to */345if (renderTarget->xpBaseAddress) {346swr_update_draw_context(ctx);347SWR_RECT full_rect =348{0, 0,349(int32_t)u_minify(renderTarget->width, renderTarget->lod),350(int32_t)u_minify(renderTarget->height, renderTarget->lod)};351ctx->api.pfnSwrStoreTiles(ctx->swrContext,3521 << attachment,353post_tile_state,354full_rect);355}356}357358void359swr_store_dirty_resource(struct pipe_context *pipe,360struct pipe_resource *resource,361enum SWR_TILE_STATE post_tile_state)362{363/* Only store resource if it has been written to */364if (swr_resource(resource)->status & SWR_RESOURCE_WRITE) {365struct swr_context *ctx = swr_context(pipe);366struct swr_screen *screen = swr_screen(pipe->screen);367struct swr_resource *spr = swr_resource(resource);368369swr_draw_context *pDC = &ctx->swrDC;370SWR_SURFACE_STATE *renderTargets = pDC->renderTargets;371for (uint32_t i = 0; i < SWR_NUM_ATTACHMENTS; i++)372if (renderTargets[i].xpBaseAddress == spr->swr.xpBaseAddress ||373(spr->secondary.xpBaseAddress &&374renderTargets[i].xpBaseAddress == spr->secondary.xpBaseAddress)) {375swr_store_render_target(pipe, i, post_tile_state);376377/* Mesa thinks depth/stencil are fused, so we'll never get an378* explicit resource for stencil. So, if checking depth, then379* also check for stencil. */380if (spr->has_stencil && (i == SWR_ATTACHMENT_DEPTH)) {381swr_store_render_target(382pipe, SWR_ATTACHMENT_STENCIL, post_tile_state);383}384385/* This fence signals StoreTiles completion */386swr_fence_submit(ctx, screen->flush_fence);387388break;389}390}391}392393void394swr_draw_init(struct pipe_context *pipe)395{396pipe->draw_vbo = swr_draw_vbo;397pipe->flush = swr_flush;398}399400401