Path: blob/21.2-virgl/src/gallium/drivers/swr/swr_context.cpp
4570 views
/****************************************************************************1* Copyright (C) 2015 Intel Corporation. All Rights Reserved.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21***************************************************************************/2223#include "swr_context.h"24#include "swr_memory.h"25#include "swr_screen.h"26#include "swr_resource.h"27#include "swr_scratch.h"28#include "swr_query.h"29#include "swr_fence.h"3031#include "util/u_memory.h"32#include "util/u_inlines.h"33#include "util/format/u_format.h"34#include "util/u_atomic.h"35#include "util/u_upload_mgr.h"36#include "util/u_transfer.h"37#include "util/u_surface.h"3839#include "api.h"40#include "backend.h"41#include "knobs.h"4243static struct pipe_surface *44swr_create_surface(struct pipe_context *pipe,45struct pipe_resource *pt,46const struct pipe_surface *surf_tmpl)47{48struct pipe_surface *ps;4950ps = CALLOC_STRUCT(pipe_surface);51if (ps) {52pipe_reference_init(&ps->reference, 1);53pipe_resource_reference(&ps->texture, pt);54ps->context = pipe;55ps->format = surf_tmpl->format;56if (pt->target != PIPE_BUFFER) {57assert(surf_tmpl->u.tex.level <= pt->last_level);58ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level);59ps->height = u_minify(pt->height0, surf_tmpl->u.tex.level);60ps->u.tex.level = surf_tmpl->u.tex.level;61ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer;62ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer;63} else {64/* setting width as number of elements should get us correct65* renderbuffer width */66ps->width = surf_tmpl->u.buf.last_element67- surf_tmpl->u.buf.first_element + 1;68ps->height = pt->height0;69ps->u.buf.first_element = surf_tmpl->u.buf.first_element;70ps->u.buf.last_element = surf_tmpl->u.buf.last_element;71assert(ps->u.buf.first_element <= ps->u.buf.last_element);72assert(ps->u.buf.last_element < ps->width);73}74}75return ps;76}7778static void79swr_surface_destroy(struct pipe_context *pipe, struct pipe_surface *surf)80{81assert(surf->texture);82struct pipe_resource *resource = surf->texture;8384/* If the resource has been drawn to, store tiles. */85swr_store_dirty_resource(pipe, resource, SWR_TILE_RESOLVED);8687pipe_resource_reference(&resource, NULL);88FREE(surf);89}909192static void *93swr_transfer_map(struct pipe_context *pipe,94struct pipe_resource *resource,95unsigned level,96unsigned usage,97const struct pipe_box *box,98struct pipe_transfer **transfer)99{100struct swr_screen *screen = swr_screen(pipe->screen);101struct swr_resource *spr = swr_resource(resource);102struct pipe_transfer *pt;103enum pipe_format format = resource->format;104105assert(resource);106assert(level <= resource->last_level);107108/* If mapping an attached rendertarget, store tiles to surface and set109* postStoreTileState to SWR_TILE_INVALID so tiles get reloaded on next use110* and nothing needs to be done at unmap. */111swr_store_dirty_resource(pipe, resource, SWR_TILE_INVALID);112113if (!(usage & PIPE_MAP_UNSYNCHRONIZED)) {114/* If resource is in use, finish fence before mapping.115* Unless requested not to block, then if not done return NULL map */116if (usage & PIPE_MAP_DONTBLOCK) {117if (swr_is_fence_pending(screen->flush_fence))118return NULL;119} else {120if (spr->status) {121/* But, if there's no fence pending, submit one.122* XXX: Remove once draw timestamps are finished. */123if (!swr_is_fence_pending(screen->flush_fence))124swr_fence_submit(swr_context(pipe), screen->flush_fence);125126swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);127swr_resource_unused(resource);128}129}130}131132pt = CALLOC_STRUCT(pipe_transfer);133if (!pt)134return NULL;135pipe_resource_reference(&pt->resource, resource);136pt->usage = (pipe_map_flags)usage;137pt->level = level;138pt->box = *box;139pt->stride = spr->swr.pitch;140pt->layer_stride = spr->swr.qpitch * spr->swr.pitch;141142/* if we're mapping the depth/stencil, copy in stencil for the section143* being read in144*/145if (usage & PIPE_MAP_READ && spr->has_depth && spr->has_stencil) {146size_t zbase, sbase;147for (int z = box->z; z < box->z + box->depth; z++) {148zbase = (z * spr->swr.qpitch + box->y) * spr->swr.pitch +149spr->mip_offsets[level];150sbase = (z * spr->secondary.qpitch + box->y) * spr->secondary.pitch +151spr->secondary_mip_offsets[level];152for (int y = box->y; y < box->y + box->height; y++) {153if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {154for (int x = box->x; x < box->x + box->width; x++)155((uint8_t*)(spr->swr.xpBaseAddress))[zbase + 4 * x + 3] =156((uint8_t*)(spr->secondary.xpBaseAddress))[sbase + x];157} else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {158for (int x = box->x; x < box->x + box->width; x++)159((uint8_t*)(spr->swr.xpBaseAddress))[zbase + 8 * x + 4] =160((uint8_t*)(spr->secondary.xpBaseAddress))[sbase + x];161}162zbase += spr->swr.pitch;163sbase += spr->secondary.pitch;164}165}166}167168unsigned offset = box->z * pt->layer_stride +169util_format_get_nblocksy(format, box->y) * pt->stride +170util_format_get_stride(format, box->x);171172*transfer = pt;173174return (void*)(spr->swr.xpBaseAddress + offset + spr->mip_offsets[level]);175}176177static void178swr_transfer_flush_region(struct pipe_context *pipe,179struct pipe_transfer *transfer,180const struct pipe_box *flush_box)181{182assert(transfer->resource);183assert(transfer->usage & PIPE_MAP_WRITE);184185struct swr_resource *spr = swr_resource(transfer->resource);186if (!spr->has_depth || !spr->has_stencil)187return;188189size_t zbase, sbase;190struct pipe_box box = *flush_box;191box.x += transfer->box.x;192box.y += transfer->box.y;193box.z += transfer->box.z;194for (int z = box.z; z < box.z + box.depth; z++) {195zbase = (z * spr->swr.qpitch + box.y) * spr->swr.pitch +196spr->mip_offsets[transfer->level];197sbase = (z * spr->secondary.qpitch + box.y) * spr->secondary.pitch +198spr->secondary_mip_offsets[transfer->level];199for (int y = box.y; y < box.y + box.height; y++) {200if (spr->base.format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {201for (int x = box.x; x < box.x + box.width; x++)202((uint8_t*)(spr->secondary.xpBaseAddress))[sbase + x] =203((uint8_t*)(spr->swr.xpBaseAddress))[zbase + 4 * x + 3];204} else if (spr->base.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {205for (int x = box.x; x < box.x + box.width; x++)206((uint8_t*)(spr->secondary.xpBaseAddress))[sbase + x] =207((uint8_t*)(spr->swr.xpBaseAddress))[zbase + 8 * x + 4];208}209zbase += spr->swr.pitch;210sbase += spr->secondary.pitch;211}212}213}214215static void216swr_transfer_unmap(struct pipe_context *pipe, struct pipe_transfer *transfer)217{218assert(transfer->resource);219220struct swr_resource *spr = swr_resource(transfer->resource);221/* if we're mapping the depth/stencil, copy in stencil for the section222* being written out223*/224if (transfer->usage & PIPE_MAP_WRITE &&225!(transfer->usage & PIPE_MAP_FLUSH_EXPLICIT) &&226spr->has_depth && spr->has_stencil) {227struct pipe_box box;228u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height,229transfer->box.depth, &box);230swr_transfer_flush_region(pipe, transfer, &box);231}232233pipe_resource_reference(&transfer->resource, NULL);234FREE(transfer);235}236237238static void239swr_resource_copy(struct pipe_context *pipe,240struct pipe_resource *dst,241unsigned dst_level,242unsigned dstx,243unsigned dsty,244unsigned dstz,245struct pipe_resource *src,246unsigned src_level,247const struct pipe_box *src_box)248{249struct swr_screen *screen = swr_screen(pipe->screen);250251/* If either the src or dst is a renderTarget, store tiles before copy */252swr_store_dirty_resource(pipe, src, SWR_TILE_RESOLVED);253swr_store_dirty_resource(pipe, dst, SWR_TILE_RESOLVED);254255swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);256swr_resource_unused(src);257swr_resource_unused(dst);258259if ((dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER)260|| (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER)) {261util_resource_copy_region(262pipe, dst, dst_level, dstx, dsty, dstz, src, src_level, src_box);263return;264}265266debug_printf("unhandled swr_resource_copy\n");267}268269270static void271swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)272{273struct swr_context *ctx = swr_context(pipe);274/* Make a copy of the const blit_info, so we can modify it */275struct pipe_blit_info info = *blit_info;276277if (info.render_condition_enable && !swr_check_render_cond(pipe))278return;279280if (info.src.resource->nr_samples > 1 && info.dst.resource->nr_samples <= 1281&& !util_format_is_depth_or_stencil(info.src.resource->format)282&& !util_format_is_pure_integer(info.src.resource->format)) {283debug_printf("swr_blit: color resolve : %d -> %d\n",284info.src.resource->nr_samples, info.dst.resource->nr_samples);285286/* Resolve is done as part of the surface store. */287swr_store_dirty_resource(pipe, info.src.resource, SWR_TILE_RESOLVED);288289struct pipe_resource *src_resource = info.src.resource;290struct pipe_resource *resolve_target =291swr_resource(src_resource)->resolve_target;292293/* The resolve target becomes the new source for the blit. */294info.src.resource = resolve_target;295}296297if (util_try_blit_via_copy_region(pipe, &info)) {298return; /* done */299}300301if (info.mask & PIPE_MASK_S) {302debug_printf("swr: cannot blit stencil, skipping\n");303info.mask &= ~PIPE_MASK_S;304}305306if (!util_blitter_is_blit_supported(ctx->blitter, &info)) {307debug_printf("swr: blit unsupported %s -> %s\n",308util_format_short_name(info.src.resource->format),309util_format_short_name(info.dst.resource->format));310return;311}312313if (ctx->active_queries) {314ctx->api.pfnSwrEnableStatsFE(ctx->swrContext, FALSE);315ctx->api.pfnSwrEnableStatsBE(ctx->swrContext, FALSE);316}317318util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertex_buffer);319util_blitter_save_vertex_elements(ctx->blitter, (void *)ctx->velems);320util_blitter_save_vertex_shader(ctx->blitter, (void *)ctx->vs);321util_blitter_save_geometry_shader(ctx->blitter, (void*)ctx->gs);322util_blitter_save_tessctrl_shader(ctx->blitter, (void*)ctx->tcs);323util_blitter_save_tesseval_shader(ctx->blitter, (void*)ctx->tes);324util_blitter_save_so_targets(325ctx->blitter,326ctx->num_so_targets,327(struct pipe_stream_output_target **)ctx->so_targets);328util_blitter_save_rasterizer(ctx->blitter, (void *)ctx->rasterizer);329util_blitter_save_viewport(ctx->blitter, &ctx->viewports[0]);330util_blitter_save_scissor(ctx->blitter, &ctx->scissors[0]);331util_blitter_save_fragment_shader(ctx->blitter, ctx->fs);332util_blitter_save_blend(ctx->blitter, (void *)ctx->blend);333util_blitter_save_depth_stencil_alpha(ctx->blitter,334(void *)ctx->depth_stencil);335util_blitter_save_stencil_ref(ctx->blitter, &ctx->stencil_ref);336util_blitter_save_sample_mask(ctx->blitter, ctx->sample_mask);337util_blitter_save_framebuffer(ctx->blitter, &ctx->framebuffer);338util_blitter_save_fragment_sampler_states(339ctx->blitter,340ctx->num_samplers[PIPE_SHADER_FRAGMENT],341(void **)ctx->samplers[PIPE_SHADER_FRAGMENT]);342util_blitter_save_fragment_sampler_views(343ctx->blitter,344ctx->num_sampler_views[PIPE_SHADER_FRAGMENT],345ctx->sampler_views[PIPE_SHADER_FRAGMENT]);346util_blitter_save_render_condition(ctx->blitter,347ctx->render_cond_query,348ctx->render_cond_cond,349ctx->render_cond_mode);350351util_blitter_blit(ctx->blitter, &info);352353if (ctx->active_queries) {354ctx->api.pfnSwrEnableStatsFE(ctx->swrContext, TRUE);355ctx->api.pfnSwrEnableStatsBE(ctx->swrContext, TRUE);356}357}358359360static void361swr_destroy(struct pipe_context *pipe)362{363struct swr_context *ctx = swr_context(pipe);364struct swr_screen *screen = swr_screen(pipe->screen);365366if (ctx->blitter)367util_blitter_destroy(ctx->blitter);368369for (unsigned i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {370if (ctx->framebuffer.cbufs[i]) {371struct swr_resource *res = swr_resource(ctx->framebuffer.cbufs[i]->texture);372/* NULL curr_pipe, so we don't have a reference to a deleted pipe */373res->curr_pipe = NULL;374pipe_surface_reference(&ctx->framebuffer.cbufs[i], NULL);375}376}377378if (ctx->framebuffer.zsbuf) {379struct swr_resource *res = swr_resource(ctx->framebuffer.zsbuf->texture);380/* NULL curr_pipe, so we don't have a reference to a deleted pipe */381res->curr_pipe = NULL;382pipe_surface_reference(&ctx->framebuffer.zsbuf, NULL);383}384385for (unsigned i = 0; i < ARRAY_SIZE(ctx->sampler_views[0]); i++) {386pipe_sampler_view_reference(&ctx->sampler_views[PIPE_SHADER_FRAGMENT][i], NULL);387}388389for (unsigned i = 0; i < ARRAY_SIZE(ctx->sampler_views[0]); i++) {390pipe_sampler_view_reference(&ctx->sampler_views[PIPE_SHADER_VERTEX][i], NULL);391}392393if (ctx->pipe.stream_uploader)394u_upload_destroy(ctx->pipe.stream_uploader);395396/* Idle core after destroying buffer resources, but before deleting397* context. Destroying resources has potentially called StoreTiles.*/398ctx->api.pfnSwrWaitForIdle(ctx->swrContext);399400if (ctx->swrContext)401ctx->api.pfnSwrDestroyContext(ctx->swrContext);402403delete ctx->blendJIT;404405swr_destroy_scratch_buffers(ctx);406407408/* Only update screen->pipe if current context is being destroyed */409assert(screen);410if (screen->pipe == pipe)411screen->pipe = NULL;412413AlignedFree(ctx);414}415416417static void418swr_render_condition(struct pipe_context *pipe,419struct pipe_query *query,420bool condition,421enum pipe_render_cond_flag mode)422{423struct swr_context *ctx = swr_context(pipe);424425ctx->render_cond_query = query;426ctx->render_cond_mode = mode;427ctx->render_cond_cond = condition;428}429430431static void432swr_flush_resource(struct pipe_context *ctx, struct pipe_resource *resource)433{434// NOOP435}436437static void438swr_UpdateStats(HANDLE hPrivateContext, const SWR_STATS *pStats)439{440swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;441442if (!pDC)443return;444445struct swr_query_result *pqr = pDC->pStats;446447SWR_STATS *pSwrStats = &pqr->core;448449pSwrStats->DepthPassCount += pStats->DepthPassCount;450pSwrStats->PsInvocations += pStats->PsInvocations;451pSwrStats->CsInvocations += pStats->CsInvocations;452}453454static void455swr_UpdateStatsFE(HANDLE hPrivateContext, const SWR_STATS_FE *pStats)456{457swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;458459if (!pDC)460return;461462struct swr_query_result *pqr = pDC->pStats;463464SWR_STATS_FE *pSwrStats = &pqr->coreFE;465p_atomic_add(&pSwrStats->IaVertices, pStats->IaVertices);466p_atomic_add(&pSwrStats->IaPrimitives, pStats->IaPrimitives);467p_atomic_add(&pSwrStats->VsInvocations, pStats->VsInvocations);468p_atomic_add(&pSwrStats->HsInvocations, pStats->HsInvocations);469p_atomic_add(&pSwrStats->DsInvocations, pStats->DsInvocations);470p_atomic_add(&pSwrStats->GsInvocations, pStats->GsInvocations);471p_atomic_add(&pSwrStats->CInvocations, pStats->CInvocations);472p_atomic_add(&pSwrStats->CPrimitives, pStats->CPrimitives);473p_atomic_add(&pSwrStats->GsPrimitives, pStats->GsPrimitives);474475for (unsigned i = 0; i < 4; i++) {476p_atomic_add(&pSwrStats->SoPrimStorageNeeded[i],477pStats->SoPrimStorageNeeded[i]);478p_atomic_add(&pSwrStats->SoNumPrimsWritten[i],479pStats->SoNumPrimsWritten[i]);480}481}482483static void484swr_UpdateStreamOut(HANDLE hPrivateContext, uint64_t numPrims)485{486swr_draw_context *pDC = (swr_draw_context*)hPrivateContext;487488if (!pDC)489return;490491if (pDC->soPrims)492*pDC->soPrims += numPrims;493}494495struct pipe_context *496swr_create_context(struct pipe_screen *p_screen, void *priv, unsigned flags)497{498struct swr_context *ctx = (struct swr_context *)499AlignedMalloc(sizeof(struct swr_context), KNOB_SIMD_BYTES);500memset((void*)ctx, 0, sizeof(struct swr_context));501502swr_screen(p_screen)->pfnSwrGetInterface(ctx->api);503swr_screen(p_screen)->pfnSwrGetTileInterface(ctx->tileApi);504ctx->swrDC.pAPI = &ctx->api;505ctx->swrDC.pTileAPI = &ctx->tileApi;506507ctx->blendJIT =508new std::unordered_map<BLEND_COMPILE_STATE, PFN_BLEND_JIT_FUNC>;509510ctx->max_draws_in_flight = KNOB_MAX_DRAWS_IN_FLIGHT;511512SWR_CREATECONTEXT_INFO createInfo {0};513514createInfo.privateStateSize = sizeof(swr_draw_context);515createInfo.pfnLoadTile = swr_LoadHotTile;516createInfo.pfnStoreTile = swr_StoreHotTile;517createInfo.pfnUpdateStats = swr_UpdateStats;518createInfo.pfnUpdateStatsFE = swr_UpdateStatsFE;519createInfo.pfnUpdateStreamOut = swr_UpdateStreamOut;520createInfo.pfnMakeGfxPtr = swr_MakeGfxPtr;521522SWR_THREADING_INFO threadingInfo {0};523524threadingInfo.MAX_WORKER_THREADS = KNOB_MAX_WORKER_THREADS;525threadingInfo.MAX_NUMA_NODES = KNOB_MAX_NUMA_NODES;526threadingInfo.MAX_CORES_PER_NUMA_NODE = KNOB_MAX_CORES_PER_NUMA_NODE;527threadingInfo.MAX_THREADS_PER_CORE = KNOB_MAX_THREADS_PER_CORE;528threadingInfo.SINGLE_THREADED = KNOB_SINGLE_THREADED;529530// Use non-standard settings for KNL531if (swr_screen(p_screen)->is_knl)532{533if (nullptr == getenv("KNOB_MAX_THREADS_PER_CORE"))534threadingInfo.MAX_THREADS_PER_CORE = 2;535536if (nullptr == getenv("KNOB_MAX_DRAWS_IN_FLIGHT"))537{538ctx->max_draws_in_flight = 2048;539createInfo.MAX_DRAWS_IN_FLIGHT = ctx->max_draws_in_flight;540}541}542543createInfo.pThreadInfo = &threadingInfo;544545ctx->swrContext = ctx->api.pfnSwrCreateContext(&createInfo);546547ctx->api.pfnSwrInit();548549if (ctx->swrContext == NULL)550goto fail;551552ctx->pipe.screen = p_screen;553ctx->pipe.destroy = swr_destroy;554ctx->pipe.priv = priv;555ctx->pipe.create_surface = swr_create_surface;556ctx->pipe.surface_destroy = swr_surface_destroy;557ctx->pipe.buffer_map = swr_transfer_map;558ctx->pipe.buffer_unmap = swr_transfer_unmap;559ctx->pipe.texture_map = swr_transfer_map;560ctx->pipe.texture_unmap = swr_transfer_unmap;561ctx->pipe.transfer_flush_region = swr_transfer_flush_region;562563ctx->pipe.buffer_subdata = u_default_buffer_subdata;564ctx->pipe.texture_subdata = u_default_texture_subdata;565566ctx->pipe.clear_texture = util_clear_texture;567ctx->pipe.resource_copy_region = swr_resource_copy;568ctx->pipe.flush_resource = swr_flush_resource;569ctx->pipe.render_condition = swr_render_condition;570571swr_state_init(&ctx->pipe);572swr_clear_init(&ctx->pipe);573swr_draw_init(&ctx->pipe);574swr_query_init(&ctx->pipe);575576ctx->pipe.stream_uploader = u_upload_create_default(&ctx->pipe);577if (!ctx->pipe.stream_uploader)578goto fail;579ctx->pipe.const_uploader = ctx->pipe.stream_uploader;580581ctx->pipe.blit = swr_blit;582ctx->blitter = util_blitter_create(&ctx->pipe);583if (!ctx->blitter)584goto fail;585586swr_init_scratch_buffers(ctx);587588return &ctx->pipe;589590fail:591/* Should really validate the init steps and fail gracefully */592swr_destroy(&ctx->pipe);593return NULL;594}595596597