Path: blob/21.2-virgl/src/gallium/drivers/freedreno/a5xx/fd5_blitter.c
4574 views
/*1* Copyright (C) 2017 Rob Clark <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors:23* Rob Clark <[email protected]>24*/2526#include "freedreno_blitter.h"27#include "freedreno_resource.h"2829#include "fd5_blitter.h"30#include "fd5_emit.h"31#include "fd5_format.h"3233/* Make sure none of the requested dimensions extend beyond the size of the34* resource. Not entirely sure why this happens, but sometimes it does, and35* w/ 2d blt doesn't have wrap modes like a sampler, so force those cases36* back to u_blitter37*/38static bool39ok_dims(const struct pipe_resource *r, const struct pipe_box *b, int lvl)40{41return (b->x >= 0) && (b->x + b->width <= u_minify(r->width0, lvl)) &&42(b->y >= 0) && (b->y + b->height <= u_minify(r->height0, lvl)) &&43(b->z >= 0) && (b->z + b->depth <= u_minify(r->depth0, lvl));44}4546/* Not sure if format restrictions differ for src and dst, or if47* they only matter when src fmt != dst fmt.. but there appear to48* be *some* limitations so let's just start rejecting stuff that49* piglit complains about50*/51static bool52ok_format(enum pipe_format fmt)53{54if (util_format_is_compressed(fmt))55return false;5657switch (fmt) {58case PIPE_FORMAT_R10G10B10A2_SSCALED:59case PIPE_FORMAT_R10G10B10A2_SNORM:60case PIPE_FORMAT_B10G10R10A2_USCALED:61case PIPE_FORMAT_B10G10R10A2_SSCALED:62case PIPE_FORMAT_B10G10R10A2_SNORM:63case PIPE_FORMAT_R10G10B10A2_UNORM:64case PIPE_FORMAT_R10G10B10A2_USCALED:65case PIPE_FORMAT_B10G10R10A2_UNORM:66case PIPE_FORMAT_R10SG10SB10SA2U_NORM:67case PIPE_FORMAT_B10G10R10A2_UINT:68case PIPE_FORMAT_R10G10B10A2_UINT:69return false;70default:71break;72}7374if (fd5_pipe2color(fmt) == RB5_NONE)75return false;7677return true;78}7980static bool81can_do_blit(const struct pipe_blit_info *info)82{83/* I think we can do scaling, but not in z dimension since that would84* require blending..85*/86if (info->dst.box.depth != info->src.box.depth)87return false;8889if (!ok_format(info->dst.format))90return false;9192if (!ok_format(info->src.format))93return false;9495/* hw ignores {SRC,DST}_INFO.COLOR_SWAP if {SRC,DST}_INFO.TILE_MODE96* is set (not linear). We can kind of get around that when tiling/97* untiling by setting both src and dst COLOR_SWAP=WZYX, but that98* means the formats must match:99*/100if ((fd_resource(info->dst.resource)->layout.tile_mode ||101fd_resource(info->src.resource)->layout.tile_mode) &&102info->dst.format != info->src.format)103return false;104105/* until we figure out a few more registers: */106if ((info->dst.box.width != info->src.box.width) ||107(info->dst.box.height != info->src.box.height))108return false;109110/* src box can be inverted, which we don't support.. dst box cannot: */111if ((info->src.box.width < 0) || (info->src.box.height < 0))112return false;113114if (!ok_dims(info->src.resource, &info->src.box, info->src.level))115return false;116117if (!ok_dims(info->dst.resource, &info->dst.box, info->dst.level))118return false;119120debug_assert(info->dst.box.width >= 0);121debug_assert(info->dst.box.height >= 0);122debug_assert(info->dst.box.depth >= 0);123124if ((info->dst.resource->nr_samples > 1) ||125(info->src.resource->nr_samples > 1))126return false;127128if (info->scissor_enable)129return false;130131if (info->window_rectangle_include)132return false;133134if (info->render_condition_enable)135return false;136137if (info->alpha_blend)138return false;139140if (info->filter != PIPE_TEX_FILTER_NEAREST)141return false;142143if (info->mask != util_format_get_mask(info->src.format))144return false;145146if (info->mask != util_format_get_mask(info->dst.format))147return false;148149return true;150}151152static void153emit_setup(struct fd_ringbuffer *ring)154{155OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1);156OUT_RING(ring, 0x00000008);157158OUT_PKT4(ring, REG_A5XX_UNKNOWN_2100, 1);159OUT_RING(ring, 0x86000000); /* UNKNOWN_2100 */160161OUT_PKT4(ring, REG_A5XX_UNKNOWN_2180, 1);162OUT_RING(ring, 0x86000000); /* UNKNOWN_2180 */163164OUT_PKT4(ring, REG_A5XX_UNKNOWN_2184, 1);165OUT_RING(ring, 0x00000009); /* UNKNOWN_2184 */166167OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);168OUT_RING(ring, A5XX_RB_CNTL_BYPASS);169170OUT_PKT4(ring, REG_A5XX_RB_MODE_CNTL, 1);171OUT_RING(ring, 0x00000004); /* RB_MODE_CNTL */172173OUT_PKT4(ring, REG_A5XX_SP_MODE_CNTL, 1);174OUT_RING(ring, 0x0000000c); /* SP_MODE_CNTL */175176OUT_PKT4(ring, REG_A5XX_TPL1_MODE_CNTL, 1);177OUT_RING(ring, 0x00000344); /* TPL1_MODE_CNTL */178179OUT_PKT4(ring, REG_A5XX_HLSQ_MODE_CNTL, 1);180OUT_RING(ring, 0x00000002); /* HLSQ_MODE_CNTL */181182OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);183OUT_RING(ring, 0x00000181); /* GRAS_CL_CNTL */184}185186/* buffers need to be handled specially since x/width can exceed the bounds187* supported by hw.. if necessary decompose into (potentially) two 2D blits188*/189static void190emit_blit_buffer(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)191{192const struct pipe_box *sbox = &info->src.box;193const struct pipe_box *dbox = &info->dst.box;194struct fd_resource *src, *dst;195unsigned sshift, dshift;196197src = fd_resource(info->src.resource);198dst = fd_resource(info->dst.resource);199200debug_assert(src->layout.cpp == 1);201debug_assert(dst->layout.cpp == 1);202debug_assert(info->src.resource->format == info->dst.resource->format);203debug_assert((sbox->y == 0) && (sbox->height == 1));204debug_assert((dbox->y == 0) && (dbox->height == 1));205debug_assert((sbox->z == 0) && (sbox->depth == 1));206debug_assert((dbox->z == 0) && (dbox->depth == 1));207debug_assert(sbox->width == dbox->width);208debug_assert(info->src.level == 0);209debug_assert(info->dst.level == 0);210211/*212* Buffers can have dimensions bigger than max width, remap into213* multiple 1d blits to fit within max dimension214*215* Note that blob uses .ARRAY_PITCH=128 for blitting buffers, which216* seems to prevent overfetch related faults. Not quite sure what217* the deal is there.218*219* Low 6 bits of SRC/DST addresses need to be zero (ie. address220* aligned to 64) so we need to shift src/dst x1/x2 to make up the221* difference. On top of already splitting up the blit so width222* isn't > 16k.223*224* We perhaps could do a bit better, if src and dst are aligned but225* in the worst case this means we have to split the copy up into226* 16k (0x4000) minus 64 (0x40).227*/228229sshift = sbox->x & 0x3f;230dshift = dbox->x & 0x3f;231232for (unsigned off = 0; off < sbox->width; off += (0x4000 - 0x40)) {233unsigned soff, doff, w, p;234235soff = (sbox->x + off) & ~0x3f;236doff = (dbox->x + off) & ~0x3f;237238w = MIN2(sbox->width - off, (0x4000 - 0x40));239p = align(w, 64);240241debug_assert((soff + w) <= fd_bo_size(src->bo));242debug_assert((doff + w) <= fd_bo_size(dst->bo));243244OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);245OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(BLIT2D));246247/*248* Emit source:249*/250OUT_PKT4(ring, REG_A5XX_RB_2D_SRC_INFO, 9);251OUT_RING(ring, A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(RB5_R8_UNORM) |252A5XX_RB_2D_SRC_INFO_TILE_MODE(TILE5_LINEAR) |253A5XX_RB_2D_SRC_INFO_COLOR_SWAP(WZYX));254OUT_RELOC(ring, src->bo, soff, 0, 0); /* RB_2D_SRC_LO/HI */255OUT_RING(ring, A5XX_RB_2D_SRC_SIZE_PITCH(p) |256A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(128));257OUT_RING(ring, 0x00000000);258OUT_RING(ring, 0x00000000);259OUT_RING(ring, 0x00000000);260OUT_RING(ring, 0x00000000);261OUT_RING(ring, 0x00000000);262263OUT_PKT4(ring, REG_A5XX_GRAS_2D_SRC_INFO, 1);264OUT_RING(ring, A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(RB5_R8_UNORM) |265A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(WZYX));266267/*268* Emit destination:269*/270OUT_PKT4(ring, REG_A5XX_RB_2D_DST_INFO, 9);271OUT_RING(ring, A5XX_RB_2D_DST_INFO_COLOR_FORMAT(RB5_R8_UNORM) |272A5XX_RB_2D_DST_INFO_TILE_MODE(TILE5_LINEAR) |273A5XX_RB_2D_DST_INFO_COLOR_SWAP(WZYX));274OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */275OUT_RING(ring, A5XX_RB_2D_DST_SIZE_PITCH(p) |276A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(128));277OUT_RING(ring, 0x00000000);278OUT_RING(ring, 0x00000000);279OUT_RING(ring, 0x00000000);280OUT_RING(ring, 0x00000000);281OUT_RING(ring, 0x00000000);282283OUT_PKT4(ring, REG_A5XX_GRAS_2D_DST_INFO, 1);284OUT_RING(ring, A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(RB5_R8_UNORM) |285A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(WZYX));286287/*288* Blit command:289*/290OUT_PKT7(ring, CP_BLIT, 5);291OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_COPY));292OUT_RING(ring, CP_BLIT_1_SRC_X1(sshift) | CP_BLIT_1_SRC_Y1(0));293OUT_RING(ring, CP_BLIT_2_SRC_X2(sshift + w - 1) | CP_BLIT_2_SRC_Y2(0));294OUT_RING(ring, CP_BLIT_3_DST_X1(dshift) | CP_BLIT_3_DST_Y1(0));295OUT_RING(ring, CP_BLIT_4_DST_X2(dshift + w - 1) | CP_BLIT_4_DST_Y2(0));296297OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);298OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(END2D));299300OUT_WFI5(ring);301}302}303304static void305emit_blit(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)306{307const struct pipe_box *sbox = &info->src.box;308const struct pipe_box *dbox = &info->dst.box;309struct fd_resource *src, *dst;310struct fdl_slice *sslice, *dslice;311enum a5xx_color_fmt sfmt, dfmt;312enum a5xx_tile_mode stile, dtile;313enum a3xx_color_swap sswap, dswap;314unsigned ssize, dsize, spitch, dpitch;315unsigned sx1, sy1, sx2, sy2;316unsigned dx1, dy1, dx2, dy2;317318src = fd_resource(info->src.resource);319dst = fd_resource(info->dst.resource);320321sslice = fd_resource_slice(src, info->src.level);322dslice = fd_resource_slice(dst, info->dst.level);323324sfmt = fd5_pipe2color(info->src.format);325dfmt = fd5_pipe2color(info->dst.format);326327stile = fd_resource_tile_mode(info->src.resource, info->src.level);328dtile = fd_resource_tile_mode(info->dst.resource, info->dst.level);329330sswap = fd5_pipe2swap(info->src.format);331dswap = fd5_pipe2swap(info->dst.format);332333spitch = fd_resource_pitch(src, info->src.level);334dpitch = fd_resource_pitch(dst, info->dst.level);335336/* if dtile, then dswap ignored by hw, and likewise if stile then sswap337* ignored by hw.. but in this case we have already rejected the blit338* if src and dst formats differ, so juse use WZYX for both src and339* dst swap mode (so we don't change component order)340*/341if (stile || dtile) {342debug_assert(info->src.format == info->dst.format);343sswap = dswap = WZYX;344}345346sx1 = sbox->x;347sy1 = sbox->y;348sx2 = sbox->x + sbox->width - 1;349sy2 = sbox->y + sbox->height - 1;350351dx1 = dbox->x;352dy1 = dbox->y;353dx2 = dbox->x + dbox->width - 1;354dy2 = dbox->y + dbox->height - 1;355356if (info->src.resource->target == PIPE_TEXTURE_3D)357ssize = sslice->size0;358else359ssize = src->layout.layer_size;360361if (info->dst.resource->target == PIPE_TEXTURE_3D)362dsize = dslice->size0;363else364dsize = dst->layout.layer_size;365366for (unsigned i = 0; i < info->dst.box.depth; i++) {367unsigned soff = fd_resource_offset(src, info->src.level, sbox->z + i);368unsigned doff = fd_resource_offset(dst, info->dst.level, dbox->z + i);369370debug_assert((soff + (sbox->height * spitch)) <= fd_bo_size(src->bo));371debug_assert((doff + (dbox->height * dpitch)) <= fd_bo_size(dst->bo));372373OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);374OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(BLIT2D));375376/*377* Emit source:378*/379OUT_PKT4(ring, REG_A5XX_RB_2D_SRC_INFO, 9);380OUT_RING(ring, A5XX_RB_2D_SRC_INFO_COLOR_FORMAT(sfmt) |381A5XX_RB_2D_SRC_INFO_TILE_MODE(stile) |382A5XX_RB_2D_SRC_INFO_COLOR_SWAP(sswap));383OUT_RELOC(ring, src->bo, soff, 0, 0); /* RB_2D_SRC_LO/HI */384OUT_RING(ring, A5XX_RB_2D_SRC_SIZE_PITCH(spitch) |385A5XX_RB_2D_SRC_SIZE_ARRAY_PITCH(ssize));386OUT_RING(ring, 0x00000000);387OUT_RING(ring, 0x00000000);388OUT_RING(ring, 0x00000000);389OUT_RING(ring, 0x00000000);390OUT_RING(ring, 0x00000000);391392OUT_PKT4(ring, REG_A5XX_GRAS_2D_SRC_INFO, 1);393OUT_RING(ring, A5XX_GRAS_2D_SRC_INFO_COLOR_FORMAT(sfmt) |394A5XX_GRAS_2D_SRC_INFO_TILE_MODE(stile) |395A5XX_GRAS_2D_SRC_INFO_COLOR_SWAP(sswap));396397/*398* Emit destination:399*/400OUT_PKT4(ring, REG_A5XX_RB_2D_DST_INFO, 9);401OUT_RING(ring, A5XX_RB_2D_DST_INFO_COLOR_FORMAT(dfmt) |402A5XX_RB_2D_DST_INFO_TILE_MODE(dtile) |403A5XX_RB_2D_DST_INFO_COLOR_SWAP(dswap));404OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */405OUT_RING(ring, A5XX_RB_2D_DST_SIZE_PITCH(dpitch) |406A5XX_RB_2D_DST_SIZE_ARRAY_PITCH(dsize));407OUT_RING(ring, 0x00000000);408OUT_RING(ring, 0x00000000);409OUT_RING(ring, 0x00000000);410OUT_RING(ring, 0x00000000);411OUT_RING(ring, 0x00000000);412413OUT_PKT4(ring, REG_A5XX_GRAS_2D_DST_INFO, 1);414OUT_RING(ring, A5XX_GRAS_2D_DST_INFO_COLOR_FORMAT(dfmt) |415A5XX_GRAS_2D_DST_INFO_TILE_MODE(dtile) |416A5XX_GRAS_2D_DST_INFO_COLOR_SWAP(dswap));417418/*419* Blit command:420*/421OUT_PKT7(ring, CP_BLIT, 5);422OUT_RING(ring, CP_BLIT_0_OP(BLIT_OP_COPY));423OUT_RING(ring, CP_BLIT_1_SRC_X1(sx1) | CP_BLIT_1_SRC_Y1(sy1));424OUT_RING(ring, CP_BLIT_2_SRC_X2(sx2) | CP_BLIT_2_SRC_Y2(sy2));425OUT_RING(ring, CP_BLIT_3_DST_X1(dx1) | CP_BLIT_3_DST_Y1(dy1));426OUT_RING(ring, CP_BLIT_4_DST_X2(dx2) | CP_BLIT_4_DST_Y2(dy2));427428OUT_PKT7(ring, CP_SET_RENDER_MODE, 1);429OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(END2D));430}431}432433bool434fd5_blitter_blit(struct fd_context *ctx,435const struct pipe_blit_info *info) assert_dt436{437struct fd_batch *batch;438439if (!can_do_blit(info)) {440return false;441}442443struct fd_resource *src = fd_resource(info->src.resource);444struct fd_resource *dst = fd_resource(info->dst.resource);445446batch = fd_bc_alloc_batch(ctx, true);447448fd_screen_lock(ctx->screen);449450fd_batch_resource_read(batch, src);451fd_batch_resource_write(batch, dst);452453fd_screen_unlock(ctx->screen);454455DBG_BLIT(info, batch);456457fd_batch_update_queries(batch);458459emit_setup(batch->draw);460461if ((info->src.resource->target == PIPE_BUFFER) &&462(info->dst.resource->target == PIPE_BUFFER)) {463assert(fd_resource(info->src.resource)->layout.tile_mode == TILE5_LINEAR);464assert(fd_resource(info->dst.resource)->layout.tile_mode == TILE5_LINEAR);465emit_blit_buffer(batch->draw, info);466} else {467/* I don't *think* we need to handle blits between buffer <-> !buffer */468debug_assert(info->src.resource->target != PIPE_BUFFER);469debug_assert(info->dst.resource->target != PIPE_BUFFER);470emit_blit(batch->draw, info);471}472473fd_batch_needs_flush(batch);474475fd_batch_flush(batch);476fd_batch_reference(&batch, NULL);477478/* Acc query state will have been dirtied by our fd_batch_update_queries, so479* the ctx->batch may need to turn its queries back on.480*/481ctx->update_active_queries = true;482483return true;484}485486unsigned487fd5_tile_mode(const struct pipe_resource *tmpl)488{489/* basically just has to be a format we can blit, so uploads/downloads490* via linear staging buffer works:491*/492if (ok_format(tmpl->format))493return TILE5_3;494495return TILE5_LINEAR;496}497498499