Path: blob/21.2-virgl/src/gallium/drivers/crocus/crocus_blt.c
4570 views
/*1* Copyright © 2018 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included11* in all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS14* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER17* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING18* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER19* DEALINGS IN THE SOFTWARE.20*/2122/* blt command encoding for gen4/5 */23#include "crocus_context.h"2425#include "crocus_genx_macros.h"26#include "crocus_genx_protos.h"27#include "crocus_resource.h"2829#define FILE_DEBUG_FLAG DEBUG_BLIT3031#if GFX_VER <= 53233static uint32_t34color_depth_for_cpp(int cpp)35{36switch (cpp) {37case 4: return COLOR_DEPTH__32bit;38case 2: return COLOR_DEPTH__565;39case 1: return COLOR_DEPTH__8bit;40default:41unreachable("not reached");42}43}4445static void46blt_set_alpha_to_one(struct crocus_batch *batch,47struct crocus_resource *dst,48int x, int y, int width, int height)49{50const struct isl_format_layout *fmtl = isl_format_get_layout(dst->surf.format);51unsigned cpp = fmtl->bpb / 8;52uint32_t pitch = dst->surf.row_pitch_B;5354if (dst->surf.tiling != ISL_TILING_LINEAR)55pitch /= 4;56/* We need to split the blit into chunks that each fit within the blitter's57* restrictions. We can't use a chunk size of 32768 because we need to58* ensure that src_tile_x + chunk_size fits. We choose 16384 because it's59* a nice round power of two, big enough that performance won't suffer, and60* small enough to guarantee everything fits.61*/62const uint32_t max_chunk_size = 16384;6364for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) {65for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) {66const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x);67const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y);68uint32_t tile_x, tile_y, offset;69ASSERTED uint32_t z_offset_el, array_offset;70isl_tiling_get_intratile_offset_el(dst->surf.tiling,71cpp * 8, dst->surf.row_pitch_B,72dst->surf.array_pitch_el_rows,73chunk_x, chunk_y, 0, 0,74&offset,75&tile_x, &tile_y,76&z_offset_el, &array_offset);77assert(z_offset_el == 0);78assert(array_offset == 0);79crocus_emit_cmd(batch, GENX(XY_COLOR_BLT), xyblt) {80xyblt.TilingEnable = dst->surf.tiling != ISL_TILING_LINEAR;81xyblt.ColorDepth = color_depth_for_cpp(cpp);82xyblt.RasterOperation = 0xF0;83xyblt.DestinationPitch = pitch;84xyblt._32bppByteMask = 2;85xyblt.DestinationBaseAddress = rw_bo(dst->bo, offset);86xyblt.DestinationX1Coordinate = tile_x;87xyblt.DestinationY1Coordinate = tile_y;88xyblt.DestinationX2Coordinate = tile_x + chunk_w;89xyblt.DestinationY2Coordinate = tile_y + chunk_h;90xyblt.SolidPatternColor = 0xffffffff;91}92}93}94}9596static bool validate_blit_for_blt(struct crocus_batch *batch,97const struct pipe_blit_info *info)98{99/* If the source and destination are the same size with no mirroring,100* the rectangles are within the size of the texture and there is no101* scissor, then we can probably use the blit engine.102*/103if (info->dst.box.width != info->src.box.width ||104info->dst.box.height != info->src.box.height)105return false;106107if (info->scissor_enable)108return false;109110if (info->dst.box.height < 0 || info->src.box.height < 0)111return false;112113if (info->dst.box.depth > 1 || info->src.box.depth > 1)114return false;115116const struct util_format_description *desc =117util_format_description(info->src.format);118int i = util_format_get_first_non_void_channel(info->src.format);119if (i == -1)120return false;121122/* can't do the alpha to 1 setting for these. */123if ((util_format_has_alpha1(info->src.format) &&124util_format_has_alpha(info->dst.format) &&125desc->channel[i].size > 8))126return false;127return true;128}129130static inline int crocus_resource_blt_pitch(struct crocus_resource *res)131{132int pitch = res->surf.row_pitch_B;133if (res->surf.tiling != ISL_TILING_LINEAR)134pitch /= 4;135return pitch;136}137138139static bool emit_copy_blt(struct crocus_batch *batch,140struct crocus_resource *src,141struct crocus_resource *dst,142unsigned cpp,143int32_t src_pitch,144unsigned src_offset,145int32_t dst_pitch,146unsigned dst_offset,147uint16_t src_x, uint16_t src_y,148uint16_t dst_x, uint16_t dst_y,149uint16_t w, uint16_t h)150151{152uint32_t src_tile_w, src_tile_h;153uint32_t dst_tile_w, dst_tile_h;154int dst_y2 = dst_y + h;155int dst_x2 = dst_x + w;156157DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",158__func__,159src, src_pitch, src_offset, src_x, src_y,160dst, dst_pitch, dst_offset, dst_x, dst_y, w, h);161162isl_get_tile_dims(src->surf.tiling, cpp, &src_tile_w, &src_tile_h);163isl_get_tile_dims(dst->surf.tiling, cpp, &dst_tile_w, &dst_tile_h);164165/* For Tiled surfaces, the pitch has to be a multiple of the Tile width166* (X direction width of the Tile). This is ensured while allocating the167* buffer object.168*/169assert(src->surf.tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0);170assert(dst->surf.tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0);171172/* For big formats (such as floating point), do the copy using 16 or173* 32bpp and multiply the coordinates.174*/175if (cpp > 4) {176if (cpp % 4 == 2) {177dst_x *= cpp / 2;178dst_x2 *= cpp / 2;179src_x *= cpp / 2;180cpp = 2;181} else {182assert(cpp % 4 == 0);183dst_x *= cpp / 4;184dst_x2 *= cpp / 4;185src_x *= cpp / 4;186cpp = 4;187}188}189190/* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop191* the low bits. Offsets must be naturally aligned.192*/193if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||194dst_pitch % 4 != 0 || dst_offset % cpp != 0)195return false;196197/* For tiled source and destination, pitch value should be specified198* as a number of Dwords.199*/200if (dst->surf.tiling != ISL_TILING_LINEAR)201dst_pitch /= 4;202203if (src->surf.tiling != ISL_TILING_LINEAR)204src_pitch /= 4;205206assert(cpp <= 4);207crocus_emit_cmd(batch, GENX(XY_SRC_COPY_BLT), xyblt) {208xyblt.RasterOperation = 0xCC;209xyblt.DestinationTilingEnable = dst->surf.tiling != ISL_TILING_LINEAR;210xyblt.SourceTilingEnable = src->surf.tiling != ISL_TILING_LINEAR;211xyblt.SourceBaseAddress = ro_bo(src->bo, src_offset);212xyblt.DestinationBaseAddress = rw_bo(dst->bo, dst_offset);213xyblt.ColorDepth = color_depth_for_cpp(cpp);214xyblt._32bppByteMask = cpp == 4 ? 0x3 : 0x1;215xyblt.DestinationX1Coordinate = dst_x;216xyblt.DestinationY1Coordinate = dst_y;217xyblt.DestinationX2Coordinate = dst_x2;218xyblt.DestinationY2Coordinate = dst_y2;219xyblt.DestinationPitch = dst_pitch;220xyblt.SourceX1Coordinate = src_x;221xyblt.SourceY1Coordinate = src_y;222xyblt.SourcePitch = src_pitch;223};224225crocus_emit_mi_flush(batch);226return true;227}228229static bool crocus_emit_blt(struct crocus_batch *batch,230struct crocus_resource *src,231struct crocus_resource *dst,232unsigned dst_level,233unsigned dst_x, unsigned dst_y,234unsigned dst_z,235unsigned src_level,236const struct pipe_box *src_box)237{238const struct isl_format_layout *src_fmtl = isl_format_get_layout(src->surf.format);239unsigned src_cpp = src_fmtl->bpb / 8;240const struct isl_format_layout *dst_fmtl = isl_format_get_layout(dst->surf.format);241const unsigned dst_cpp = dst_fmtl->bpb / 8;242uint16_t src_x, src_y;243uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;244uint32_t src_width = src_box->width, src_height = src_box->height;245246/* gen4/5 can't handle Y tiled blits. */247if (src->surf.tiling == ISL_TILING_Y0 || dst->surf.tiling == ISL_TILING_Y0)248return false;249250if (src->surf.format != dst->surf.format)251return false;252253if (src_cpp != dst_cpp)254return false;255256src_x = src_box->x;257src_y = src_box->y;258259assert(src_cpp == dst_cpp);260261crocus_resource_get_image_offset(src, src_level, src_box->z, &src_image_x,262&src_image_y);263if (util_format_is_compressed(src->base.b.format)) {264int bw = util_format_get_blockwidth(src->base.b.format);265int bh = util_format_get_blockheight(src->base.b.format);266assert(src_x % bw == 0);267assert(src_y % bh == 0);268src_x /= (int)bw;269src_y /= (int)bh;270src_width = DIV_ROUND_UP(src_width, (int)bw);271src_height = DIV_ROUND_UP(src_height, (int)bh);272}273274crocus_resource_get_image_offset(dst, dst_level, dst_z, &dst_image_x,275&dst_image_y);276if (util_format_is_compressed(dst->base.b.format)) {277int bw = util_format_get_blockwidth(dst->base.b.format);278int bh = util_format_get_blockheight(dst->base.b.format);279assert(dst_x % bw == 0);280assert(dst_y % bh == 0);281dst_x /= (int)bw;282dst_y /= (int)bh;283}284src_x += src_image_x;285src_y += src_image_y;286dst_x += dst_image_x;287dst_y += dst_image_y;288289/* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics290* Data Size Limitations):291*292* The BLT engine is capable of transferring very large quantities of293* graphics data. Any graphics data read from and written to the294* destination is permitted to represent a number of pixels that295* occupies up to 65,536 scan lines and up to 32,768 bytes per scan line296* at the destination. The maximum number of pixels that may be297* represented per scan line’s worth of graphics data depends on the298* color depth.299*300* The blitter's pitch is a signed 16-bit integer, but measured in bytes301* for linear surfaces and DWords for tiled surfaces. So the maximum302* pitch is 32k linear and 128k tiled.303*/304if (crocus_resource_blt_pitch(src) >= 32768 ||305crocus_resource_blt_pitch(dst) >= 32768) {306return false;307}308309/* We need to split the blit into chunks that each fit within the blitter's310* restrictions. We can't use a chunk size of 32768 because we need to311* ensure that src_tile_x + chunk_size fits. We choose 16384 because it's312* a nice round power of two, big enough that performance won't suffer, and313* small enough to guarantee everything fits.314*/315const uint32_t max_chunk_size = 16384;316317for (uint32_t chunk_x = 0; chunk_x < src_width; chunk_x += max_chunk_size) {318for (uint32_t chunk_y = 0; chunk_y < src_height; chunk_y += max_chunk_size) {319const uint32_t chunk_w = MIN2(max_chunk_size, src_width - chunk_x);320const uint32_t chunk_h = MIN2(max_chunk_size, src_height - chunk_y);321322ASSERTED uint32_t z_offset_el, array_offset;323uint32_t src_offset, src_tile_x, src_tile_y;324isl_tiling_get_intratile_offset_el(src->surf.tiling,325src_cpp * 8, src->surf.row_pitch_B,326src->surf.array_pitch_el_rows,327src_x + chunk_x, src_y + chunk_y, 0, 0,328&src_offset,329&src_tile_x, &src_tile_y,330&z_offset_el, &array_offset);331assert(z_offset_el == 0);332assert(array_offset == 0);333334uint32_t dst_offset, dst_tile_x, dst_tile_y;335isl_tiling_get_intratile_offset_el(dst->surf.tiling,336dst_cpp * 8, dst->surf.row_pitch_B,337dst->surf.array_pitch_el_rows,338dst_x + chunk_x, dst_y + chunk_y, 0, 0,339&dst_offset,340&dst_tile_x, &dst_tile_y,341&z_offset_el, &array_offset);342assert(z_offset_el == 0);343assert(array_offset == 0);344if (!emit_copy_blt(batch, src, dst,345src_cpp, src->surf.row_pitch_B,346src_offset,347dst->surf.row_pitch_B, dst_offset,348src_tile_x, src_tile_y,349dst_tile_x, dst_tile_y,350chunk_w, chunk_h)) {351return false;352}353}354}355356if (util_format_has_alpha1(src->base.b.format) &&357util_format_has_alpha(dst->base.b.format))358blt_set_alpha_to_one(batch, dst, 0, 0, src_width, src_height);359return true;360}361362static bool crocus_blit_blt(struct crocus_batch *batch,363const struct pipe_blit_info *info)364{365if (!validate_blit_for_blt(batch, info))366return false;367368return crocus_emit_blt(batch,369(struct crocus_resource *)info->src.resource,370(struct crocus_resource *)info->dst.resource,371info->dst.level,372info->dst.box.x,373info->dst.box.y,374info->dst.box.z,375info->src.level,376&info->src.box);377}378379380static bool crocus_copy_region_blt(struct crocus_batch *batch,381struct crocus_resource *dst,382unsigned dst_level,383unsigned dstx, unsigned dsty, unsigned dstz,384struct crocus_resource *src,385unsigned src_level,386const struct pipe_box *src_box)387{388if (dst->base.b.target == PIPE_BUFFER || src->base.b.target == PIPE_BUFFER)389return false;390return crocus_emit_blt(batch,391src,392dst,393dst_level,394dstx, dsty, dstz,395src_level,396src_box);397}398#endif399400void401genX(crocus_init_blt)(struct crocus_screen *screen)402{403#if GFX_VER <= 5404screen->vtbl.blit_blt = crocus_blit_blt;405screen->vtbl.copy_region_blt = crocus_copy_region_blt;406#else407screen->vtbl.blit_blt = NULL;408screen->vtbl.copy_region_blt = NULL;409#endif410}411412413