Path: blob/21.2-virgl/src/gallium/drivers/crocus/crocus_blit.c
4570 views
/*1* Copyright © 2017 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included11* in all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS14* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER17* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING18* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER19* DEALINGS IN THE SOFTWARE.20*/2122#include <stdio.h>23#include "pipe/p_defines.h"24#include "pipe/p_state.h"25#include "pipe/p_context.h"26#include "pipe/p_screen.h"27#include "util/format/u_format.h"28#include "util/u_inlines.h"29#include "util/u_surface.h"30#include "util/ralloc.h"31#include "intel/blorp/blorp.h"32#include "crocus_context.h"33#include "crocus_resource.h"34#include "crocus_screen.h"3536void crocus_blitter_begin(struct crocus_context *ice, enum crocus_blitter_op op, bool render_cond)37{38util_blitter_save_vertex_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_VERTEX]);39util_blitter_save_tessctrl_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_TESS_CTRL]);40util_blitter_save_tesseval_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL]);41util_blitter_save_geometry_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_GEOMETRY]);42util_blitter_save_so_targets(ice->blitter, ice->state.so_targets,43(struct pipe_stream_output_target**)ice->state.so_target);44util_blitter_save_vertex_buffer_slot(ice->blitter, ice->state.vertex_buffers);45util_blitter_save_vertex_elements(ice->blitter, (void *)ice->state.cso_vertex_elements);46if (op & CROCUS_SAVE_FRAGMENT_STATE) {47util_blitter_save_blend(ice->blitter, ice->state.cso_blend);48util_blitter_save_depth_stencil_alpha(ice->blitter, ice->state.cso_zsa);49util_blitter_save_stencil_ref(ice->blitter, &ice->state.stencil_ref);50util_blitter_save_fragment_shader(ice->blitter, ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]);51util_blitter_save_sample_mask(ice->blitter, ice->state.sample_mask);52util_blitter_save_rasterizer(ice->blitter, ice->state.cso_rast);53util_blitter_save_scissor(ice->blitter, &ice->state.scissors[0]);54util_blitter_save_viewport(ice->blitter, &ice->state.viewports[0]);55util_blitter_save_fragment_constant_buffer_slot(ice->blitter, &ice->state.shaders[MESA_SHADER_FRAGMENT].constbufs[0]);56}5758if (!render_cond)59util_blitter_save_render_condition(ice->blitter,60(struct pipe_query *)ice->condition.query,61ice->condition.condition,62ice->condition.mode);6364// util_blitter_save_scissor(ice->blitter, &ice->scissors[0]);65if (op & CROCUS_SAVE_FRAMEBUFFER)66util_blitter_save_framebuffer(ice->blitter, &ice->state.framebuffer);6768if (op & CROCUS_SAVE_TEXTURES) {69util_blitter_save_fragment_sampler_states(ice->blitter, 1, (void **)ice->state.shaders[MESA_SHADER_FRAGMENT].samplers);70util_blitter_save_fragment_sampler_views(ice->blitter, 1, (struct pipe_sampler_view **)ice->state.shaders[MESA_SHADER_FRAGMENT].textures);71}72}7374/**75* Helper function for handling mirror image blits.76*77* If coord0 > coord1, swap them and return "true" (mirrored).78*/79static bool80apply_mirror(float *coord0, float *coord1)81{82if (*coord0 > *coord1) {83float tmp = *coord0;84*coord0 = *coord1;85*coord1 = tmp;86return true;87}88return false;89}9091/**92* Compute the number of pixels to clip for each side of a rect93*94* \param x0 The rect's left coordinate95* \param y0 The rect's bottom coordinate96* \param x1 The rect's right coordinate97* \param y1 The rect's top coordinate98* \param min_x The clipping region's left coordinate99* \param min_y The clipping region's bottom coordinate100* \param max_x The clipping region's right coordinate101* \param max_y The clipping region's top coordinate102* \param clipped_x0 The number of pixels to clip from the left side103* \param clipped_y0 The number of pixels to clip from the bottom side104* \param clipped_x1 The number of pixels to clip from the right side105* \param clipped_y1 The number of pixels to clip from the top side106*107* \return false if we clip everything away, true otherwise108*/109static inline bool110compute_pixels_clipped(float x0, float y0, float x1, float y1,111float min_x, float min_y, float max_x, float max_y,112float *clipped_x0, float *clipped_y0,113float *clipped_x1, float *clipped_y1)114{115/* If we are going to clip everything away, stop. */116if (!(min_x <= max_x &&117min_y <= max_y &&118x0 <= max_x &&119y0 <= max_y &&120min_x <= x1 &&121min_y <= y1 &&122x0 <= x1 &&123y0 <= y1)) {124return false;125}126127if (x0 < min_x)128*clipped_x0 = min_x - x0;129else130*clipped_x0 = 0;131if (max_x < x1)132*clipped_x1 = x1 - max_x;133else134*clipped_x1 = 0;135136if (y0 < min_y)137*clipped_y0 = min_y - y0;138else139*clipped_y0 = 0;140if (max_y < y1)141*clipped_y1 = y1 - max_y;142else143*clipped_y1 = 0;144145return true;146}147148/**149* Clips a coordinate (left, right, top or bottom) for the src or dst rect150* (whichever requires the largest clip) and adjusts the coordinate151* for the other rect accordingly.152*153* \param mirror true if mirroring is required154* \param src the source rect coordinate (for example src_x0)155* \param dst0 the dst rect coordinate (for example dst_x0)156* \param dst1 the opposite dst rect coordinate (for example dst_x1)157* \param clipped_dst0 number of pixels to clip from the dst coordinate158* \param clipped_dst1 number of pixels to clip from the opposite dst coordinate159* \param scale the src vs dst scale involved for that coordinate160* \param is_left_or_bottom true if we are clipping the left or bottom sides161* of the rect.162*/163static void164clip_coordinates(bool mirror,165float *src, float *dst0, float *dst1,166float clipped_dst0,167float clipped_dst1,168float scale,169bool is_left_or_bottom)170{171/* When clipping we need to add or subtract pixels from the original172* coordinates depending on whether we are acting on the left/bottom173* or right/top sides of the rect respectively. We assume we have to174* add them in the code below, and multiply by -1 when we should175* subtract.176*/177int mult = is_left_or_bottom ? 1 : -1;178179if (!mirror) {180*dst0 += clipped_dst0 * mult;181*src += clipped_dst0 * scale * mult;182} else {183*dst1 -= clipped_dst1 * mult;184*src += clipped_dst1 * scale * mult;185}186}187188/**189* Apply a scissor rectangle to blit coordinates.190*191* Returns true if the blit was entirely scissored away.192*/193static bool194apply_blit_scissor(const struct pipe_scissor_state *scissor,195float *src_x0, float *src_y0,196float *src_x1, float *src_y1,197float *dst_x0, float *dst_y0,198float *dst_x1, float *dst_y1,199bool mirror_x, bool mirror_y)200{201float clip_dst_x0, clip_dst_x1, clip_dst_y0, clip_dst_y1;202203/* Compute number of pixels to scissor away. */204if (!compute_pixels_clipped(*dst_x0, *dst_y0, *dst_x1, *dst_y1,205scissor->minx, scissor->miny,206scissor->maxx, scissor->maxy,207&clip_dst_x0, &clip_dst_y0,208&clip_dst_x1, &clip_dst_y1))209return true;210211// XXX: comments assume source clipping, which we don't do212213/* When clipping any of the two rects we need to adjust the coordinates214* in the other rect considering the scaling factor involved. To obtain215* the best precision we want to make sure that we only clip once per216* side to avoid accumulating errors due to the scaling adjustment.217*218* For example, if src_x0 and dst_x0 need both to be clipped we want to219* avoid the situation where we clip src_x0 first, then adjust dst_x0220* accordingly but then we realize that the resulting dst_x0 still needs221* to be clipped, so we clip dst_x0 and adjust src_x0 again. Because we are222* applying scaling factors to adjust the coordinates in each clipping223* pass we lose some precision and that can affect the results of the224* blorp blit operation slightly. What we want to do here is detect the225* rect that we should clip first for each side so that when we adjust226* the other rect we ensure the resulting coordinate does not need to be227* clipped again.228*229* The code below implements this by comparing the number of pixels that230* we need to clip for each side of both rects considering the scales231* involved. For example, clip_src_x0 represents the number of pixels232* to be clipped for the src rect's left side, so if clip_src_x0 = 5,233* clip_dst_x0 = 4 and scale_x = 2 it means that we are clipping more234* from the dst rect so we should clip dst_x0 only and adjust src_x0.235* This is because clipping 4 pixels in the dst is equivalent to236* clipping 4 * 2 = 8 > 5 in the src.237*/238239if (*src_x0 == *src_x1 || *src_y0 == *src_y1240|| *dst_x0 == *dst_x1 || *dst_y0 == *dst_y1)241return true;242243float scale_x = (float) (*src_x1 - *src_x0) / (*dst_x1 - *dst_x0);244float scale_y = (float) (*src_y1 - *src_y0) / (*dst_y1 - *dst_y0);245246/* Clip left side */247clip_coordinates(mirror_x, src_x0, dst_x0, dst_x1,248clip_dst_x0, clip_dst_x1, scale_x, true);249250/* Clip right side */251clip_coordinates(mirror_x, src_x1, dst_x1, dst_x0,252clip_dst_x1, clip_dst_x0, scale_x, false);253254/* Clip bottom side */255clip_coordinates(mirror_y, src_y0, dst_y0, dst_y1,256clip_dst_y0, clip_dst_y1, scale_y, true);257258/* Clip top side */259clip_coordinates(mirror_y, src_y1, dst_y1, dst_y0,260clip_dst_y1, clip_dst_y0, scale_y, false);261262/* Check for invalid bounds263* Can't blit for 0-dimensions264*/265return *src_x0 == *src_x1 || *src_y0 == *src_y1266|| *dst_x0 == *dst_x1 || *dst_y0 == *dst_y1;267}268269void270crocus_blorp_surf_for_resource(struct crocus_vtable *vtbl,271struct isl_device *isl_dev,272struct blorp_surf *surf,273struct pipe_resource *p_res,274enum isl_aux_usage aux_usage,275unsigned level,276bool is_render_target)277{278struct crocus_resource *res = (void *) p_res;279280assert(!crocus_resource_unfinished_aux_import(res));281282if (isl_aux_usage_has_hiz(aux_usage) &&283!crocus_resource_level_has_hiz(res, level))284aux_usage = ISL_AUX_USAGE_NONE;285286*surf = (struct blorp_surf) {287.surf = &res->surf,288.addr = (struct blorp_address) {289.buffer = res->bo,290.offset = res->offset,291.reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,292.mocs = crocus_mocs(res->bo, isl_dev),293},294.aux_usage = aux_usage,295};296297if (aux_usage != ISL_AUX_USAGE_NONE) {298surf->aux_surf = &res->aux.surf;299surf->aux_addr = (struct blorp_address) {300.buffer = res->aux.bo,301.offset = res->aux.offset,302.reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,303.mocs = crocus_mocs(res->bo, isl_dev),304};305surf->clear_color =306crocus_resource_get_clear_color(res);307}308}309310static void311tex_cache_flush_hack(struct crocus_batch *batch,312enum isl_format view_format,313enum isl_format surf_format)314{315/* The WaSamplerCacheFlushBetweenRedescribedSurfaceReads workaround says:316*317* "Currently Sampler assumes that a surface would not have two318* different format associate with it. It will not properly cache319* the different views in the MT cache, causing a data corruption."320*321* We may need to handle this for texture views in general someday, but322* for now we handle it here, as it hurts copies and blits particularly323* badly because they ofter reinterpret formats.324*325* If the BO hasn't been referenced yet this batch, we assume that the326* texture cache doesn't contain any relevant data nor need flushing.327*328* Icelake (Gen11+) claims to fix this issue, but seems to still have329* issues with ASTC formats.330*/331bool need_flush = view_format != surf_format;332if (!need_flush)333return;334335const char *reason =336"workaround: WaSamplerCacheFlushBetweenRedescribedSurfaceReads";337338crocus_emit_pipe_control_flush(batch, reason, PIPE_CONTROL_CS_STALL);339crocus_emit_pipe_control_flush(batch, reason,340PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);341}342343static struct crocus_resource *344crocus_resource_for_aspect(const struct intel_device_info *devinfo,345struct pipe_resource *p_res, unsigned pipe_mask)346{347if (pipe_mask == PIPE_MASK_S) {348struct crocus_resource *junk, *s_res;349crocus_get_depth_stencil_resources(devinfo, p_res, &junk, &s_res);350return s_res;351} else {352return (struct crocus_resource *)p_res;353}354}355356static enum pipe_format357pipe_format_for_aspect(enum pipe_format format, unsigned pipe_mask)358{359if (pipe_mask == PIPE_MASK_S) {360return util_format_stencil_only(format);361} else if (pipe_mask == PIPE_MASK_Z) {362return util_format_get_depth_only(format);363} else {364return format;365}366}367368static void369crocus_u_blitter(struct crocus_context *ice,370const struct pipe_blit_info *info)371{372struct pipe_blit_info dinfo = *info;373if (!util_format_has_alpha(dinfo.dst.resource->format))374dinfo.mask &= ~PIPE_MASK_A;375crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable);376util_blitter_blit(ice->blitter, &dinfo);377}378379/**380* The pipe->blit() driver hook.381*382* This performs a blit between two surfaces, which copies data but may383* also perform format conversion, scaling, flipping, and so on.384*/385static void386crocus_blit(struct pipe_context *ctx, const struct pipe_blit_info *info)387{388struct crocus_context *ice = (void *) ctx;389struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;390const struct intel_device_info *devinfo = &screen->devinfo;391struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];392enum blorp_batch_flags blorp_flags = 0;393394/* We don't support color masking. */395assert((info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA ||396(info->mask & PIPE_MASK_RGBA) == 0);397398if (info->render_condition_enable)399if (!crocus_check_conditional_render(ice))400return;401402if (devinfo->ver <= 5) {403if (!screen->vtbl.blit_blt(batch, info)) {404405if (!util_format_is_depth_or_stencil(info->src.resource->format) &&406info->dst.resource->target != PIPE_TEXTURE_3D)407goto use_blorp;408409if (!util_blitter_is_blit_supported(ice->blitter, info)) {410if (util_format_is_depth_or_stencil(info->src.resource->format)) {411412struct pipe_blit_info depth_blit = *info;413depth_blit.mask = PIPE_MASK_Z;414crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable);415util_blitter_blit(ice->blitter, &depth_blit);416417struct pipe_surface *dst_view, dst_templ;418util_blitter_default_dst_texture(&dst_templ, info->dst.resource, info->dst.level, info->dst.box.z);419dst_view = ctx->create_surface(ctx, info->dst.resource, &dst_templ);420421crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable);422423util_blitter_clear_depth_stencil(ice->blitter, dst_view, PIPE_CLEAR_STENCIL,4240, 0, info->dst.box.x, info->dst.box.y,425info->dst.box.width, info->dst.box.height);426crocus_blitter_begin(ice, CROCUS_SAVE_FRAMEBUFFER | CROCUS_SAVE_TEXTURES | CROCUS_SAVE_FRAGMENT_STATE, info->render_condition_enable);427util_blitter_stencil_fallback(ice->blitter,428info->dst.resource,429info->dst.level,430&info->dst.box,431info->src.resource,432info->src.level,433&info->src.box, NULL);434435}436return;437}438439crocus_u_blitter(ice, info);440}441return;442}443444if (devinfo->ver == 6) {445if (info->src.resource->target == PIPE_TEXTURE_3D &&446info->dst.resource->target == PIPE_TEXTURE_3D) {447crocus_u_blitter(ice, info);448return;449}450}451452use_blorp:453if (info->render_condition_enable) {454if (ice->state.predicate == CROCUS_PREDICATE_STATE_USE_BIT)455blorp_flags |= BLORP_BATCH_PREDICATE_ENABLE;456}457458float src_x0 = info->src.box.x;459float src_x1 = info->src.box.x + info->src.box.width;460float src_y0 = info->src.box.y;461float src_y1 = info->src.box.y + info->src.box.height;462float dst_x0 = info->dst.box.x;463float dst_x1 = info->dst.box.x + info->dst.box.width;464float dst_y0 = info->dst.box.y;465float dst_y1 = info->dst.box.y + info->dst.box.height;466bool mirror_x = apply_mirror(&src_x0, &src_x1);467bool mirror_y = apply_mirror(&src_y0, &src_y1);468enum blorp_filter filter;469470if (info->scissor_enable) {471bool noop = apply_blit_scissor(&info->scissor,472&src_x0, &src_y0, &src_x1, &src_y1,473&dst_x0, &dst_y0, &dst_x1, &dst_y1,474mirror_x, mirror_y);475if (noop)476return;477}478479if (abs(info->dst.box.width) == abs(info->src.box.width) &&480abs(info->dst.box.height) == abs(info->src.box.height)) {481if (info->src.resource->nr_samples > 1 &&482info->dst.resource->nr_samples <= 1) {483/* The OpenGL ES 3.2 specification, section 16.2.1, says:484*485* "If the read framebuffer is multisampled (its effective486* value of SAMPLE_BUFFERS is one) and the draw framebuffer487* is not (its value of SAMPLE_BUFFERS is zero), the samples488* corresponding to each pixel location in the source are489* converted to a single sample before being written to the490* destination. The filter parameter is ignored. If the491* source formats are integer types or stencil values, a492* single sample’s value is selected for each pixel. If the493* source formats are floating-point or normalized types,494* the sample values for each pixel are resolved in an495* implementation-dependent manner. If the source formats496* are depth values, sample values are resolved in an497* implementation-dependent manner where the result will be498* between the minimum and maximum depth values in the pixel."499*500* When selecting a single sample, we always choose sample 0.501*/502if (util_format_is_depth_or_stencil(info->src.format) ||503util_format_is_pure_integer(info->src.format)) {504filter = BLORP_FILTER_SAMPLE_0;505} else {506filter = BLORP_FILTER_AVERAGE;507}508} else {509/* The OpenGL 4.6 specification, section 18.3.1, says:510*511* "If the source and destination dimensions are identical,512* no filtering is applied."513*514* Using BLORP_FILTER_NONE will also handle the upsample case by515* replicating the one value in the source to all values in the516* destination.517*/518filter = BLORP_FILTER_NONE;519}520} else if (info->filter == PIPE_TEX_FILTER_LINEAR) {521filter = BLORP_FILTER_BILINEAR;522} else {523filter = BLORP_FILTER_NEAREST;524}525526struct blorp_batch blorp_batch;527blorp_batch_init(&ice->blorp, &blorp_batch, batch, blorp_flags);528529float src_z_step = (float)info->src.box.depth / (float)info->dst.box.depth;530531/* There is no interpolation to the pixel center during rendering, so532* add the 0.5 offset ourselves here.533*/534float depth_center_offset = 0;535if (info->src.resource->target == PIPE_TEXTURE_3D)536depth_center_offset = 0.5 / info->dst.box.depth * info->src.box.depth;537538/* Perform a blit for each aspect requested by the caller. PIPE_MASK_R is539* used to represent the color aspect. */540unsigned aspect_mask = info->mask & (PIPE_MASK_R | PIPE_MASK_ZS);541while (aspect_mask) {542unsigned aspect = 1 << u_bit_scan(&aspect_mask);543544struct crocus_resource *src_res =545crocus_resource_for_aspect(devinfo, info->src.resource, aspect);546struct crocus_resource *dst_res =547crocus_resource_for_aspect(devinfo, info->dst.resource, aspect);548549enum pipe_format src_pfmt =550pipe_format_for_aspect(info->src.format, aspect);551enum pipe_format dst_pfmt =552pipe_format_for_aspect(info->dst.format, aspect);553554if (crocus_resource_unfinished_aux_import(src_res))555crocus_resource_finish_aux_import(ctx->screen, src_res);556if (crocus_resource_unfinished_aux_import(dst_res))557crocus_resource_finish_aux_import(ctx->screen, dst_res);558559struct crocus_format_info src_fmt =560crocus_format_for_usage(devinfo, src_pfmt, ISL_SURF_USAGE_TEXTURE_BIT);561enum isl_aux_usage src_aux_usage =562crocus_resource_texture_aux_usage(src_res);563564crocus_resource_prepare_texture(ice, src_res, src_fmt.fmt,565info->src.level, 1, info->src.box.z,566info->src.box.depth);567// crocus_emit_buffer_barrier_for(batch, src_res->bo,568// CROCUS_DOMAIN_OTHER_READ);569570bool dst_aux_disable = false;571/* on SNB blorp will use render target instead of depth572* so disable HiZ.573*/574if (devinfo->ver <= 6 && util_format_is_depth_or_stencil(dst_pfmt))575dst_aux_disable = true;576struct crocus_format_info dst_fmt =577crocus_format_for_usage(devinfo, dst_pfmt,578ISL_SURF_USAGE_RENDER_TARGET_BIT);579enum isl_aux_usage dst_aux_usage =580crocus_resource_render_aux_usage(ice, dst_res, info->dst.level,581dst_fmt.fmt, dst_aux_disable);582583struct blorp_surf src_surf, dst_surf;584crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &src_surf,585&src_res->base.b, src_aux_usage,586info->src.level, false);587crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &dst_surf,588&dst_res->base.b, dst_aux_usage,589info->dst.level, true);590591crocus_resource_prepare_render(ice, dst_res, info->dst.level,592info->dst.box.z, info->dst.box.depth,593dst_aux_usage);594// crocus_emit_buffer_barrier_for(batch, dst_res->bo,595// CROCUS_DOMAIN_RENDER_WRITE);596597if (crocus_batch_references(batch, src_res->bo))598tex_cache_flush_hack(batch, src_fmt.fmt, src_res->surf.format);599600if (dst_res->base.b.target == PIPE_BUFFER) {601util_range_add(&dst_res->base.b, &dst_res->valid_buffer_range,602dst_x0, dst_x1);603}604605struct isl_swizzle src_swiz = pipe_to_isl_swizzles(src_fmt.swizzles);606struct isl_swizzle dst_swiz = pipe_to_isl_swizzles(dst_fmt.swizzles);607608for (int slice = 0; slice < info->dst.box.depth; slice++) {609unsigned dst_z = info->dst.box.z + slice;610float src_z = info->src.box.z + slice * src_z_step +611depth_center_offset;612613crocus_batch_maybe_flush(batch, 1500);614615blorp_blit(&blorp_batch,616&src_surf, info->src.level, src_z,617src_fmt.fmt, src_swiz,618&dst_surf, info->dst.level, dst_z,619dst_fmt.fmt, dst_swiz,620src_x0, src_y0, src_x1, src_y1,621dst_x0, dst_y0, dst_x1, dst_y1,622filter, mirror_x, mirror_y);623624}625626tex_cache_flush_hack(batch, src_fmt.fmt, src_res->surf.format);627628crocus_resource_finish_render(ice, dst_res, info->dst.level,629info->dst.box.z, info->dst.box.depth,630dst_aux_usage);631}632633blorp_batch_finish(&blorp_batch);634635crocus_flush_and_dirty_for_history(ice, batch, (struct crocus_resource *)636info->dst.resource,637PIPE_CONTROL_RENDER_TARGET_FLUSH,638"cache history: post-blit");639}640641static void642get_copy_region_aux_settings(struct crocus_resource *res,643enum isl_aux_usage *out_aux_usage,644bool is_render_target)645{646switch (res->aux.usage) {647case ISL_AUX_USAGE_MCS:648/* A stencil resolve operation must be performed prior to doing resource649* copies or used by CPU.650* (see HSD 1209978162)651*/652if (is_render_target && isl_surf_usage_is_stencil(res->surf.usage)) {653*out_aux_usage = ISL_AUX_USAGE_NONE;654} else {655*out_aux_usage = res->aux.usage;656}657break;658default:659*out_aux_usage = ISL_AUX_USAGE_NONE;660break;661}662}663664/**665* Perform a GPU-based raw memory copy between compatible view classes.666*667* Does not perform any flushing - the new data may still be left in the668* render cache, and old data may remain in other caches.669*670* Wraps blorp_copy() and blorp_buffer_copy().671*/672void673crocus_copy_region(struct blorp_context *blorp,674struct crocus_batch *batch,675struct pipe_resource *dst,676unsigned dst_level,677unsigned dstx, unsigned dsty, unsigned dstz,678struct pipe_resource *src,679unsigned src_level,680const struct pipe_box *src_box)681{682struct blorp_batch blorp_batch;683struct crocus_context *ice = blorp->driver_ctx;684struct crocus_screen *screen = (void *) ice->ctx.screen;685const struct intel_device_info *devinfo = &screen->devinfo;686struct crocus_resource *src_res = (void *) src;687struct crocus_resource *dst_res = (void *) dst;688689if (devinfo->ver <= 5) {690if (screen->vtbl.copy_region_blt(batch, dst_res,691dst_level, dstx, dsty, dstz,692src_res, src_level, src_box))693return;694}695enum isl_aux_usage src_aux_usage, dst_aux_usage;696get_copy_region_aux_settings(src_res, &src_aux_usage,697false);698get_copy_region_aux_settings(dst_res, &dst_aux_usage,699true);700701if (crocus_batch_references(batch, src_res->bo))702tex_cache_flush_hack(batch, ISL_FORMAT_UNSUPPORTED, src_res->surf.format);703704if (dst->target == PIPE_BUFFER)705util_range_add(&dst_res->base.b, &dst_res->valid_buffer_range, dstx, dstx + src_box->width);706707if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {708struct blorp_address src_addr = {709.buffer = crocus_resource_bo(src), .offset = src_box->x,710};711struct blorp_address dst_addr = {712.buffer = crocus_resource_bo(dst), .offset = dstx,713.reloc_flags = EXEC_OBJECT_WRITE,714};715716crocus_batch_maybe_flush(batch, 1500);717718blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);719blorp_buffer_copy(&blorp_batch, src_addr, dst_addr, src_box->width);720blorp_batch_finish(&blorp_batch);721} else {722// XXX: what about one surface being a buffer and not the other?723724struct blorp_surf src_surf, dst_surf;725crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &src_surf,726src, src_aux_usage, src_level, false);727crocus_blorp_surf_for_resource(&screen->vtbl, &screen->isl_dev, &dst_surf,728dst, dst_aux_usage, dst_level, true);729730crocus_resource_prepare_access(ice, src_res, src_level, 1,731src_box->z, src_box->depth,732src_aux_usage, false);733crocus_resource_prepare_access(ice, dst_res, dst_level, 1,734dstz, src_box->depth,735dst_aux_usage, false);736737blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);738739for (int slice = 0; slice < src_box->depth; slice++) {740crocus_batch_maybe_flush(batch, 1500);741742blorp_copy(&blorp_batch, &src_surf, src_level, src_box->z + slice,743&dst_surf, dst_level, dstz + slice,744src_box->x, src_box->y, dstx, dsty,745src_box->width, src_box->height);746}747blorp_batch_finish(&blorp_batch);748749crocus_resource_finish_write(ice, dst_res, dst_level, dstz,750src_box->depth, dst_aux_usage);751}752753tex_cache_flush_hack(batch, ISL_FORMAT_UNSUPPORTED, src_res->surf.format);754}755756static struct crocus_batch *757get_preferred_batch(struct crocus_context *ice, struct crocus_bo *bo)758{759/* If the compute batch is already using this buffer, we'd prefer to760* continue queueing in the compute batch.761*/762if (crocus_batch_references(&ice->batches[CROCUS_BATCH_COMPUTE], bo))763return &ice->batches[CROCUS_BATCH_COMPUTE];764765/* Otherwise default to the render batch. */766return &ice->batches[CROCUS_BATCH_RENDER];767}768769770/**771* The pipe->resource_copy_region() driver hook.772*773* This implements ARB_copy_image semantics - a raw memory copy between774* compatible view classes.775*/776static void777crocus_resource_copy_region(struct pipe_context *ctx,778struct pipe_resource *p_dst,779unsigned dst_level,780unsigned dstx, unsigned dsty, unsigned dstz,781struct pipe_resource *p_src,782unsigned src_level,783const struct pipe_box *src_box)784{785struct crocus_context *ice = (void *) ctx;786struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];787struct crocus_screen *screen = (struct crocus_screen *)ctx->screen;788const struct intel_device_info *devinfo = &screen->devinfo;789struct crocus_resource *src = (void *) p_src;790struct crocus_resource *dst = (void *) p_dst;791792if (crocus_resource_unfinished_aux_import(src))793crocus_resource_finish_aux_import(ctx->screen, src);794if (crocus_resource_unfinished_aux_import(dst))795crocus_resource_finish_aux_import(ctx->screen, dst);796797/* Use MI_COPY_MEM_MEM for tiny (<= 16 byte, % 4) buffer copies. */798if (p_src->target == PIPE_BUFFER && p_dst->target == PIPE_BUFFER &&799(src_box->width % 4 == 0) && src_box->width <= 16 &&800screen->vtbl.copy_mem_mem) {801struct crocus_bo *dst_bo = crocus_resource_bo(p_dst);802batch = get_preferred_batch(ice, dst_bo);803crocus_batch_maybe_flush(batch, 24 + 5 * (src_box->width / 4));804crocus_emit_pipe_control_flush(batch,805"stall for MI_COPY_MEM_MEM copy_region",806PIPE_CONTROL_CS_STALL);807screen->vtbl.copy_mem_mem(batch, dst_bo, dstx, crocus_resource_bo(p_src),808src_box->x, src_box->width);809return;810}811812if (devinfo->ver < 6 && util_format_is_depth_or_stencil(p_dst->format)) {813util_resource_copy_region(ctx, p_dst, dst_level, dstx, dsty, dstz,814p_src, src_level, src_box);815return;816}817crocus_copy_region(&ice->blorp, batch, p_dst, dst_level, dstx, dsty, dstz,818p_src, src_level, src_box);819820if (util_format_is_depth_and_stencil(p_dst->format) &&821util_format_has_stencil(util_format_description(p_src->format)) &&822devinfo->ver >= 6) {823struct crocus_resource *junk, *s_src_res, *s_dst_res;824crocus_get_depth_stencil_resources(devinfo, p_src, &junk, &s_src_res);825crocus_get_depth_stencil_resources(devinfo, p_dst, &junk, &s_dst_res);826827crocus_copy_region(&ice->blorp, batch, &s_dst_res->base.b, dst_level, dstx,828dsty, dstz, &s_src_res->base.b, src_level, src_box);829}830831crocus_flush_and_dirty_for_history(ice, batch, dst,832PIPE_CONTROL_RENDER_TARGET_FLUSH,833"cache history: post copy_region");834}835836void837crocus_init_blit_functions(struct pipe_context *ctx)838{839ctx->blit = crocus_blit;840ctx->resource_copy_region = crocus_resource_copy_region;841}842843844