Path: blob/21.2-virgl/src/gallium/drivers/radeonsi/si_clear.c
4570 views
/*1* Copyright 2017 Advanced Micro Devices, Inc.2* All Rights Reserved.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* on the rights to use, copy, modify, merge, publish, distribute, sub8* license, and/or sell copies of the Software, and to permit persons to whom9* the Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL18* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,19* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR20* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE21* USE OR OTHER DEALINGS IN THE SOFTWARE.22*/2324#include "si_pipe.h"25#include "sid.h"26#include "util/format/u_format.h"27#include "util/u_pack_color.h"28#include "util/u_surface.h"2930enum31{32SI_CLEAR = SI_SAVE_FRAGMENT_STATE,33SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE,34};3536void si_init_buffer_clear(struct si_clear_info *info,37struct pipe_resource *resource, uint64_t offset,38uint32_t size, uint32_t clear_value)39{40info->resource = resource;41info->offset = offset;42info->size = size;43info->clear_value = clear_value;44info->writemask = 0xffffffff;45info->is_dcc_msaa = false;46}4748static void si_init_buffer_clear_rmw(struct si_clear_info *info,49struct pipe_resource *resource, uint64_t offset,50uint32_t size, uint32_t clear_value, uint32_t writemask)51{52si_init_buffer_clear(info, resource, offset, size, clear_value);53info->writemask = writemask;54}5556void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,57unsigned num_clears, unsigned types)58{59if (!num_clears)60return;6162/* Flush caches and wait for idle. */63if (types & (SI_CLEAR_TYPE_CMASK | SI_CLEAR_TYPE_DCC))64sctx->flags |= si_get_flush_flags(sctx, SI_COHERENCY_CB_META, L2_LRU);6566if (types & SI_CLEAR_TYPE_HTILE)67sctx->flags |= si_get_flush_flags(sctx, SI_COHERENCY_DB_META, L2_LRU);6869/* Flush caches in case we use compute. */70sctx->flags |= SI_CONTEXT_INV_VCACHE;7172/* GFX6-8: CB and DB don't use L2. */73if (sctx->chip_class <= GFX8)74sctx->flags |= SI_CONTEXT_INV_L2;7576/* Execute clears. */77for (unsigned i = 0; i < num_clears; i++) {78if (info[i].is_dcc_msaa) {79gfx9_clear_dcc_msaa(sctx, info[i].resource, info[i].clear_value,80SI_OP_SKIP_CACHE_INV_BEFORE, SI_COHERENCY_CP);81continue;82}8384assert(info[i].size > 0);8586if (info[i].writemask != 0xffffffff) {87si_compute_clear_buffer_rmw(sctx, info[i].resource, info[i].offset, info[i].size,88info[i].clear_value, info[i].writemask,89SI_OP_SKIP_CACHE_INV_BEFORE, SI_COHERENCY_CP);90} else {91/* Compute shaders are much faster on both dGPUs and APUs. Don't use CP DMA. */92si_clear_buffer(sctx, info[i].resource, info[i].offset, info[i].size,93&info[i].clear_value, 4, SI_OP_SKIP_CACHE_INV_BEFORE,94SI_COHERENCY_CP, SI_COMPUTE_CLEAR_METHOD);95}96}9798/* Wait for idle. */99sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;100101/* GFX6-8: CB and DB don't use L2. */102if (sctx->chip_class <= GFX8)103sctx->flags |= SI_CONTEXT_WB_L2;104}105106static bool si_alloc_separate_cmask(struct si_screen *sscreen, struct si_texture *tex)107{108/* CMASK for MSAA is allocated in advance or always disabled109* by "nofmask" option.110*/111if (tex->cmask_buffer)112return true;113114if (!tex->surface.cmask_size)115return false;116117tex->cmask_buffer =118si_aligned_buffer_create(&sscreen->b, SI_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT,119tex->surface.cmask_size, 1 << tex->surface.cmask_alignment_log2);120if (tex->cmask_buffer == NULL)121return false;122123tex->cmask_base_address_reg = tex->cmask_buffer->gpu_address >> 8;124tex->cb_color_info |= S_028C70_FAST_CLEAR(1);125126p_atomic_inc(&sscreen->compressed_colortex_counter);127return true;128}129130static bool si_set_clear_color(struct si_texture *tex, enum pipe_format surface_format,131const union pipe_color_union *color)132{133union util_color uc;134135memset(&uc, 0, sizeof(uc));136137if (tex->surface.bpe == 16) {138/* DCC fast clear only:139* CLEAR_WORD0 = R = G = B140* CLEAR_WORD1 = A141*/142assert(color->ui[0] == color->ui[1] && color->ui[0] == color->ui[2]);143uc.ui[0] = color->ui[0];144uc.ui[1] = color->ui[3];145} else {146if (tex->swap_rgb_to_bgr)147surface_format = util_format_rgb_to_bgr(surface_format);148149util_pack_color_union(surface_format, &uc, color);150}151152if (memcmp(tex->color_clear_value, &uc, 2 * sizeof(uint32_t)) == 0)153return false;154155memcpy(tex->color_clear_value, &uc, 2 * sizeof(uint32_t));156return true;157}158159/** Linearize and convert luminance/intensity to red. */160enum pipe_format si_simplify_cb_format(enum pipe_format format)161{162format = util_format_linear(format);163format = util_format_luminance_to_red(format);164return util_format_intensity_to_red(format);165}166167bool vi_alpha_is_on_msb(struct si_screen *sscreen, enum pipe_format format)168{169format = si_simplify_cb_format(format);170const struct util_format_description *desc = util_format_description(format);171172/* Formats with 3 channels can't have alpha. */173if (desc->nr_channels == 3)174return true; /* same as xxxA; is any value OK here? */175176if (sscreen->info.chip_class >= GFX10 && desc->nr_channels == 1)177return desc->swizzle[3] == PIPE_SWIZZLE_X;178179return si_translate_colorswap(format, false) <= 1;180}181182static bool vi_get_fast_clear_parameters(struct si_screen *sscreen, enum pipe_format base_format,183enum pipe_format surface_format,184const union pipe_color_union *color, uint32_t *clear_value,185bool *eliminate_needed)186{187/* If we want to clear without needing a fast clear eliminate step, we188* can set color and alpha independently to 0 or 1 (or 0/max for integer189* formats).190*/191bool values[4] = {}; /* whether to clear to 0 or 1 */192bool color_value = false; /* clear color to 0 or 1 */193bool alpha_value = false; /* clear alpha to 0 or 1 */194int alpha_channel; /* index of the alpha component */195bool has_color = false;196bool has_alpha = false;197198const struct util_format_description *desc =199util_format_description(si_simplify_cb_format(surface_format));200201/* 128-bit fast clear with different R,G,B values is unsupported. */202if (desc->block.bits == 128 && (color->ui[0] != color->ui[1] || color->ui[0] != color->ui[2]))203return false;204205*eliminate_needed = true;206*clear_value = DCC_CLEAR_COLOR_REG;207208if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)209return true; /* need ELIMINATE_FAST_CLEAR */210211bool base_alpha_is_on_msb = vi_alpha_is_on_msb(sscreen, base_format);212bool surf_alpha_is_on_msb = vi_alpha_is_on_msb(sscreen, surface_format);213214/* Formats with 3 channels can't have alpha. */215if (desc->nr_channels == 3)216alpha_channel = -1;217else if (surf_alpha_is_on_msb)218alpha_channel = desc->nr_channels - 1;219else220alpha_channel = 0;221222for (int i = 0; i < 4; ++i) {223if (desc->swizzle[i] >= PIPE_SWIZZLE_0)224continue;225226if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {227/* Use the maximum value for clamping the clear color. */228int max = u_bit_consecutive(0, desc->channel[i].size - 1);229230values[i] = color->i[i] != 0;231if (color->i[i] != 0 && MIN2(color->i[i], max) != max)232return true; /* need ELIMINATE_FAST_CLEAR */233} else if (desc->channel[i].pure_integer &&234desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {235/* Use the maximum value for clamping the clear color. */236unsigned max = u_bit_consecutive(0, desc->channel[i].size);237238values[i] = color->ui[i] != 0U;239if (color->ui[i] != 0U && MIN2(color->ui[i], max) != max)240return true; /* need ELIMINATE_FAST_CLEAR */241} else {242values[i] = color->f[i] != 0.0F;243if (color->f[i] != 0.0F && color->f[i] != 1.0F)244return true; /* need ELIMINATE_FAST_CLEAR */245}246247if (desc->swizzle[i] == alpha_channel) {248alpha_value = values[i];249has_alpha = true;250} else {251color_value = values[i];252has_color = true;253}254}255256/* If alpha isn't present, make it the same as color, and vice versa. */257if (!has_alpha)258alpha_value = color_value;259else if (!has_color)260color_value = alpha_value;261262if (color_value != alpha_value && base_alpha_is_on_msb != surf_alpha_is_on_msb)263return true; /* require ELIMINATE_FAST_CLEAR */264265/* Check if all color values are equal if they are present. */266for (int i = 0; i < 4; ++i) {267if (desc->swizzle[i] <= PIPE_SWIZZLE_W && desc->swizzle[i] != alpha_channel &&268values[i] != color_value)269return true; /* require ELIMINATE_FAST_CLEAR */270}271272/* This doesn't need ELIMINATE_FAST_CLEAR.273* On chips predating Raven2, the DCC clear codes and the CB clear274* color registers must match.275*/276*eliminate_needed = false;277278if (color_value) {279if (alpha_value)280*clear_value = DCC_CLEAR_COLOR_1111;281else282*clear_value = DCC_CLEAR_COLOR_1110;283} else {284if (alpha_value)285*clear_value = DCC_CLEAR_COLOR_0001;286else287*clear_value = DCC_CLEAR_COLOR_0000;288}289return true;290}291292bool vi_dcc_get_clear_info(struct si_context *sctx, struct si_texture *tex, unsigned level,293unsigned clear_value, struct si_clear_info *out)294{295struct pipe_resource *dcc_buffer = &tex->buffer.b.b;296uint64_t dcc_offset = tex->surface.meta_offset;297uint32_t clear_size;298299assert(vi_dcc_enabled(tex, level));300301if (sctx->chip_class >= GFX10) {302/* 4x and 8x MSAA needs a sophisticated compute shader for303* the clear. */304if (tex->buffer.b.b.nr_storage_samples >= 4)305return false;306307unsigned num_layers = util_num_layers(&tex->buffer.b.b, level);308309if (num_layers == 1) {310/* Clear a specific level. */311dcc_offset += tex->surface.u.gfx9.meta_levels[level].offset;312clear_size = tex->surface.u.gfx9.meta_levels[level].size;313} else if (tex->buffer.b.b.last_level == 0) {314/* Clear all layers having only 1 level. */315clear_size = tex->surface.meta_size;316} else {317/* Clearing DCC with both multiple levels and multiple layers is not318* implemented.319*/320return false;321}322} else if (sctx->chip_class == GFX9) {323/* TODO: Implement DCC fast clear for level 0 of mipmapped textures. Mipmapped324* DCC has to clear a rectangular area of DCC for level 0 (because the whole miptree325* is organized in a 2D plane).326*/327if (tex->buffer.b.b.last_level > 0)328return false;329330/* 4x and 8x MSAA need to clear only sample 0 and 1 in a compute shader and leave other331* samples untouched. (only the first 2 samples are compressed) */332if (tex->buffer.b.b.nr_storage_samples >= 4) {333si_init_buffer_clear(out, dcc_buffer, 0, 0, clear_value);334out->is_dcc_msaa = true;335return true;336}337338clear_size = tex->surface.meta_size;339} else {340unsigned num_layers = util_num_layers(&tex->buffer.b.b, level);341342/* If this is 0, fast clear isn't possible. (can occur with MSAA) */343if (!tex->surface.u.legacy.color.dcc_level[level].dcc_fast_clear_size)344return false;345346/* Layered 4x and 8x MSAA DCC fast clears need to clear347* dcc_fast_clear_size bytes for each layer. A compute shader348* would be more efficient than separate per-layer clear operations.349*/350if (tex->buffer.b.b.nr_storage_samples >= 4 && num_layers > 1)351return false;352353dcc_offset += tex->surface.u.legacy.color.dcc_level[level].dcc_offset;354clear_size = tex->surface.u.legacy.color.dcc_level[level].dcc_fast_clear_size * num_layers;355}356357si_init_buffer_clear(out, dcc_buffer, dcc_offset, clear_size, clear_value);358return true;359}360361/* Set the same micro tile mode as the destination of the last MSAA resolve.362* This allows hitting the MSAA resolve fast path, which requires that both363* src and dst micro tile modes match.364*/365static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, struct si_texture *tex)366{367if (sscreen->info.chip_class >= GFX10 || tex->buffer.b.is_shared ||368tex->buffer.b.b.nr_samples <= 1 ||369tex->surface.micro_tile_mode == tex->last_msaa_resolve_target_micro_mode)370return;371372assert(sscreen->info.chip_class >= GFX9 ||373tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);374assert(tex->buffer.b.b.last_level == 0);375376if (sscreen->info.chip_class >= GFX9) {377/* 4K or larger tiles only. 0 is linear. 1-3 are 256B tiles. */378assert(tex->surface.u.gfx9.swizzle_mode >= 4);379380/* If you do swizzle_mode % 4, you'll get:381* 0 = Depth382* 1 = Standard,383* 2 = Displayable384* 3 = Rotated385*386* Depth-sample order isn't allowed:387*/388assert(tex->surface.u.gfx9.swizzle_mode % 4 != 0);389390switch (tex->last_msaa_resolve_target_micro_mode) {391case RADEON_MICRO_MODE_DISPLAY:392tex->surface.u.gfx9.swizzle_mode &= ~0x3;393tex->surface.u.gfx9.swizzle_mode += 2; /* D */394break;395case RADEON_MICRO_MODE_STANDARD:396tex->surface.u.gfx9.swizzle_mode &= ~0x3;397tex->surface.u.gfx9.swizzle_mode += 1; /* S */398break;399case RADEON_MICRO_MODE_RENDER:400tex->surface.u.gfx9.swizzle_mode &= ~0x3;401tex->surface.u.gfx9.swizzle_mode += 3; /* R */402break;403default: /* depth */404assert(!"unexpected micro mode");405return;406}407} else if (sscreen->info.chip_class >= GFX7) {408/* These magic numbers were copied from addrlib. It doesn't use409* any definitions for them either. They are all 2D_TILED_THIN1410* modes with different bpp and micro tile mode.411*/412switch (tex->last_msaa_resolve_target_micro_mode) {413case RADEON_MICRO_MODE_DISPLAY:414tex->surface.u.legacy.tiling_index[0] = 10;415break;416case RADEON_MICRO_MODE_STANDARD:417tex->surface.u.legacy.tiling_index[0] = 14;418break;419case RADEON_MICRO_MODE_RENDER:420tex->surface.u.legacy.tiling_index[0] = 28;421break;422default: /* depth, thick */423assert(!"unexpected micro mode");424return;425}426} else { /* GFX6 */427switch (tex->last_msaa_resolve_target_micro_mode) {428case RADEON_MICRO_MODE_DISPLAY:429switch (tex->surface.bpe) {430case 1:431tex->surface.u.legacy.tiling_index[0] = 10;432break;433case 2:434tex->surface.u.legacy.tiling_index[0] = 11;435break;436default: /* 4, 8 */437tex->surface.u.legacy.tiling_index[0] = 12;438break;439}440break;441case RADEON_MICRO_MODE_STANDARD:442switch (tex->surface.bpe) {443case 1:444tex->surface.u.legacy.tiling_index[0] = 14;445break;446case 2:447tex->surface.u.legacy.tiling_index[0] = 15;448break;449case 4:450tex->surface.u.legacy.tiling_index[0] = 16;451break;452default: /* 8, 16 */453tex->surface.u.legacy.tiling_index[0] = 17;454break;455}456break;457default: /* depth, thick */458assert(!"unexpected micro mode");459return;460}461}462463tex->surface.micro_tile_mode = tex->last_msaa_resolve_target_micro_mode;464465p_atomic_inc(&sscreen->dirty_tex_counter);466}467468static uint32_t si_get_htile_clear_value(struct si_texture *tex, float depth)469{470/* Maximum 14-bit UINT value. */471const uint32_t max_z_value = 0x3FFF;472473/* For clears, Zmask and Smem will always be set to zero. */474const uint32_t zmask = 0;475const uint32_t smem = 0;476477/* Convert depthValue to 14-bit zmin/zmax uint values. */478const uint32_t zmin = (depth * max_z_value) + 0.5f;479const uint32_t zmax = zmin;480481if (tex->htile_stencil_disabled) {482/* Z-only HTILE is laid out as follows:483* |31 18|17 4|3 0|484* +---------+---------+-------+485* | Max Z | Min Z | ZMask |486*/487return ((zmax & 0x3FFF) << 18) |488((zmin & 0x3FFF) << 4) |489((zmask & 0xF) << 0);490} else {491/* Z+S HTILE is laid out as-follows:492* |31 12|11 10|9 8|7 6|5 4|3 0|493* +-----------+-----+------+-----+-----+-------+494* | Z Range | | SMem | SR1 | SR0 | ZMask |495*496* The base value for zRange is either zMax or zMin, depending on ZRANGE_PRECISION.497* For a fast clear, zMin == zMax == clearValue. This means that the base will498* always be the clear value (converted to 14-bit UINT).499*500* When abs(zMax-zMin) < 16, the delta is equal to the difference. In the case of501* fast clears, where zMax == zMin, the delta is always zero.502*/503const uint32_t delta = 0;504const uint32_t zrange = (zmax << 6) | delta;505506/* SResults 0 & 1 are set based on the stencil compare state.507* For fast-clear, the default value of sr0 and sr1 are both 0x3.508*/509const uint32_t sresults = 0xf;510511return ((zrange & 0xFFFFF) << 12) |512((smem & 0x3) << 8) |513((sresults & 0xF) << 4) |514((zmask & 0xF) << 0);515}516}517518static bool si_can_fast_clear_depth(struct si_texture *zstex, unsigned level, float depth,519unsigned buffers)520{521/* TC-compatible HTILE only supports depth clears to 0 or 1. */522return buffers & PIPE_CLEAR_DEPTH &&523si_htile_enabled(zstex, level, PIPE_MASK_Z) &&524(!zstex->tc_compatible_htile || depth == 0 || depth == 1);525}526527static bool si_can_fast_clear_stencil(struct si_texture *zstex, unsigned level, uint8_t stencil,528unsigned buffers)529{530/* TC-compatible HTILE only supports stencil clears to 0. */531return buffers & PIPE_CLEAR_STENCIL &&532si_htile_enabled(zstex, level, PIPE_MASK_S) &&533(!zstex->tc_compatible_htile || stencil == 0);534}535536static void si_fast_clear(struct si_context *sctx, unsigned *buffers,537const union pipe_color_union *color, float depth, uint8_t stencil)538{539struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;540struct si_clear_info info[8 * 2 + 1]; /* MRTs * (CMASK + DCC) + ZS */541unsigned num_clears = 0;542unsigned clear_types = 0;543unsigned num_pixels = fb->width * fb->height;544545/* This function is broken in BE, so just disable this path for now */546#if UTIL_ARCH_BIG_ENDIAN547return;548#endif549550if (sctx->render_cond)551return;552553/* Gather information about what to clear. */554unsigned color_buffer_mask = (*buffers & PIPE_CLEAR_COLOR) >> util_logbase2(PIPE_CLEAR_COLOR0);555while (color_buffer_mask) {556unsigned i = u_bit_scan(&color_buffer_mask);557558struct si_texture *tex = (struct si_texture *)fb->cbufs[i]->texture;559unsigned level = fb->cbufs[i]->u.tex.level;560unsigned num_layers = util_num_layers(&tex->buffer.b.b, level);561562/* the clear is allowed if all layers are bound */563if (fb->cbufs[i]->u.tex.first_layer != 0 ||564fb->cbufs[i]->u.tex.last_layer != num_layers - 1) {565continue;566}567568/* We can change the micro tile mode before a full clear. */569/* This is only used for MSAA textures when clearing all layers. */570si_set_optimal_micro_tile_mode(sctx->screen, tex);571572if (tex->swap_rgb_to_bgr_on_next_clear) {573assert(!tex->swap_rgb_to_bgr);574assert(tex->buffer.b.b.nr_samples >= 2);575tex->swap_rgb_to_bgr = true;576tex->swap_rgb_to_bgr_on_next_clear = false;577578/* Update all sampler views and images. */579p_atomic_inc(&sctx->screen->dirty_tex_counter);580}581582/* only supported on tiled surfaces */583if (tex->surface.is_linear) {584continue;585}586587if (sctx->chip_class <= GFX8 && tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&588!sctx->screen->info.htile_cmask_support_1d_tiling)589continue;590591/* Use a slow clear for small surfaces where the cost of592* the eliminate pass can be higher than the benefit of fast593* clear. The closed driver does this, but the numbers may differ.594*595* This helps on both dGPUs and APUs, even small APUs like Mullins.596*/597bool fb_too_small = num_pixels * num_layers <= 512 * 512;598bool too_small = tex->buffer.b.b.nr_samples <= 1 && fb_too_small;599bool eliminate_needed = false;600bool fmask_decompress_needed = false;601602/* Try to clear DCC first, otherwise try CMASK. */603if (vi_dcc_enabled(tex, level)) {604uint32_t reset_value;605606if (sctx->screen->debug_flags & DBG(NO_DCC_CLEAR))607continue;608609if (!vi_get_fast_clear_parameters(sctx->screen, tex->buffer.b.b.format,610fb->cbufs[i]->format, color, &reset_value,611&eliminate_needed))612continue;613614/* Shared textures can't use fast clear without an explicit flush615* because the clear color is not exported.616*617* Chips without DCC constant encoding must set the clear color registers618* correctly even if the fast clear eliminate pass is not needed.619*/620if ((eliminate_needed || !sctx->screen->info.has_dcc_constant_encode) &&621tex->buffer.b.is_shared &&622!(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))623continue;624625if (eliminate_needed && too_small)626continue;627628/* We can clear any level, but we only set up the clear value registers for the first629* level. Therefore, all other levels can be cleared only if the clear value registers630* are not used, which is only the case with DCC constant encoding and 0/1 clear values.631*/632if (level > 0 && (eliminate_needed || !sctx->screen->info.has_dcc_constant_encode))633continue;634635if (tex->buffer.b.b.nr_samples >= 2 && eliminate_needed &&636!sctx->screen->allow_dcc_msaa_clear_to_reg_for_bpp[util_logbase2(tex->surface.bpe)])637continue;638639assert(num_clears < ARRAY_SIZE(info));640641if (!vi_dcc_get_clear_info(sctx, tex, level, reset_value, &info[num_clears]))642continue;643644num_clears++;645clear_types |= SI_CLEAR_TYPE_DCC;646647si_mark_display_dcc_dirty(sctx, tex);648649/* DCC fast clear with MSAA should clear CMASK to 0xC. */650if (tex->buffer.b.b.nr_samples >= 2 && tex->cmask_buffer) {651assert(num_clears < ARRAY_SIZE(info));652si_init_buffer_clear(&info[num_clears++], &tex->cmask_buffer->b.b,653tex->surface.cmask_offset, tex->surface.cmask_size, 0xCCCCCCCC);654clear_types |= SI_CLEAR_TYPE_CMASK;655fmask_decompress_needed = true;656}657} else {658if (level > 0)659continue;660661/* Shared textures can't use fast clear without an explicit flush662* because the clear color is not exported.663*/664if (tex->buffer.b.is_shared &&665!(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))666continue;667668if (too_small)669continue;670671/* 128-bit formats are unsupported */672if (tex->surface.bpe > 8) {673continue;674}675676/* RB+ doesn't work with CMASK fast clear on Stoney. */677if (sctx->family == CHIP_STONEY)678continue;679680/* Disable fast clear if tex is encrypted */681if (tex->buffer.flags & RADEON_FLAG_ENCRYPTED)682continue;683684uint64_t cmask_offset = 0;685unsigned clear_size = 0;686687if (sctx->chip_class >= GFX10) {688assert(level == 0);689690/* Clearing CMASK with both multiple levels and multiple layers is not691* implemented.692*/693if (num_layers > 1 && tex->buffer.b.b.last_level > 0)694continue;695696if (!si_alloc_separate_cmask(sctx->screen, tex))697continue;698699if (num_layers == 1) {700/* Clear level 0. */701cmask_offset = tex->surface.cmask_offset + tex->surface.u.gfx9.color.cmask_level0.offset;702clear_size = tex->surface.u.gfx9.color.cmask_level0.size;703} else if (tex->buffer.b.b.last_level == 0) {704/* Clear all layers having only 1 level. */705cmask_offset = tex->surface.cmask_offset;706clear_size = tex->surface.cmask_size;707} else {708assert(0); /* this is prevented above */709}710} else if (sctx->chip_class == GFX9) {711/* TODO: Implement CMASK fast clear for level 0 of mipmapped textures. Mipmapped712* CMASK has to clear a rectangular area of CMASK for level 0 (because the whole713* miptree is organized in a 2D plane).714*/715if (tex->buffer.b.b.last_level > 0)716continue;717718if (!si_alloc_separate_cmask(sctx->screen, tex))719continue;720721cmask_offset = tex->surface.cmask_offset;722clear_size = tex->surface.cmask_size;723} else {724if (!si_alloc_separate_cmask(sctx->screen, tex))725continue;726727/* GFX6-8: This only covers mipmap level 0. */728cmask_offset = tex->surface.cmask_offset;729clear_size = tex->surface.cmask_size;730}731732/* Do the fast clear. */733assert(num_clears < ARRAY_SIZE(info));734si_init_buffer_clear(&info[num_clears++], &tex->cmask_buffer->b.b,735cmask_offset, clear_size, 0);736clear_types |= SI_CLEAR_TYPE_CMASK;737eliminate_needed = true;738}739740if ((eliminate_needed || fmask_decompress_needed) &&741!(tex->dirty_level_mask & (1 << level))) {742tex->dirty_level_mask |= 1 << level;743p_atomic_inc(&sctx->screen->compressed_colortex_counter);744}745746*buffers &= ~(PIPE_CLEAR_COLOR0 << i);747748/* Chips with DCC constant encoding don't need to set the clear749* color registers for DCC clear values 0 and 1.750*/751if (sctx->screen->info.has_dcc_constant_encode && !eliminate_needed)752continue;753754if (si_set_clear_color(tex, fb->cbufs[i]->format, color)) {755sctx->framebuffer.dirty_cbufs |= 1 << i;756si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);757}758}759760/* Depth/stencil clears. */761struct pipe_surface *zsbuf = fb->zsbuf;762struct si_texture *zstex = zsbuf ? (struct si_texture *)zsbuf->texture : NULL;763unsigned zs_num_layers = zstex ? util_num_layers(&zstex->buffer.b.b, zsbuf->u.tex.level) : 0;764765if (zstex && zsbuf->u.tex.first_layer == 0 &&766zsbuf->u.tex.last_layer == zs_num_layers - 1 &&767si_htile_enabled(zstex, zsbuf->u.tex.level, PIPE_MASK_ZS)) {768unsigned level = zsbuf->u.tex.level;769bool update_db_depth_clear = false;770bool update_db_stencil_clear = false;771bool fb_too_small = num_pixels * zs_num_layers <= 512 * 512;772773/* Transition from TC-incompatible to TC-compatible HTILE if requested. */774if (zstex->enable_tc_compatible_htile_next_clear) {775/* If both depth and stencil are present, they must be cleared together. */776if ((*buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL ||777(*buffers & PIPE_CLEAR_DEPTH && (!zstex->surface.has_stencil ||778zstex->htile_stencil_disabled))) {779/* The conversion from TC-incompatible to TC-compatible can only be done in one clear. */780assert(zstex->buffer.b.b.last_level == 0);781assert(!zstex->tc_compatible_htile);782783/* Enable TC-compatible HTILE. */784zstex->enable_tc_compatible_htile_next_clear = false;785zstex->tc_compatible_htile = true;786787/* Update the framebuffer state to reflect the change. */788sctx->framebuffer.DB_has_shader_readable_metadata = true;789sctx->framebuffer.dirty_zsbuf = true;790si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);791792/* Update all sampler views and shader images in all contexts. */793p_atomic_inc(&sctx->screen->dirty_tex_counter);794795/* Perform the clear here if possible, else clear to uncompressed. */796uint32_t clear_value;797798if (zstex->htile_stencil_disabled || !zstex->surface.has_stencil) {799if (si_can_fast_clear_depth(zstex, level, depth, *buffers)) {800/* Z-only clear. */801clear_value = si_get_htile_clear_value(zstex, depth);802*buffers &= ~PIPE_CLEAR_DEPTH;803zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level);804update_db_depth_clear = true;805}806} else if ((*buffers & PIPE_BIND_DEPTH_STENCIL) == PIPE_BIND_DEPTH_STENCIL) {807if (si_can_fast_clear_depth(zstex, level, depth, *buffers) &&808si_can_fast_clear_stencil(zstex, level, stencil, *buffers)) {809/* Combined Z+S clear. */810clear_value = si_get_htile_clear_value(zstex, depth);811*buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;812zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level);813zstex->stencil_cleared_level_mask |= BITFIELD_BIT(level);814update_db_depth_clear = true;815update_db_stencil_clear = true;816}817}818819if (!update_db_depth_clear) {820/* Clear to uncompressed, so that it doesn't contain values incompatible821* with the new TC-compatible HTILE setting.822*823* 0xfffff30f = uncompressed Z + S824* 0xfffc000f = uncompressed Z only825*/826clear_value = !zstex->htile_stencil_disabled ? 0xfffff30f : 0xfffc000f;827}828829assert(num_clears < ARRAY_SIZE(info));830si_init_buffer_clear(&info[num_clears++], &zstex->buffer.b.b,831zstex->surface.meta_offset, zstex->surface.meta_size, clear_value);832clear_types |= SI_CLEAR_TYPE_HTILE;833}834} else if (num_clears || !fb_too_small) {835/* This is where the HTILE buffer clear is done.836*837* If there is no clear scheduled and the framebuffer size is too small, we should use838* the draw-based clear that is without waits. If there is some other clear scheduled,839* we will have to wait anyway, so add the HTILE buffer clear to the batch here.840* If the framebuffer size is large enough, use this codepath too.841*/842uint64_t htile_offset = zstex->surface.meta_offset;843unsigned htile_size = 0;844845/* Determine the HTILE subset to clear. */846if (sctx->chip_class >= GFX10) {847/* This can only clear a layered texture with 1 level or a mipmap texture848* with 1 layer. Other cases are unimplemented.849*/850if (zs_num_layers == 1) {851/* Clear a specific level. */852htile_offset += zstex->surface.u.gfx9.meta_levels[level].offset;853htile_size = zstex->surface.u.gfx9.meta_levels[level].size;854} else if (zstex->buffer.b.b.last_level == 0) {855/* Clear all layers having only 1 level. */856htile_size = zstex->surface.meta_size;857}858} else {859/* This can only clear a layered texture with 1 level. Other cases are860* unimplemented.861*/862if (zstex->buffer.b.b.last_level == 0)863htile_size = zstex->surface.meta_size;864}865866/* Perform the clear if it's possible. */867if (zstex->htile_stencil_disabled || !zstex->surface.has_stencil) {868if (htile_size &&869si_can_fast_clear_depth(zstex, level, depth, *buffers)) {870/* Z-only clear. */871assert(num_clears < ARRAY_SIZE(info));872si_init_buffer_clear(&info[num_clears++], &zstex->buffer.b.b, htile_offset,873htile_size, si_get_htile_clear_value(zstex, depth));874clear_types |= SI_CLEAR_TYPE_HTILE;875*buffers &= ~PIPE_CLEAR_DEPTH;876zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level);877update_db_depth_clear = true;878}879} else if ((*buffers & PIPE_BIND_DEPTH_STENCIL) == PIPE_BIND_DEPTH_STENCIL) {880if (htile_size &&881si_can_fast_clear_depth(zstex, level, depth, *buffers) &&882si_can_fast_clear_stencil(zstex, level, stencil, *buffers)) {883/* Combined Z+S clear. */884assert(num_clears < ARRAY_SIZE(info));885si_init_buffer_clear(&info[num_clears++], &zstex->buffer.b.b, htile_offset,886htile_size, si_get_htile_clear_value(zstex, depth));887clear_types |= SI_CLEAR_TYPE_HTILE;888*buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;889zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level);890zstex->stencil_cleared_level_mask |= BITFIELD_BIT(level);891update_db_depth_clear = true;892update_db_stencil_clear = true;893}894} else {895/* Z-only or S-only clear when both Z/S are present using a read-modify-write896* compute shader.897*898* If we get both clears but only one of them can be fast-cleared, we use899* the draw-based fast clear to do both at the same time.900*/901const uint32_t htile_depth_writemask = 0xfffffc0f;902const uint32_t htile_stencil_writemask = 0x000003f0;903904if (htile_size &&905!(*buffers & PIPE_CLEAR_STENCIL) &&906si_can_fast_clear_depth(zstex, level, depth, *buffers)) {907/* Z-only clear with stencil left intact. */908assert(num_clears < ARRAY_SIZE(info));909si_init_buffer_clear_rmw(&info[num_clears++], &zstex->buffer.b.b, htile_offset,910htile_size, si_get_htile_clear_value(zstex, depth),911htile_depth_writemask);912clear_types |= SI_CLEAR_TYPE_HTILE;913*buffers &= ~PIPE_CLEAR_DEPTH;914zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level);915update_db_depth_clear = true;916} else if (htile_size &&917!(*buffers & PIPE_CLEAR_DEPTH) &&918si_can_fast_clear_stencil(zstex, level, stencil, *buffers)) {919/* Stencil-only clear with depth left intact. */920assert(num_clears < ARRAY_SIZE(info));921si_init_buffer_clear_rmw(&info[num_clears++], &zstex->buffer.b.b, htile_offset,922htile_size, si_get_htile_clear_value(zstex, depth),923htile_stencil_writemask);924clear_types |= SI_CLEAR_TYPE_HTILE;925*buffers &= ~PIPE_CLEAR_STENCIL;926zstex->stencil_cleared_level_mask |= BITFIELD_BIT(level);927update_db_stencil_clear = true;928}929}930931/* Update DB_DEPTH_CLEAR. */932if (update_db_depth_clear &&933zstex->depth_clear_value[level] != (float)depth) {934zstex->depth_clear_value[level] = depth;935sctx->framebuffer.dirty_zsbuf = true;936si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);937}938939/* Update DB_STENCIL_CLEAR. */940if (update_db_stencil_clear &&941zstex->stencil_clear_value[level] != stencil) {942zstex->stencil_clear_value[level] = stencil;943sctx->framebuffer.dirty_zsbuf = true;944si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);945}946}947}948949si_execute_clears(sctx, info, num_clears, clear_types);950}951952static void si_clear(struct pipe_context *ctx, unsigned buffers,953const struct pipe_scissor_state *scissor_state,954const union pipe_color_union *color, double depth, unsigned stencil)955{956struct si_context *sctx = (struct si_context *)ctx;957struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;958struct pipe_surface *zsbuf = fb->zsbuf;959struct si_texture *zstex = zsbuf ? (struct si_texture *)zsbuf->texture : NULL;960bool needs_db_flush = false;961962/* Unset clear flags for non-existent buffers. */963for (unsigned i = 0; i < 8; i++) {964if (i >= fb->nr_cbufs || !fb->cbufs[i])965buffers &= ~(PIPE_CLEAR_COLOR0 << i);966}967if (!zsbuf)968buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;969else if (!util_format_has_stencil(util_format_description(zsbuf->format)))970buffers &= ~PIPE_CLEAR_STENCIL;971972if (buffers & PIPE_CLEAR_DEPTH)973zstex->depth_cleared_level_mask |= BITFIELD_BIT(zsbuf->u.tex.level);974975si_fast_clear(sctx, &buffers, color, depth, stencil);976if (!buffers)977return; /* all buffers have been cleared */978979if (buffers & PIPE_CLEAR_COLOR) {980/* These buffers cannot use fast clear, make sure to disable expansion. */981unsigned color_buffer_mask = (buffers & PIPE_CLEAR_COLOR) >> util_logbase2(PIPE_CLEAR_COLOR0);982while (color_buffer_mask) {983unsigned i = u_bit_scan(&color_buffer_mask);984struct si_texture *tex = (struct si_texture *)fb->cbufs[i]->texture;985if (tex->surface.fmask_size == 0)986tex->dirty_level_mask &= ~(1 << fb->cbufs[i]->u.tex.level);987}988}989990if (zstex && zsbuf->u.tex.first_layer == 0 &&991zsbuf->u.tex.last_layer == util_max_layer(&zstex->buffer.b.b, 0)) {992unsigned level = zsbuf->u.tex.level;993994if (si_can_fast_clear_depth(zstex, level, depth, buffers)) {995/* Need to disable EXPCLEAR temporarily if clearing996* to a new value. */997if (!(zstex->depth_cleared_level_mask_once & BITFIELD_BIT(level)) ||998zstex->depth_clear_value[level] != depth) {999sctx->db_depth_disable_expclear = true;1000}10011002if (zstex->depth_clear_value[level] != (float)depth) {1003if ((zstex->depth_clear_value[level] != 0) != (depth != 0)) {1004/* ZRANGE_PRECISION register of a bound surface will change so we1005* must flush the DB caches. */1006needs_db_flush = true;1007}1008/* Update DB_DEPTH_CLEAR. */1009zstex->depth_clear_value[level] = depth;1010sctx->framebuffer.dirty_zsbuf = true;1011si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);1012}1013sctx->db_depth_clear = true;1014si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);1015}10161017if (si_can_fast_clear_stencil(zstex, level, stencil, buffers)) {1018stencil &= 0xff;10191020/* Need to disable EXPCLEAR temporarily if clearing1021* to a new value. */1022if (!(zstex->stencil_cleared_level_mask & BITFIELD_BIT(level)) ||1023zstex->stencil_clear_value[level] != stencil) {1024sctx->db_stencil_disable_expclear = true;1025}10261027if (zstex->stencil_clear_value[level] != (uint8_t)stencil) {1028/* Update DB_STENCIL_CLEAR. */1029zstex->stencil_clear_value[level] = stencil;1030sctx->framebuffer.dirty_zsbuf = true;1031si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);1032}1033sctx->db_stencil_clear = true;1034si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);1035}10361037if (needs_db_flush)1038sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB;1039}10401041if (unlikely(sctx->thread_trace_enabled)) {1042if (buffers & PIPE_CLEAR_COLOR)1043sctx->sqtt_next_event = EventCmdClearColorImage;1044else if (buffers & PIPE_CLEAR_DEPTHSTENCIL)1045sctx->sqtt_next_event = EventCmdClearDepthStencilImage;1046}10471048si_blitter_begin(sctx, SI_CLEAR);1049util_blitter_clear(sctx->blitter, fb->width, fb->height, util_framebuffer_get_num_layers(fb),1050buffers, color, depth, stencil, sctx->framebuffer.nr_samples > 1);1051si_blitter_end(sctx);10521053if (sctx->db_depth_clear) {1054sctx->db_depth_clear = false;1055sctx->db_depth_disable_expclear = false;1056zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(zsbuf->u.tex.level);1057si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);1058}10591060if (sctx->db_stencil_clear) {1061sctx->db_stencil_clear = false;1062sctx->db_stencil_disable_expclear = false;1063zstex->stencil_cleared_level_mask |= BITFIELD_BIT(zsbuf->u.tex.level);1064si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);1065}1066}10671068static void si_clear_render_target(struct pipe_context *ctx, struct pipe_surface *dst,1069const union pipe_color_union *color, unsigned dstx,1070unsigned dsty, unsigned width, unsigned height,1071bool render_condition_enabled)1072{1073struct si_context *sctx = (struct si_context *)ctx;1074struct si_texture *sdst = (struct si_texture *)dst->texture;10751076if (dst->texture->nr_samples <= 1 && !vi_dcc_enabled(sdst, dst->u.tex.level)) {1077si_compute_clear_render_target(ctx, dst, color, dstx, dsty, width, height,1078render_condition_enabled);1079return;1080}10811082si_blitter_begin(sctx,1083SI_CLEAR_SURFACE | (render_condition_enabled ? 0 : SI_DISABLE_RENDER_COND));1084util_blitter_clear_render_target(sctx->blitter, dst, color, dstx, dsty, width, height);1085si_blitter_end(sctx);1086}10871088static void si_clear_depth_stencil(struct pipe_context *ctx, struct pipe_surface *dst,1089unsigned clear_flags, double depth, unsigned stencil,1090unsigned dstx, unsigned dsty, unsigned width, unsigned height,1091bool render_condition_enabled)1092{1093struct si_context *sctx = (struct si_context *)ctx;10941095si_blitter_begin(sctx,1096SI_CLEAR_SURFACE | (render_condition_enabled ? 0 : SI_DISABLE_RENDER_COND));1097util_blitter_clear_depth_stencil(sctx->blitter, dst, clear_flags, depth, stencil, dstx, dsty,1098width, height);1099si_blitter_end(sctx);1100}11011102static void si_clear_texture(struct pipe_context *pipe, struct pipe_resource *tex, unsigned level,1103const struct pipe_box *box, const void *data)1104{1105struct pipe_screen *screen = pipe->screen;1106struct si_texture *stex = (struct si_texture *)tex;1107struct pipe_surface tmpl = {{0}};1108struct pipe_surface *sf;11091110tmpl.format = tex->format;1111tmpl.u.tex.first_layer = box->z;1112tmpl.u.tex.last_layer = box->z + box->depth - 1;1113tmpl.u.tex.level = level;1114sf = pipe->create_surface(pipe, tex, &tmpl);1115if (!sf)1116return;11171118if (stex->is_depth) {1119unsigned clear;1120float depth;1121uint8_t stencil = 0;11221123/* Depth is always present. */1124clear = PIPE_CLEAR_DEPTH;1125util_format_unpack_z_float(tex->format, &depth, data, 1);11261127if (stex->surface.has_stencil) {1128clear |= PIPE_CLEAR_STENCIL;1129util_format_unpack_s_8uint(tex->format, &stencil, data, 1);1130}11311132si_clear_depth_stencil(pipe, sf, clear, depth, stencil, box->x, box->y, box->width,1133box->height, false);1134} else {1135union pipe_color_union color;11361137util_format_unpack_rgba(tex->format, color.ui, data, 1);11381139if (screen->is_format_supported(screen, tex->format, tex->target, 0, 0,1140PIPE_BIND_RENDER_TARGET)) {1141si_clear_render_target(pipe, sf, &color, box->x, box->y, box->width, box->height, false);1142} else {1143/* Software fallback - just for R9G9B9E5_FLOAT */1144util_clear_render_target(pipe, sf, &color, box->x, box->y, box->width, box->height);1145}1146}1147pipe_surface_reference(&sf, NULL);1148}11491150void si_init_clear_functions(struct si_context *sctx)1151{1152sctx->b.clear_render_target = si_clear_render_target;1153sctx->b.clear_texture = si_clear_texture;11541155if (sctx->has_graphics) {1156sctx->b.clear = si_clear;1157sctx->b.clear_depth_stencil = si_clear_depth_stencil;1158}1159}116011611162