/* Path: blob/21.2-virgl/src/amd/common/ac_surface.c */
/* 7132 views */
/*1* Copyright © 2011 Red Hat All Rights Reserved.2* Copyright © 2017 Advanced Micro Devices, Inc.3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining6* a copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sub license, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,14* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES15* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND16* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS17* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,19* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE20* USE OR OTHER DEALINGS IN THE SOFTWARE.21*22* The above copyright notice and this permission notice (including the23* next paragraph) shall be included in all copies or substantial portions24* of the Software.25*/2627#define AC_SURFACE_INCLUDE_NIR28#include "ac_surface.h"2930#include "ac_drm_fourcc.h"31#include "ac_gpu_info.h"32#include "addrlib/inc/addrinterface.h"33#include "addrlib/src/amdgpu_asic_addr.h"34#include "amd_family.h"35#include "sid.h"36#include "util/hash_table.h"37#include "util/macros.h"38#include "util/simple_mtx.h"39#include "util/u_atomic.h"40#include "util/format/u_format.h"41#include "util/u_math.h"42#include "util/u_memory.h"4344#include <errno.h>45#include <stdio.h>46#include <stdlib.h>4748#ifdef _WIN3249#define AMDGPU_TILING_ARRAY_MODE_SHIFT 050#define AMDGPU_TILING_ARRAY_MODE_MASK 0xf51#define AMDGPU_TILING_PIPE_CONFIG_SHIFT 452#define AMDGPU_TILING_PIPE_CONFIG_MASK 0x1f53#define AMDGPU_TILING_TILE_SPLIT_SHIFT 
954#define AMDGPU_TILING_TILE_SPLIT_MASK 0x755#define AMDGPU_TILING_MICRO_TILE_MODE_SHIFT 1256#define AMDGPU_TILING_MICRO_TILE_MODE_MASK 0x757#define AMDGPU_TILING_BANK_WIDTH_SHIFT 1558#define AMDGPU_TILING_BANK_WIDTH_MASK 0x359#define AMDGPU_TILING_BANK_HEIGHT_SHIFT 1760#define AMDGPU_TILING_BANK_HEIGHT_MASK 0x361#define AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT 1962#define AMDGPU_TILING_MACRO_TILE_ASPECT_MASK 0x363#define AMDGPU_TILING_NUM_BANKS_SHIFT 2164#define AMDGPU_TILING_NUM_BANKS_MASK 0x365#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 066#define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f67#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT 568#define AMDGPU_TILING_DCC_OFFSET_256B_MASK 0xFFFFFF69#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT 2970#define AMDGPU_TILING_DCC_PITCH_MAX_MASK 0x3FFF71#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT 4372#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK 0x173#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 4474#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x175#define AMDGPU_TILING_SCANOUT_SHIFT 6376#define AMDGPU_TILING_SCANOUT_MASK 0x177#define AMDGPU_TILING_SET(field, value) \78(((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)79#define AMDGPU_TILING_GET(value, field) \80(((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK)81#else82#include "drm-uapi/amdgpu_drm.h"83#endif8485#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND86#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A87#endif8889#ifndef CIASICIDGFXENGINE_ARCTICISLAND90#define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D91#endif9293struct ac_addrlib {94ADDR_HANDLE handle;95};9697bool ac_modifier_has_dcc(uint64_t modifier)98{99return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier);100}101102bool ac_modifier_has_dcc_retile(uint64_t modifier)103{104return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC_RETILE, modifier);105}106107static108AddrSwizzleMode ac_modifier_gfx9_swizzle_mode(uint64_t modifier)109{110if (modifier == 
DRM_FORMAT_MOD_LINEAR)111return ADDR_SW_LINEAR;112113return AMD_FMT_MOD_GET(TILE, modifier);114}115static void116ac_modifier_fill_dcc_params(uint64_t modifier, struct radeon_surf *surf,117ADDR2_COMPUTE_SURFACE_INFO_INPUT *surf_info)118{119assert(ac_modifier_has_dcc(modifier));120121if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) {122surf_info->flags.metaPipeUnaligned = 0;123} else {124surf_info->flags.metaPipeUnaligned = !AMD_FMT_MOD_GET(DCC_PIPE_ALIGN, modifier);125}126127/* The metaPipeUnaligned is not strictly necessary, but ensure we don't set metaRbUnaligned on128* non-displayable DCC surfaces just because num_render_backends = 1 */129surf_info->flags.metaRbUnaligned = AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 &&130AMD_FMT_MOD_GET(RB, modifier) == 0 &&131surf_info->flags.metaPipeUnaligned;132133surf->u.gfx9.color.dcc.independent_64B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier);134surf->u.gfx9.color.dcc.independent_128B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier);135surf->u.gfx9.color.dcc.max_compressed_block_size = AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier);136}137138bool ac_is_modifier_supported(const struct radeon_info *info,139const struct ac_modifier_options *options,140enum pipe_format format,141uint64_t modifier)142{143144if (util_format_is_compressed(format) ||145util_format_is_depth_or_stencil(format) ||146util_format_get_blocksizebits(format) > 64)147return false;148149if (info->chip_class < GFX9)150return false;151152if(modifier == DRM_FORMAT_MOD_LINEAR)153return true;154155/* GFX8 may need a different modifier for each plane */156if (info->chip_class < GFX9 && util_format_get_num_planes(format) > 1)157return false;158159uint32_t allowed_swizzles = 0xFFFFFFFF;160switch(info->chip_class) {161case GFX9:162allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x06000000 : 0x06660660;163break;164case GFX10:165case GFX10_3:166allowed_swizzles = ac_modifier_has_dcc(modifier) ? 
0x08000000 : 0x0E660660;167break;168default:169return false;170}171172if (!((1u << ac_modifier_gfx9_swizzle_mode(modifier)) & allowed_swizzles))173return false;174175if (ac_modifier_has_dcc(modifier)) {176/* TODO: support multi-planar formats with DCC */177if (util_format_get_num_planes(format) > 1)178return false;179180if (!info->has_graphics)181return false;182183if (!options->dcc)184return false;185186if (ac_modifier_has_dcc_retile(modifier) && !options->dcc_retile)187return false;188}189190return true;191}192193bool ac_get_supported_modifiers(const struct radeon_info *info,194const struct ac_modifier_options *options,195enum pipe_format format,196unsigned *mod_count,197uint64_t *mods)198{199unsigned current_mod = 0;200201#define ADD_MOD(name) \202if (ac_is_modifier_supported(info, options, format, (name))) { \203if (mods && current_mod < *mod_count) \204mods[current_mod] = (name); \205++current_mod; \206}207208/* The modifiers have to be added in descending order of estimated209* performance. The drivers will prefer modifiers that come earlier210* in the list. 
*/211switch (info->chip_class) {212case GFX9: {213unsigned pipe_xor_bits = MIN2(G_0098F8_NUM_PIPES(info->gb_addr_config) +214G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config), 8);215unsigned bank_xor_bits = MIN2(G_0098F8_NUM_BANKS(info->gb_addr_config), 8 - pipe_xor_bits);216unsigned pipes = G_0098F8_NUM_PIPES(info->gb_addr_config);217unsigned rb = G_0098F8_NUM_RB_PER_SE(info->gb_addr_config) +218G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config);219220uint64_t common_dcc = AMD_FMT_MOD_SET(DCC, 1) |221AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |222AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |223AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, info->has_dcc_constant_encode) |224AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |225AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits);226227ADD_MOD(AMD_FMT_MOD |228AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) |229AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |230AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |231common_dcc |232AMD_FMT_MOD_SET(PIPE, pipes) |233AMD_FMT_MOD_SET(RB, rb))234235ADD_MOD(AMD_FMT_MOD |236AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |237AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |238AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |239common_dcc |240AMD_FMT_MOD_SET(PIPE, pipes) |241AMD_FMT_MOD_SET(RB, rb))242243if (util_format_get_blocksizebits(format) == 32) {244if (info->max_render_backends == 1) {245ADD_MOD(AMD_FMT_MOD |246AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |247AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |248common_dcc);249}250251252ADD_MOD(AMD_FMT_MOD |253AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |254AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |255AMD_FMT_MOD_SET(DCC_RETILE, 1) |256common_dcc |257AMD_FMT_MOD_SET(PIPE, pipes) |258AMD_FMT_MOD_SET(RB, rb))259}260261262ADD_MOD(AMD_FMT_MOD |263AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) |264AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) 
|265AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |266AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));267268ADD_MOD(AMD_FMT_MOD |269AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |270AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |271AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |272AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));273274ADD_MOD(AMD_FMT_MOD |275AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |276AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));277278ADD_MOD(AMD_FMT_MOD |279AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |280AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));281282ADD_MOD(DRM_FORMAT_MOD_LINEAR)283break;284}285case GFX10:286case GFX10_3: {287bool rbplus = info->chip_class >= GFX10_3;288unsigned pipe_xor_bits = G_0098F8_NUM_PIPES(info->gb_addr_config);289unsigned pkrs = rbplus ? G_0098F8_NUM_PKRS(info->gb_addr_config) : 0;290291unsigned version = rbplus ? AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS : AMD_FMT_MOD_TILE_VER_GFX10;292uint64_t common_dcc = AMD_FMT_MOD_SET(TILE_VERSION, version) |293AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |294AMD_FMT_MOD_SET(DCC, 1) |295AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |296AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |297AMD_FMT_MOD_SET(PACKERS, pkrs);298299ADD_MOD(AMD_FMT_MOD | common_dcc |300AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |301AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |302AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B))303304if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14 || info->chip_class >= GFX10_3) {305bool independent_128b = info->chip_class >= GFX10_3;306307if (info->max_render_backends == 1) {308ADD_MOD(AMD_FMT_MOD | common_dcc |309AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |310AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, independent_128b) |311AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B))312}313314ADD_MOD(AMD_FMT_MOD | common_dcc |315AMD_FMT_MOD_SET(DCC_RETILE, 1) 
|316AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |317AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, independent_128b) |318AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B))319}320321ADD_MOD(AMD_FMT_MOD |322AMD_FMT_MOD_SET(TILE_VERSION, version) |323AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |324AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |325AMD_FMT_MOD_SET(PACKERS, pkrs))326327ADD_MOD(AMD_FMT_MOD |328AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |329AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |330AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits))331332if (util_format_get_blocksizebits(format) != 32) {333ADD_MOD(AMD_FMT_MOD |334AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |335AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));336}337338ADD_MOD(AMD_FMT_MOD |339AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |340AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));341342ADD_MOD(DRM_FORMAT_MOD_LINEAR)343break;344}345default:346break;347}348349#undef ADD_MOD350351if (!mods) {352*mod_count = current_mod;353return true;354}355356bool complete = current_mod <= *mod_count;357*mod_count = MIN2(*mod_count, current_mod);358return complete;359}360361static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT *pInput)362{363return malloc(pInput->sizeInBytes);364}365366static ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT *pInput)367{368free(pInput->pVirtAddr);369return ADDR_OK;370}371372struct ac_addrlib *ac_addrlib_create(const struct radeon_info *info,373uint64_t *max_alignment)374{375ADDR_CREATE_INPUT addrCreateInput = {0};376ADDR_CREATE_OUTPUT addrCreateOutput = {0};377ADDR_REGISTER_VALUE regValue = {0};378ADDR_CREATE_FLAGS createFlags = {{0}};379ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0};380ADDR_E_RETURNCODE addrRet;381382addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);383addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);384385regValue.gbAddrConfig = 
info->gb_addr_config;386createFlags.value = 0;387388addrCreateInput.chipFamily = info->family_id;389addrCreateInput.chipRevision = info->chip_external_rev;390391if (addrCreateInput.chipFamily == FAMILY_UNKNOWN)392return NULL;393394if (addrCreateInput.chipFamily >= FAMILY_AI) {395addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND;396} else {397regValue.noOfBanks = info->mc_arb_ramcfg & 0x3;398regValue.noOfRanks = (info->mc_arb_ramcfg & 0x4) >> 2;399400regValue.backendDisables = info->enabled_rb_mask;401regValue.pTileConfig = info->si_tile_mode_array;402regValue.noOfEntries = ARRAY_SIZE(info->si_tile_mode_array);403if (addrCreateInput.chipFamily == FAMILY_SI) {404regValue.pMacroTileConfig = NULL;405regValue.noOfMacroEntries = 0;406} else {407regValue.pMacroTileConfig = info->cik_macrotile_mode_array;408regValue.noOfMacroEntries = ARRAY_SIZE(info->cik_macrotile_mode_array);409}410411createFlags.useTileIndex = 1;412createFlags.useHtileSliceAlign = 1;413414addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;415}416417addrCreateInput.callbacks.allocSysMem = allocSysMem;418addrCreateInput.callbacks.freeSysMem = freeSysMem;419addrCreateInput.callbacks.debugPrint = 0;420addrCreateInput.createFlags = createFlags;421addrCreateInput.regValue = regValue;422423addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput);424if (addrRet != ADDR_OK)425return NULL;426427if (max_alignment) {428addrRet = AddrGetMaxAlignments(addrCreateOutput.hLib, &addrGetMaxAlignmentsOutput);429if (addrRet == ADDR_OK) {430*max_alignment = addrGetMaxAlignmentsOutput.baseAlign;431}432}433434struct ac_addrlib *addrlib = calloc(1, sizeof(struct ac_addrlib));435if (!addrlib) {436AddrDestroy(addrCreateOutput.hLib);437return NULL;438}439440addrlib->handle = addrCreateOutput.hLib;441return addrlib;442}443444void ac_addrlib_destroy(struct ac_addrlib *addrlib)445{446AddrDestroy(addrlib->handle);447free(addrlib);448}449450void *ac_addrlib_get_handle(struct ac_addrlib *addrlib)451{452return 
addrlib->handle;453}454455static int surf_config_sanity(const struct ac_surf_config *config, unsigned flags)456{457/* FMASK is allocated together with the color surface and can't be458* allocated separately.459*/460assert(!(flags & RADEON_SURF_FMASK));461if (flags & RADEON_SURF_FMASK)462return -EINVAL;463464/* all dimension must be at least 1 ! */465if (!config->info.width || !config->info.height || !config->info.depth ||466!config->info.array_size || !config->info.levels)467return -EINVAL;468469switch (config->info.samples) {470case 0:471case 1:472case 2:473case 4:474case 8:475break;476case 16:477if (flags & RADEON_SURF_Z_OR_SBUFFER)478return -EINVAL;479break;480default:481return -EINVAL;482}483484if (!(flags & RADEON_SURF_Z_OR_SBUFFER)) {485switch (config->info.storage_samples) {486case 0:487case 1:488case 2:489case 4:490case 8:491break;492default:493return -EINVAL;494}495}496497if (config->is_3d && config->info.array_size > 1)498return -EINVAL;499if (config->is_cube && config->info.depth > 1)500return -EINVAL;501502return 0;503}504505static int gfx6_compute_level(ADDR_HANDLE addrlib, const struct ac_surf_config *config,506struct radeon_surf *surf, bool is_stencil, unsigned level,507bool compressed, ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,508ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,509ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,510ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,511ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,512ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)513{514struct legacy_surf_level *surf_level;515struct legacy_surf_dcc_level *dcc_level;516ADDR_E_RETURNCODE ret;517518AddrSurfInfoIn->mipLevel = level;519AddrSurfInfoIn->width = u_minify(config->info.width, level);520AddrSurfInfoIn->height = u_minify(config->info.height, level);521522/* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics,523* because GFX9 needs linear alignment of 256 bytes.524*/525if (config->info.levels == 1 && AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED 
&&526AddrSurfInfoIn->bpp && util_is_power_of_two_or_zero(AddrSurfInfoIn->bpp)) {527unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8);528529AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment);530}531532/* addrlib assumes the bytes/pixel is a divisor of 64, which is not533* true for r32g32b32 formats. */534if (AddrSurfInfoIn->bpp == 96) {535assert(config->info.levels == 1);536assert(AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED);537538/* The least common multiple of 64 bytes and 12 bytes/pixel is539* 192 bytes, or 16 pixels. */540AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, 16);541}542543if (config->is_3d)544AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level);545else if (config->is_cube)546AddrSurfInfoIn->numSlices = 6;547else548AddrSurfInfoIn->numSlices = config->info.array_size;549550if (level > 0) {551/* Set the base level pitch. This is needed for calculation552* of non-zero levels. */553if (is_stencil)554AddrSurfInfoIn->basePitch = surf->u.legacy.zs.stencil_level[0].nblk_x;555else556AddrSurfInfoIn->basePitch = surf->u.legacy.level[0].nblk_x;557558/* Convert blocks to pixels for compressed formats. */559if (compressed)560AddrSurfInfoIn->basePitch *= surf->blk_w;561}562563ret = AddrComputeSurfaceInfo(addrlib, AddrSurfInfoIn, AddrSurfInfoOut);564if (ret != ADDR_OK) {565return ret;566}567568surf_level = is_stencil ? 
&surf->u.legacy.zs.stencil_level[level] : &surf->u.legacy.level[level];569dcc_level = &surf->u.legacy.color.dcc_level[level];570surf_level->offset_256B = align64(surf->surf_size, AddrSurfInfoOut->baseAlign) / 256;571surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4;572surf_level->nblk_x = AddrSurfInfoOut->pitch;573surf_level->nblk_y = AddrSurfInfoOut->height;574575switch (AddrSurfInfoOut->tileMode) {576case ADDR_TM_LINEAR_ALIGNED:577surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;578break;579case ADDR_TM_1D_TILED_THIN1:580case ADDR_TM_PRT_TILED_THIN1:581surf_level->mode = RADEON_SURF_MODE_1D;582break;583case ADDR_TM_2D_TILED_THIN1:584case ADDR_TM_PRT_2D_TILED_THIN1:585surf_level->mode = RADEON_SURF_MODE_2D;586break;587default:588assert(0);589}590591if (is_stencil)592surf->u.legacy.zs.stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex;593else594surf->u.legacy.tiling_index[level] = AddrSurfInfoOut->tileIndex;595596if (AddrSurfInfoIn->flags.prt) {597if (level == 0) {598surf->prt_tile_width = AddrSurfInfoOut->pitchAlign;599surf->prt_tile_height = AddrSurfInfoOut->heightAlign;600}601if (surf_level->nblk_x >= surf->prt_tile_width &&602surf_level->nblk_y >= surf->prt_tile_height) {603/* +1 because the current level is not in the miptail */604surf->first_mip_tail_level = level + 1;605}606}607608surf->surf_size = (uint64_t)surf_level->offset_256B * 256 + AddrSurfInfoOut->surfSize;609610/* Clear DCC fields at the beginning. */611if (!AddrSurfInfoIn->flags.depth && !AddrSurfInfoIn->flags.stencil)612dcc_level->dcc_offset = 0;613614/* The previous level's flag tells us if we can use DCC for this level. 
*/615if (AddrSurfInfoIn->flags.dccCompatible && (level == 0 || AddrDccOut->subLvlCompressible)) {616bool prev_level_clearable = level == 0 || AddrDccOut->dccRamSizeAligned;617618AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;619AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;620AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;621AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;622AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;623624ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut);625626if (ret == ADDR_OK) {627dcc_level->dcc_offset = surf->meta_size;628surf->num_meta_levels = level + 1;629surf->meta_size = dcc_level->dcc_offset + AddrDccOut->dccRamSize;630surf->meta_alignment_log2 = MAX2(surf->meta_alignment_log2, util_logbase2(AddrDccOut->dccRamBaseAlign));631632/* If the DCC size of a subresource (1 mip level or 1 slice)633* is not aligned, the DCC memory layout is not contiguous for634* that subresource, which means we can't use fast clear.635*636* We only do fast clears for whole mipmap levels. If we did637* per-slice fast clears, the same restriction would apply.638* (i.e. only compute the slice size and see if it's aligned)639*640* The last level can be non-contiguous and still be clearable641* if it's interleaved with the next level that doesn't exist.642*/643if (AddrDccOut->dccRamSizeAligned ||644(prev_level_clearable && level == config->info.levels - 1))645dcc_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize;646else647dcc_level->dcc_fast_clear_size = 0;648649/* Compute the DCC slice size because addrlib doesn't650* provide this info. 
As DCC memory is linear (each651* slice is the same size) it's easy to compute.652*/653surf->meta_slice_size = AddrDccOut->dccRamSize / config->info.array_size;654655/* For arrays, we have to compute the DCC info again656* with one slice size to get a correct fast clear657* size.658*/659if (config->info.array_size > 1) {660AddrDccIn->colorSurfSize = AddrSurfInfoOut->sliceSize;661AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;662AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;663AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;664AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;665666ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut);667if (ret == ADDR_OK) {668/* If the DCC memory isn't properly669* aligned, the data are interleaved670* accross slices.671*/672if (AddrDccOut->dccRamSizeAligned)673dcc_level->dcc_slice_fast_clear_size = AddrDccOut->dccFastClearSize;674else675dcc_level->dcc_slice_fast_clear_size = 0;676}677678if (surf->flags & RADEON_SURF_CONTIGUOUS_DCC_LAYERS &&679surf->meta_slice_size != dcc_level->dcc_slice_fast_clear_size) {680surf->meta_size = 0;681surf->num_meta_levels = 0;682AddrDccOut->subLvlCompressible = false;683}684} else {685dcc_level->dcc_slice_fast_clear_size = dcc_level->dcc_fast_clear_size;686}687}688}689690/* HTILE. 
*/691if (!is_stencil && AddrSurfInfoIn->flags.depth && surf_level->mode == RADEON_SURF_MODE_2D &&692level == 0 && !(surf->flags & RADEON_SURF_NO_HTILE)) {693AddrHtileIn->flags.tcCompatible = AddrSurfInfoOut->tcCompatible;694AddrHtileIn->pitch = AddrSurfInfoOut->pitch;695AddrHtileIn->height = AddrSurfInfoOut->height;696AddrHtileIn->numSlices = AddrSurfInfoOut->depth;697AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8;698AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8;699AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo;700AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex;701AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;702703ret = AddrComputeHtileInfo(addrlib, AddrHtileIn, AddrHtileOut);704705if (ret == ADDR_OK) {706surf->meta_size = AddrHtileOut->htileBytes;707surf->meta_slice_size = AddrHtileOut->sliceSize;708surf->meta_alignment_log2 = util_logbase2(AddrHtileOut->baseAlign);709surf->meta_pitch = AddrHtileOut->pitch;710surf->num_meta_levels = level + 1;711}712}713714return 0;715}716717static void gfx6_set_micro_tile_mode(struct radeon_surf *surf, const struct radeon_info *info)718{719uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]];720721if (info->chip_class >= GFX7)722surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode);723else724surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);725}726727static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)728{729unsigned index, tileb;730731tileb = 8 * 8 * surf->bpe;732tileb = MIN2(surf->u.legacy.tile_split, tileb);733734for (index = 0; tileb > 64; index++)735tileb >>= 1;736737assert(index < 16);738return index;739}740741static bool get_display_flag(const struct ac_surf_config *config, const struct radeon_surf *surf)742{743unsigned num_channels = config->info.num_channels;744unsigned bpe = surf->bpe;745746/* With modifiers the kernel is in charge of whether it is displayable.747* We need to ensure at least 32 pixels pitch alignment, but this 
is748* always the case when the blocksize >= 4K.749*/750if (surf->modifier != DRM_FORMAT_MOD_INVALID)751return false;752753if (!config->is_3d && !config->is_cube && !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&754surf->flags & RADEON_SURF_SCANOUT && config->info.samples <= 1 && surf->blk_w <= 2 &&755surf->blk_h == 1) {756/* subsampled */757if (surf->blk_w == 2 && surf->blk_h == 1)758return true;759760if (/* RGBA8 or RGBA16F */761(bpe >= 4 && bpe <= 8 && num_channels == 4) ||762/* R5G6B5 or R5G5B5A1 */763(bpe == 2 && num_channels >= 3) ||764/* C8 palette */765(bpe == 1 && num_channels == 1))766return true;767}768return false;769}770771/**772* This must be called after the first level is computed.773*774* Copy surface-global settings like pipe/bank config from level 0 surface775* computation, and compute tile swizzle.776*/777static int gfx6_surface_settings(ADDR_HANDLE addrlib, const struct radeon_info *info,778const struct ac_surf_config *config,779ADDR_COMPUTE_SURFACE_INFO_OUTPUT *csio, struct radeon_surf *surf)780{781surf->surf_alignment_log2 = util_logbase2(csio->baseAlign);782surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1;783gfx6_set_micro_tile_mode(surf, info);784785/* For 2D modes only. */786if (csio->tileMode >= ADDR_TM_2D_TILED_THIN1) {787surf->u.legacy.bankw = csio->pTileInfo->bankWidth;788surf->u.legacy.bankh = csio->pTileInfo->bankHeight;789surf->u.legacy.mtilea = csio->pTileInfo->macroAspectRatio;790surf->u.legacy.tile_split = csio->pTileInfo->tileSplitBytes;791surf->u.legacy.num_banks = csio->pTileInfo->banks;792surf->u.legacy.macro_tile_index = csio->macroModeIndex;793} else {794surf->u.legacy.macro_tile_index = 0;795}796797/* Compute tile swizzle. 
*/798/* TODO: fix tile swizzle with mipmapping for GFX6 */799if ((info->chip_class >= GFX7 || config->info.levels == 1) && config->info.surf_index &&800surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&801!(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) &&802!get_display_flag(config, surf)) {803ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};804ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};805806AddrBaseSwizzleIn.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);807AddrBaseSwizzleOut.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);808809AddrBaseSwizzleIn.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;810AddrBaseSwizzleIn.tileIndex = csio->tileIndex;811AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex;812AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo;813AddrBaseSwizzleIn.tileMode = csio->tileMode;814815int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, &AddrBaseSwizzleOut);816if (r != ADDR_OK)817return r;818819assert(AddrBaseSwizzleOut.tileSwizzle <=820u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));821surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;822}823return 0;824}825826static void ac_compute_cmask(const struct radeon_info *info, const struct ac_surf_config *config,827struct radeon_surf *surf)828{829unsigned pipe_interleave_bytes = info->pipe_interleave_bytes;830unsigned num_pipes = info->num_tile_pipes;831unsigned cl_width, cl_height;832833if (surf->flags & RADEON_SURF_Z_OR_SBUFFER || surf->is_linear ||834(config->info.samples >= 2 && !surf->fmask_size))835return;836837assert(info->chip_class <= GFX8);838839switch (num_pipes) {840case 2:841cl_width = 32;842cl_height = 16;843break;844case 4:845cl_width = 32;846cl_height = 32;847break;848case 8:849cl_width = 64;850cl_height = 32;851break;852case 16: /* Hawaii */853cl_width = 64;854cl_height = 64;855break;856default:857assert(0);858return;859}860861unsigned base_align = num_pipes * pipe_interleave_bytes;862863unsigned width = 
align(surf->u.legacy.level[0].nblk_x, cl_width * 8);864unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height * 8);865unsigned slice_elements = (width * height) / (8 * 8);866867/* Each element of CMASK is a nibble. */868unsigned slice_bytes = slice_elements / 2;869870surf->u.legacy.color.cmask_slice_tile_max = (width * height) / (128 * 128);871if (surf->u.legacy.color.cmask_slice_tile_max)872surf->u.legacy.color.cmask_slice_tile_max -= 1;873874unsigned num_layers;875if (config->is_3d)876num_layers = config->info.depth;877else if (config->is_cube)878num_layers = 6;879else880num_layers = config->info.array_size;881882surf->cmask_alignment_log2 = util_logbase2(MAX2(256, base_align));883surf->cmask_slice_size = align(slice_bytes, base_align);884surf->cmask_size = surf->cmask_slice_size * num_layers;885}886887/**888* Fill in the tiling information in \p surf based on the given surface config.889*890* The following fields of \p surf must be initialized by the caller:891* blk_w, blk_h, bpe, flags.892*/893static int gfx6_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info,894const struct ac_surf_config *config, enum radeon_surf_mode mode,895struct radeon_surf *surf)896{897unsigned level;898bool compressed;899ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};900ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};901ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};902ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};903ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};904ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};905ADDR_TILEINFO AddrTileInfoIn = {0};906ADDR_TILEINFO AddrTileInfoOut = {0};907int r;908909AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);910AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);911AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);912AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);913AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT);914AddrHtileOut.size = 
sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT);915AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;916917compressed = surf->blk_w == 4 && surf->blk_h == 4;918919/* MSAA requires 2D tiling. */920if (config->info.samples > 1)921mode = RADEON_SURF_MODE_2D;922923/* DB doesn't support linear layouts. */924if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) && mode < RADEON_SURF_MODE_1D)925mode = RADEON_SURF_MODE_1D;926927/* Set the requested tiling mode. */928switch (mode) {929case RADEON_SURF_MODE_LINEAR_ALIGNED:930AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED;931break;932case RADEON_SURF_MODE_1D:933if (surf->flags & RADEON_SURF_PRT)934AddrSurfInfoIn.tileMode = ADDR_TM_PRT_TILED_THIN1;935else936AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1;937break;938case RADEON_SURF_MODE_2D:939if (surf->flags & RADEON_SURF_PRT)940AddrSurfInfoIn.tileMode = ADDR_TM_PRT_2D_TILED_THIN1;941else942AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1;943break;944default:945assert(0);946}947948/* The format must be set correctly for the allocation of compressed949* textures to work. In other cases, setting the bpp is sufficient.950*/951if (compressed) {952switch (surf->bpe) {953case 8:954AddrSurfInfoIn.format = ADDR_FMT_BC1;955break;956case 16:957AddrSurfInfoIn.format = ADDR_FMT_BC3;958break;959default:960assert(0);961}962} else {963AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8;964}965966AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);967AddrSurfInfoIn.tileIndex = -1;968969if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {970AddrDccIn.numSamples = AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples);971}972973/* Set the micro tile type. 
*/974if (surf->flags & RADEON_SURF_SCANOUT)975AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;976else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)977AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;978else979AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;980981AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);982AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;983AddrSurfInfoIn.flags.cube = config->is_cube;984AddrSurfInfoIn.flags.display = get_display_flag(config, surf);985AddrSurfInfoIn.flags.pow2Pad = config->info.levels > 1;986AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;987AddrSurfInfoIn.flags.prt = (surf->flags & RADEON_SURF_PRT) != 0;988989/* Only degrade the tile mode for space if TC-compatible HTILE hasn't been990* requested, because TC-compatible HTILE requires 2D tiling.991*/992AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible &&993!AddrSurfInfoIn.flags.fmask && config->info.samples <= 1 &&994!(surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE);995996/* DCC notes:997* - If we add MSAA support, keep in mind that CB can't decompress 8bpp998* with samples >= 4.999* - Mipmapped array textures have low performance (discovered by a closed1000* driver team).1001*/1002AddrSurfInfoIn.flags.dccCompatible =1003info->chip_class >= GFX8 && info->has_graphics && /* disable DCC on compute-only chips */1004!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && !(surf->flags & RADEON_SURF_DISABLE_DCC) &&1005!compressed &&1006((config->info.array_size == 1 && config->info.depth == 1) || config->info.levels == 1);10071008AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0;1009AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER);10101011/* On GFX7-GFX8, the DB uses the same pitch and tile mode (except tilesplit)1012* for Z and stencil. 
This can cause a number of problems which we work1013* around here:1014*1015* - a depth part that is incompatible with mipmapped texturing1016* - at least on Stoney, entirely incompatible Z/S aspects (e.g.1017* incorrect tiling applied to the stencil part, stencil buffer1018* memory accesses that go out of bounds) even without mipmapping1019*1020* Some piglit tests that are prone to different types of related1021* failures:1022* ./bin/ext_framebuffer_multisample-upsample 2 stencil1023* ./bin/framebuffer-blit-levels {draw,read} stencil1024* ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample}1025* ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw}1026* ./bin/depthstencil-render-miplevels 1024 d=s=z24_s81027*/1028int stencil_tile_idx = -1;10291030if (AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.noStencil &&1031(config->info.levels > 1 || info->family == CHIP_STONEY)) {1032/* Compute stencilTileIdx that is compatible with the (depth)1033* tileIdx. This degrades the depth surface if necessary to1034* ensure that a matching stencilTileIdx exists. */1035AddrSurfInfoIn.flags.matchStencilTileCfg = 1;10361037/* Keep the depth mip-tail compatible with texturing. */1038AddrSurfInfoIn.flags.noStencil = 1;1039}10401041/* Set preferred macrotile parameters. This is usually required1042* for shared resources. This is for 2D tiling only. */1043if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&1044AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 && surf->u.legacy.bankw &&1045surf->u.legacy.bankh && surf->u.legacy.mtilea && surf->u.legacy.tile_split) {1046/* If any of these parameters are incorrect, the calculation1047* will fail. 
*/1048AddrTileInfoIn.banks = surf->u.legacy.num_banks;1049AddrTileInfoIn.bankWidth = surf->u.legacy.bankw;1050AddrTileInfoIn.bankHeight = surf->u.legacy.bankh;1051AddrTileInfoIn.macroAspectRatio = surf->u.legacy.mtilea;1052AddrTileInfoIn.tileSplitBytes = surf->u.legacy.tile_split;1053AddrTileInfoIn.pipeConfig = surf->u.legacy.pipe_config + 1; /* +1 compared to GB_TILE_MODE */1054AddrSurfInfoIn.flags.opt4Space = 0;1055AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;10561057/* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set1058* the tile index, because we are expected to know it if1059* we know the other parameters.1060*1061* This is something that can easily be fixed in Addrlib.1062* For now, just figure it out here.1063* Note that only 2D_TILE_THIN1 is handled here.1064*/1065assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));1066assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1);10671068if (info->chip_class == GFX6) {1069if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) {1070if (surf->bpe == 2)1071AddrSurfInfoIn.tileIndex = 11; /* 16bpp */1072else1073AddrSurfInfoIn.tileIndex = 12; /* 32bpp */1074} else {1075if (surf->bpe == 1)1076AddrSurfInfoIn.tileIndex = 14; /* 8bpp */1077else if (surf->bpe == 2)1078AddrSurfInfoIn.tileIndex = 15; /* 16bpp */1079else if (surf->bpe == 4)1080AddrSurfInfoIn.tileIndex = 16; /* 32bpp */1081else1082AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */1083}1084} else {1085/* GFX7 - GFX8 */1086if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE)1087AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */1088else1089AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */10901091/* Addrlib doesn't set this if tileIndex is forced like above. 
*/1092AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf);1093}1094}10951096surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);1097surf->num_meta_levels = 0;1098surf->surf_size = 0;1099surf->meta_size = 0;1100surf->meta_slice_size = 0;1101surf->meta_alignment_log2 = 0;11021103const bool only_stencil =1104(surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER);11051106/* Calculate texture layout information. */1107if (!only_stencil) {1108for (level = 0; level < config->info.levels; level++) {1109r = gfx6_compute_level(addrlib, config, surf, false, level, compressed, &AddrSurfInfoIn,1110&AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, &AddrHtileIn,1111&AddrHtileOut);1112if (r)1113return r;11141115if (level > 0)1116continue;11171118if (!AddrSurfInfoOut.tcCompatible) {1119AddrSurfInfoIn.flags.tcCompatible = 0;1120surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;1121}11221123if (AddrSurfInfoIn.flags.matchStencilTileCfg) {1124AddrSurfInfoIn.flags.matchStencilTileCfg = 0;1125AddrSurfInfoIn.tileIndex = AddrSurfInfoOut.tileIndex;1126stencil_tile_idx = AddrSurfInfoOut.stencilTileIdx;11271128assert(stencil_tile_idx >= 0);1129}11301131r = gfx6_surface_settings(addrlib, info, config, &AddrSurfInfoOut, surf);1132if (r)1133return r;1134}1135}11361137/* Calculate texture layout information for stencil. */1138if (surf->flags & RADEON_SURF_SBUFFER) {1139AddrSurfInfoIn.tileIndex = stencil_tile_idx;1140AddrSurfInfoIn.bpp = 8;1141AddrSurfInfoIn.flags.depth = 0;1142AddrSurfInfoIn.flags.stencil = 1;1143AddrSurfInfoIn.flags.tcCompatible = 0;1144/* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. 
*/1145AddrTileInfoIn.tileSplitBytes = surf->u.legacy.stencil_tile_split;11461147for (level = 0; level < config->info.levels; level++) {1148r = gfx6_compute_level(addrlib, config, surf, true, level, compressed, &AddrSurfInfoIn,1149&AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, NULL, NULL);1150if (r)1151return r;11521153/* DB uses the depth pitch for both stencil and depth. */1154if (!only_stencil) {1155if (surf->u.legacy.zs.stencil_level[level].nblk_x != surf->u.legacy.level[level].nblk_x)1156surf->u.legacy.stencil_adjusted = true;1157} else {1158surf->u.legacy.level[level].nblk_x = surf->u.legacy.zs.stencil_level[level].nblk_x;1159}11601161if (level == 0) {1162if (only_stencil) {1163r = gfx6_surface_settings(addrlib, info, config, &AddrSurfInfoOut, surf);1164if (r)1165return r;1166}11671168/* For 2D modes only. */1169if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {1170surf->u.legacy.stencil_tile_split = AddrSurfInfoOut.pTileInfo->tileSplitBytes;1171}1172}1173}1174}11751176/* Compute FMASK. 
*/1177if (config->info.samples >= 2 && AddrSurfInfoIn.flags.color && info->has_graphics &&1178!(surf->flags & RADEON_SURF_NO_FMASK)) {1179ADDR_COMPUTE_FMASK_INFO_INPUT fin = {0};1180ADDR_COMPUTE_FMASK_INFO_OUTPUT fout = {0};1181ADDR_TILEINFO fmask_tile_info = {0};11821183fin.size = sizeof(fin);1184fout.size = sizeof(fout);11851186fin.tileMode = AddrSurfInfoOut.tileMode;1187fin.pitch = AddrSurfInfoOut.pitch;1188fin.height = config->info.height;1189fin.numSlices = AddrSurfInfoIn.numSlices;1190fin.numSamples = AddrSurfInfoIn.numSamples;1191fin.numFrags = AddrSurfInfoIn.numFrags;1192fin.tileIndex = -1;1193fout.pTileInfo = &fmask_tile_info;11941195r = AddrComputeFmaskInfo(addrlib, &fin, &fout);1196if (r)1197return r;11981199surf->fmask_size = fout.fmaskBytes;1200surf->fmask_alignment_log2 = util_logbase2(fout.baseAlign);1201surf->fmask_slice_size = fout.sliceSize;1202surf->fmask_tile_swizzle = 0;12031204surf->u.legacy.color.fmask.slice_tile_max = (fout.pitch * fout.height) / 64;1205if (surf->u.legacy.color.fmask.slice_tile_max)1206surf->u.legacy.color.fmask.slice_tile_max -= 1;12071208surf->u.legacy.color.fmask.tiling_index = fout.tileIndex;1209surf->u.legacy.color.fmask.bankh = fout.pTileInfo->bankHeight;1210surf->u.legacy.color.fmask.pitch_in_pixels = fout.pitch;12111212/* Compute tile swizzle for FMASK. */1213if (config->info.fmask_surf_index && !(surf->flags & RADEON_SURF_SHAREABLE)) {1214ADDR_COMPUTE_BASE_SWIZZLE_INPUT xin = {0};1215ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT xout = {0};12161217xin.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);1218xout.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);12191220/* This counter starts from 1 instead of 0. 
*/1221xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);1222xin.tileIndex = fout.tileIndex;1223xin.macroModeIndex = fout.macroModeIndex;1224xin.pTileInfo = fout.pTileInfo;1225xin.tileMode = fin.tileMode;12261227int r = AddrComputeBaseSwizzle(addrlib, &xin, &xout);1228if (r != ADDR_OK)1229return r;12301231assert(xout.tileSwizzle <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));1232surf->fmask_tile_swizzle = xout.tileSwizzle;1233}1234}12351236/* Recalculate the whole DCC miptree size including disabled levels.1237* This is what addrlib does, but calling addrlib would be a lot more1238* complicated.1239*/1240if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_size && config->info.levels > 1) {1241/* The smallest miplevels that are never compressed by DCC1242* still read the DCC buffer via TC if the base level uses DCC,1243* and for some reason the DCC buffer needs to be larger if1244* the miptree uses non-zero tile_swizzle. Otherwise there are1245* VM faults.1246*1247* "dcc_alignment * 4" was determined by trial and error.1248*/1249surf->meta_size = align64(surf->surf_size >> 8, (1 << surf->meta_alignment_log2) * 4);1250}12511252/* Make sure HTILE covers the whole miptree, because the shader reads1253* TC-compatible HTILE even for levels where it's disabled by DB.1254*/1255if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_TC_COMPATIBLE_HTILE) &&1256surf->meta_size && config->info.levels > 1) {1257/* MSAA can't occur with levels > 1, so ignore the sample count. */1258const unsigned total_pixels = surf->surf_size / surf->bpe;1259const unsigned htile_block_size = 8 * 8;1260const unsigned htile_element_size = 4;12611262surf->meta_size = (total_pixels / htile_block_size) * htile_element_size;1263surf->meta_size = align(surf->meta_size, 1 << surf->meta_alignment_log2);1264} else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && !surf->meta_size) {1265/* Unset this if HTILE is not present. 
*/1266surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;1267}12681269surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED;1270surf->is_displayable = surf->is_linear || surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY ||1271surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER;12721273/* The rotated micro tile mode doesn't work if both CMASK and RB+ are1274* used at the same time. This case is not currently expected to occur1275* because we don't use rotated. Enforce this restriction on all chips1276* to facilitate testing.1277*/1278if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER) {1279assert(!"rotate micro tile mode is unsupported");1280return ADDR_ERROR;1281}12821283ac_compute_cmask(info, config, surf);1284return 0;1285}12861287/* This is only called when expecting a tiled layout. */1288static int gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, const struct radeon_info *info,1289struct radeon_surf *surf,1290ADDR2_COMPUTE_SURFACE_INFO_INPUT *in, bool is_fmask,1291AddrSwizzleMode *swizzle_mode)1292{1293ADDR_E_RETURNCODE ret;1294ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin = {0};1295ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT sout = {0};12961297sin.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT);1298sout.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT);12991300sin.flags = in->flags;1301sin.resourceType = in->resourceType;1302sin.format = in->format;1303sin.resourceLoction = ADDR_RSRC_LOC_INVIS;1304/* TODO: We could allow some of these: */1305sin.forbiddenBlock.micro = 1; /* don't allow the 256B swizzle modes */1306sin.forbiddenBlock.var = 1; /* don't allow the variable-sized swizzle modes */1307sin.bpp = in->bpp;1308sin.width = in->width;1309sin.height = in->height;1310sin.numSlices = in->numSlices;1311sin.numMipLevels = in->numMipLevels;1312sin.numSamples = in->numSamples;1313sin.numFrags = in->numFrags;13141315if (is_fmask) {1316sin.flags.display = 0;1317sin.flags.color = 0;1318sin.flags.fmask = 1;1319}13201321/* With PRT 
images we want to force 64 KiB block size so that the image1322* created is consistent with the format properties returned in Vulkan1323* independent of the image. */1324if (sin.flags.prt) {1325sin.forbiddenBlock.macroThin4KB = 1;1326sin.forbiddenBlock.macroThick4KB = 1;1327sin.forbiddenBlock.linear = 1;1328}13291330if (surf->flags & RADEON_SURF_FORCE_MICRO_TILE_MODE) {1331sin.forbiddenBlock.linear = 1;13321333if (surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)1334sin.preferredSwSet.sw_D = 1;1335else if (surf->micro_tile_mode == RADEON_MICRO_MODE_STANDARD)1336sin.preferredSwSet.sw_S = 1;1337else if (surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH)1338sin.preferredSwSet.sw_Z = 1;1339else if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER)1340sin.preferredSwSet.sw_R = 1;1341}13421343if (info->chip_class >= GFX10 && in->resourceType == ADDR_RSRC_TEX_3D && in->numSlices > 1) {1344/* 3D textures should use S swizzle modes for the best performance.1345* THe only exception is 3D render targets, which prefer 64KB_D_X.1346*1347* 3D texture sampler performance with a very large 3D texture:1348* ADDR_SW_64KB_R_X = 19 FPS (DCC on), 26 FPS (DCC off)1349* ADDR_SW_64KB_Z_X = 25 FPS1350* ADDR_SW_64KB_D_X = 53 FPS1351* ADDR_SW_4KB_S = 53 FPS1352* ADDR_SW_64KB_S = 53 FPS1353* ADDR_SW_64KB_S_T = 61 FPS1354* ADDR_SW_4KB_S_X = 63 FPS1355* ADDR_SW_64KB_S_X = 62 FPS1356*/1357sin.preferredSwSet.sw_S = 1;1358}13591360ret = Addr2GetPreferredSurfaceSetting(addrlib, &sin, &sout);1361if (ret != ADDR_OK)1362return ret;13631364*swizzle_mode = sout.swizzleMode;1365return 0;1366}13671368static bool is_dcc_supported_by_CB(const struct radeon_info *info, unsigned sw_mode)1369{1370if (info->chip_class >= GFX10)1371return sw_mode == ADDR_SW_64KB_Z_X || sw_mode == ADDR_SW_64KB_R_X;13721373return sw_mode != ADDR_SW_LINEAR;1374}13751376ASSERTED static bool is_dcc_supported_by_L2(const struct radeon_info *info,1377const struct radeon_surf *surf)1378{1379if (info->chip_class <= GFX9) {1380/* Only 
independent 64B blocks are supported. */1381return surf->u.gfx9.color.dcc.independent_64B_blocks && !surf->u.gfx9.color.dcc.independent_128B_blocks &&1382surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B;1383}13841385if (info->family == CHIP_NAVI10) {1386/* Only independent 128B blocks are supported. */1387return !surf->u.gfx9.color.dcc.independent_64B_blocks && surf->u.gfx9.color.dcc.independent_128B_blocks &&1388surf->u.gfx9.color.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B;1389}13901391if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14) {1392/* Either 64B or 128B can be used, but not both.1393* If 64B is used, DCC image stores are unsupported.1394*/1395return surf->u.gfx9.color.dcc.independent_64B_blocks != surf->u.gfx9.color.dcc.independent_128B_blocks &&1396(!surf->u.gfx9.color.dcc.independent_64B_blocks ||1397surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B) &&1398(!surf->u.gfx9.color.dcc.independent_128B_blocks ||1399surf->u.gfx9.color.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B);1400}14011402/* 128B is recommended, but 64B can be set too if needed for 4K by DCN.1403* Since there is no reason to ever disable 128B, require it.1404* DCC image stores are always supported.1405*/1406return surf->u.gfx9.color.dcc.independent_128B_blocks &&1407surf->u.gfx9.color.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B;1408}14091410static bool is_dcc_supported_by_DCN(const struct radeon_info *info,1411const struct ac_surf_config *config,1412const struct radeon_surf *surf, bool rb_aligned,1413bool pipe_aligned)1414{1415if (!info->use_display_dcc_unaligned && !info->use_display_dcc_with_retile_blit)1416return false;14171418/* 16bpp and 64bpp are more complicated, so they are disallowed for now. */1419if (surf->bpe != 4)1420return false;14211422/* Handle unaligned DCC. 
*/1423if (info->use_display_dcc_unaligned && (rb_aligned || pipe_aligned))1424return false;14251426switch (info->chip_class) {1427case GFX9:1428/* Only support 64KB_S_X, so that we have only 1 variant of the retile shader. */1429if (info->use_display_dcc_with_retile_blit &&1430surf->u.gfx9.swizzle_mode != ADDR_SW_64KB_S_X)1431return false;14321433/* There are more constraints, but we always set1434* INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B,1435* which always works.1436*/1437assert(surf->u.gfx9.color.dcc.independent_64B_blocks &&1438surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);1439return true;1440case GFX10:1441case GFX10_3:1442/* Only support 64KB_R_X, so that we have only 1 variant of the retile shader. */1443if (info->use_display_dcc_with_retile_blit &&1444surf->u.gfx9.swizzle_mode != ADDR_SW_64KB_R_X)1445return false;14461447/* DCN requires INDEPENDENT_128B_BLOCKS = 0 only on Navi1x. */1448if (info->chip_class == GFX10 && surf->u.gfx9.color.dcc.independent_128B_blocks)1449return false;14501451/* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1. */1452return ((config->info.width <= 2560 && config->info.height <= 2560) ||1453(surf->u.gfx9.color.dcc.independent_64B_blocks &&1454surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B));1455default:1456unreachable("unhandled chip");1457return false;1458}1459}14601461static void ac_copy_dcc_equation(const struct radeon_info *info,1462ADDR2_COMPUTE_DCCINFO_OUTPUT *dcc,1463struct gfx9_meta_equation *equation)1464{1465equation->meta_block_width = dcc->metaBlkWidth;1466equation->meta_block_height = dcc->metaBlkHeight;1467equation->meta_block_depth = dcc->metaBlkDepth;14681469if (info->chip_class >= GFX10) {1470/* gfx9_meta_equation doesn't store the first 4 and the last 8 elements. They must be 0. 
*/1471for (unsigned i = 0; i < 4; i++)1472assert(dcc->equation.gfx10_bits[i] == 0);14731474for (unsigned i = ARRAY_SIZE(equation->u.gfx10_bits) + 4; i < 68; i++)1475assert(dcc->equation.gfx10_bits[i] == 0);14761477memcpy(equation->u.gfx10_bits, dcc->equation.gfx10_bits + 4,1478sizeof(equation->u.gfx10_bits));1479} else {1480assert(dcc->equation.gfx9.num_bits <= ARRAY_SIZE(equation->u.gfx9.bit));14811482equation->u.gfx9.num_bits = dcc->equation.gfx9.num_bits;1483equation->u.gfx9.num_pipe_bits = dcc->equation.gfx9.numPipeBits;1484for (unsigned b = 0; b < ARRAY_SIZE(equation->u.gfx9.bit); b++) {1485for (unsigned c = 0; c < ARRAY_SIZE(equation->u.gfx9.bit[b].coord); c++) {1486equation->u.gfx9.bit[b].coord[c].dim = dcc->equation.gfx9.bit[b].coord[c].dim;1487equation->u.gfx9.bit[b].coord[c].ord = dcc->equation.gfx9.bit[b].coord[c].ord;1488}1489}1490}1491}14921493static void ac_copy_htile_equation(const struct radeon_info *info,1494ADDR2_COMPUTE_HTILE_INFO_OUTPUT *htile,1495struct gfx9_meta_equation *equation)1496{1497equation->meta_block_width = htile->metaBlkWidth;1498equation->meta_block_height = htile->metaBlkHeight;14991500/* gfx9_meta_equation doesn't store the first 8 and the last 4 elements. They must be 0. 
*/1501for (unsigned i = 0; i < 8; i++)1502assert(htile->equation.gfx10_bits[i] == 0);15031504for (unsigned i = ARRAY_SIZE(equation->u.gfx10_bits) + 8; i < 72; i++)1505assert(htile->equation.gfx10_bits[i] == 0);15061507memcpy(equation->u.gfx10_bits, htile->equation.gfx10_bits + 8,1508sizeof(equation->u.gfx10_bits));1509}15101511static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_info *info,1512const struct ac_surf_config *config, struct radeon_surf *surf,1513bool compressed, ADDR2_COMPUTE_SURFACE_INFO_INPUT *in)1514{1515ADDR2_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {0};1516ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};1517ADDR_E_RETURNCODE ret;15181519out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);1520out.pMipInfo = mip_info;15211522ret = Addr2ComputeSurfaceInfo(addrlib->handle, in, &out);1523if (ret != ADDR_OK)1524return ret;15251526if (in->flags.prt) {1527surf->prt_tile_width = out.blockWidth;1528surf->prt_tile_height = out.blockHeight;15291530for (surf->first_mip_tail_level = 0; surf->first_mip_tail_level < in->numMipLevels;1531++surf->first_mip_tail_level) {1532if(mip_info[surf->first_mip_tail_level].pitch < out.blockWidth ||1533mip_info[surf->first_mip_tail_level].height < out.blockHeight)1534break;1535}15361537for (unsigned i = 0; i < in->numMipLevels; i++) {1538surf->u.gfx9.prt_level_offset[i] = mip_info[i].macroBlockOffset + mip_info[i].mipTailOffset;15391540if (info->chip_class >= GFX10)1541surf->u.gfx9.prt_level_pitch[i] = mip_info[i].pitch;1542else1543surf->u.gfx9.prt_level_pitch[i] = out.mipChainPitch;1544}1545}15461547if (in->flags.stencil) {1548surf->u.gfx9.zs.stencil_swizzle_mode = in->swizzleMode;1549surf->u.gfx9.zs.stencil_epitch =1550out.epitchIsHeight ? 
out.mipChainHeight - 1 : out.mipChainPitch - 1;1551surf->surf_alignment_log2 = MAX2(surf->surf_alignment_log2, util_logbase2(out.baseAlign));1552surf->u.gfx9.zs.stencil_offset = align(surf->surf_size, out.baseAlign);1553surf->surf_size = surf->u.gfx9.zs.stencil_offset + out.surfSize;1554return 0;1555}15561557surf->u.gfx9.swizzle_mode = in->swizzleMode;1558surf->u.gfx9.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 : out.mipChainPitch - 1;15591560/* CMASK fast clear uses these even if FMASK isn't allocated.1561* FMASK only supports the Z swizzle modes, whose numbers are multiples of 4.1562*/1563if (!in->flags.depth) {1564surf->u.gfx9.color.fmask_swizzle_mode = surf->u.gfx9.swizzle_mode & ~0x3;1565surf->u.gfx9.color.fmask_epitch = surf->u.gfx9.epitch;1566}15671568surf->u.gfx9.surf_slice_size = out.sliceSize;1569surf->u.gfx9.surf_pitch = out.pitch;1570surf->u.gfx9.surf_height = out.height;1571surf->surf_size = out.surfSize;1572surf->surf_alignment_log2 = util_logbase2(out.baseAlign);15731574if (!compressed && surf->blk_w > 1 && out.pitch == out.pixelPitch &&1575surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR) {1576/* Adjust surf_pitch to be in elements units not in pixels */1577surf->u.gfx9.surf_pitch = align(surf->u.gfx9.surf_pitch / surf->blk_w, 256 / surf->bpe);1578surf->u.gfx9.epitch =1579MAX2(surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch * surf->blk_w - 1);1580/* The surface is really a surf->bpe bytes per pixel surface even if we1581* use it as a surf->bpe bytes per element one.1582* Adjust surf_slice_size and surf_size to reflect the change1583* made to surf_pitch.1584*/1585surf->u.gfx9.surf_slice_size =1586MAX2(surf->u.gfx9.surf_slice_size,1587surf->u.gfx9.surf_pitch * out.height * surf->bpe * surf->blk_w);1588surf->surf_size = surf->u.gfx9.surf_slice_size * in->numSlices;1589}15901591if (in->swizzleMode == ADDR_SW_LINEAR) {1592for (unsigned i = 0; i < in->numMipLevels; i++) {1593surf->u.gfx9.offset[i] = mip_info[i].offset;1594surf->u.gfx9.pitch[i] = 
mip_info[i].pitch;1595}1596}15971598surf->u.gfx9.base_mip_width = mip_info[0].pitch;1599surf->u.gfx9.base_mip_height = mip_info[0].height;16001601if (in->flags.depth) {1602assert(in->swizzleMode != ADDR_SW_LINEAR);16031604if (surf->flags & RADEON_SURF_NO_HTILE)1605return 0;16061607/* HTILE */1608ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};1609ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};1610ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};16111612hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT);1613hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT);1614hout.pMipInfo = meta_mip_info;16151616assert(in->flags.metaPipeUnaligned == 0);1617assert(in->flags.metaRbUnaligned == 0);16181619hin.hTileFlags.pipeAligned = 1;1620hin.hTileFlags.rbAligned = 1;1621hin.depthFlags = in->flags;1622hin.swizzleMode = in->swizzleMode;1623hin.unalignedWidth = in->width;1624hin.unalignedHeight = in->height;1625hin.numSlices = in->numSlices;1626hin.numMipLevels = in->numMipLevels;1627hin.firstMipIdInTail = out.firstMipIdInTail;16281629ret = Addr2ComputeHtileInfo(addrlib->handle, &hin, &hout);1630if (ret != ADDR_OK)1631return ret;16321633surf->meta_size = hout.htileBytes;1634surf->meta_slice_size = hout.sliceSize;1635surf->meta_alignment_log2 = util_logbase2(hout.baseAlign);1636surf->meta_pitch = hout.pitch;1637surf->num_meta_levels = in->numMipLevels;16381639for (unsigned i = 0; i < in->numMipLevels; i++) {1640surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset;1641surf->u.gfx9.meta_levels[i].size = meta_mip_info[i].sliceSize;16421643if (meta_mip_info[i].inMiptail) {1644/* GFX10 can only compress the first level1645* in the mip tail.1646*/1647surf->num_meta_levels = i + 1;1648break;1649}1650}16511652if (!surf->num_meta_levels)1653surf->meta_size = 0;16541655if (info->chip_class >= GFX10)1656ac_copy_htile_equation(info, &hout, &surf->u.gfx9.zs.htile_equation);1657return 0;1658}16591660{1661/* Compute tile swizzle for the color surface.1662* All *_X and *_T modes can use 
the swizzle.1663*/1664if (config->info.surf_index && in->swizzleMode >= ADDR_SW_64KB_Z_T && !out.mipChainInTail &&1665!(surf->flags & RADEON_SURF_SHAREABLE) && !in->flags.display) {1666ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};1667ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};16681669xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);1670xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);16711672xin.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;1673xin.flags = in->flags;1674xin.swizzleMode = in->swizzleMode;1675xin.resourceType = in->resourceType;1676xin.format = in->format;1677xin.numSamples = in->numSamples;1678xin.numFrags = in->numFrags;16791680ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);1681if (ret != ADDR_OK)1682return ret;16831684assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));1685surf->tile_swizzle = xout.pipeBankXor;1686}16871688/* DCC */1689if (info->has_graphics && !(surf->flags & RADEON_SURF_DISABLE_DCC) && !compressed &&1690is_dcc_supported_by_CB(info, in->swizzleMode) &&1691(!in->flags.display ||1692is_dcc_supported_by_DCN(info, config, surf, !in->flags.metaRbUnaligned,1693!in->flags.metaPipeUnaligned)) &&1694(surf->modifier == DRM_FORMAT_MOD_INVALID ||1695ac_modifier_has_dcc(surf->modifier))) {1696ADDR2_COMPUTE_DCCINFO_INPUT din = {0};1697ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};1698ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};16991700din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);1701dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);1702dout.pMipInfo = meta_mip_info;17031704din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned;1705din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned;1706din.resourceType = in->resourceType;1707din.swizzleMode = in->swizzleMode;1708din.bpp = in->bpp;1709din.unalignedWidth = in->width;1710din.unalignedHeight = in->height;1711din.numSlices = in->numSlices;1712din.numFrags = in->numFrags;1713din.numMipLevels = 
in->numMipLevels;1714din.dataSurfaceSize = out.surfSize;1715din.firstMipIdInTail = out.firstMipIdInTail;17161717ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);1718if (ret != ADDR_OK)1719return ret;17201721surf->u.gfx9.color.dcc.rb_aligned = din.dccKeyFlags.rbAligned;1722surf->u.gfx9.color.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned;1723surf->u.gfx9.color.dcc_block_width = dout.compressBlkWidth;1724surf->u.gfx9.color.dcc_block_height = dout.compressBlkHeight;1725surf->u.gfx9.color.dcc_block_depth = dout.compressBlkDepth;1726surf->u.gfx9.color.dcc_pitch_max = dout.pitch - 1;1727surf->u.gfx9.color.dcc_height = dout.height;1728surf->meta_size = dout.dccRamSize;1729surf->meta_slice_size = dout.dccRamSliceSize;1730surf->meta_alignment_log2 = util_logbase2(dout.dccRamBaseAlign);1731surf->num_meta_levels = in->numMipLevels;17321733/* Disable DCC for levels that are in the mip tail.1734*1735* There are two issues that this is intended to1736* address:1737*1738* 1. Multiple mip levels may share a cache line. This1739* can lead to corruption when switching between1740* rendering to different mip levels because the1741* RBs don't maintain coherency.1742*1743* 2. 
Texturing with metadata after rendering sometimes1744* fails with corruption, probably for a similar1745* reason.1746*1747* Working around these issues for all levels in the1748* mip tail may be overly conservative, but it's what1749* Vulkan does.1750*1751* Alternative solutions that also work but are worse:1752* - Disable DCC entirely.1753* - Flush TC L2 after rendering.1754*/1755for (unsigned i = 0; i < in->numMipLevels; i++) {1756surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset;1757surf->u.gfx9.meta_levels[i].size = meta_mip_info[i].sliceSize;17581759if (meta_mip_info[i].inMiptail) {1760/* GFX10 can only compress the first level1761* in the mip tail.1762*1763* TODO: Try to do the same thing for gfx91764* if there are no regressions.1765*/1766if (info->chip_class >= GFX10)1767surf->num_meta_levels = i + 1;1768else1769surf->num_meta_levels = i;1770break;1771}1772}17731774if (!surf->num_meta_levels)1775surf->meta_size = 0;17761777surf->u.gfx9.color.display_dcc_size = surf->meta_size;1778surf->u.gfx9.color.display_dcc_alignment_log2 = surf->meta_alignment_log2;1779surf->u.gfx9.color.display_dcc_pitch_max = surf->u.gfx9.color.dcc_pitch_max;1780surf->u.gfx9.color.display_dcc_height = surf->u.gfx9.color.dcc_height;17811782if (in->resourceType == ADDR_RSRC_TEX_2D)1783ac_copy_dcc_equation(info, &dout, &surf->u.gfx9.color.dcc_equation);17841785/* Compute displayable DCC. */1786if (((in->flags.display && info->use_display_dcc_with_retile_blit) ||1787ac_modifier_has_dcc_retile(surf->modifier)) && surf->num_meta_levels) {1788/* Compute displayable DCC info. 
*/1789din.dccKeyFlags.pipeAligned = 0;1790din.dccKeyFlags.rbAligned = 0;17911792assert(din.numSlices == 1);1793assert(din.numMipLevels == 1);1794assert(din.numFrags == 1);1795assert(surf->tile_swizzle == 0);1796assert(surf->u.gfx9.color.dcc.pipe_aligned || surf->u.gfx9.color.dcc.rb_aligned);17971798ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);1799if (ret != ADDR_OK)1800return ret;18011802surf->u.gfx9.color.display_dcc_size = dout.dccRamSize;1803surf->u.gfx9.color.display_dcc_alignment_log2 = util_logbase2(dout.dccRamBaseAlign);1804surf->u.gfx9.color.display_dcc_pitch_max = dout.pitch - 1;1805surf->u.gfx9.color.display_dcc_height = dout.height;1806assert(surf->u.gfx9.color.display_dcc_size <= surf->meta_size);18071808ac_copy_dcc_equation(info, &dout, &surf->u.gfx9.color.display_dcc_equation);1809surf->u.gfx9.color.dcc.display_equation_valid = true;1810}1811}18121813/* FMASK */1814if (in->numSamples > 1 && info->has_graphics && !(surf->flags & RADEON_SURF_NO_FMASK)) {1815ADDR2_COMPUTE_FMASK_INFO_INPUT fin = {0};1816ADDR2_COMPUTE_FMASK_INFO_OUTPUT fout = {0};18171818fin.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT);1819fout.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT);18201821ret = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, in, true, &fin.swizzleMode);1822if (ret != ADDR_OK)1823return ret;18241825fin.unalignedWidth = in->width;1826fin.unalignedHeight = in->height;1827fin.numSlices = in->numSlices;1828fin.numSamples = in->numSamples;1829fin.numFrags = in->numFrags;18301831ret = Addr2ComputeFmaskInfo(addrlib->handle, &fin, &fout);1832if (ret != ADDR_OK)1833return ret;18341835surf->u.gfx9.color.fmask_swizzle_mode = fin.swizzleMode;1836surf->u.gfx9.color.fmask_epitch = fout.pitch - 1;1837surf->fmask_size = fout.fmaskBytes;1838surf->fmask_alignment_log2 = util_logbase2(fout.baseAlign);1839surf->fmask_slice_size = fout.sliceSize;18401841/* Compute tile swizzle for the FMASK surface. 
*/1842if (config->info.fmask_surf_index && fin.swizzleMode >= ADDR_SW_64KB_Z_T &&1843!(surf->flags & RADEON_SURF_SHAREABLE)) {1844ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};1845ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};18461847xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);1848xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);18491850/* This counter starts from 1 instead of 0. */1851xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);1852xin.flags = in->flags;1853xin.swizzleMode = fin.swizzleMode;1854xin.resourceType = in->resourceType;1855xin.format = in->format;1856xin.numSamples = in->numSamples;1857xin.numFrags = in->numFrags;18581859ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);1860if (ret != ADDR_OK)1861return ret;18621863assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->fmask_tile_swizzle) * 8));1864surf->fmask_tile_swizzle = xout.pipeBankXor;1865}1866}18671868/* CMASK -- on GFX10 only for FMASK */1869if (in->swizzleMode != ADDR_SW_LINEAR && in->resourceType == ADDR_RSRC_TEX_2D &&1870((info->chip_class <= GFX9 && in->numSamples == 1 && in->flags.metaPipeUnaligned == 0 &&1871in->flags.metaRbUnaligned == 0) ||1872(surf->fmask_size && in->numSamples >= 2))) {1873ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0};1874ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0};1875ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};18761877cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT);1878cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT);1879cout.pMipInfo = meta_mip_info;18801881assert(in->flags.metaPipeUnaligned == 0);1882assert(in->flags.metaRbUnaligned == 0);18831884cin.cMaskFlags.pipeAligned = 1;1885cin.cMaskFlags.rbAligned = 1;1886cin.resourceType = in->resourceType;1887cin.unalignedWidth = in->width;1888cin.unalignedHeight = in->height;1889cin.numSlices = in->numSlices;1890cin.numMipLevels = in->numMipLevels;1891cin.firstMipIdInTail = out.firstMipIdInTail;18921893if (in->numSamples > 1)1894cin.swizzleMode = 
surf->u.gfx9.color.fmask_swizzle_mode;1895else1896cin.swizzleMode = in->swizzleMode;18971898ret = Addr2ComputeCmaskInfo(addrlib->handle, &cin, &cout);1899if (ret != ADDR_OK)1900return ret;19011902surf->cmask_size = cout.cmaskBytes;1903surf->cmask_alignment_log2 = util_logbase2(cout.baseAlign);1904surf->cmask_slice_size = cout.sliceSize;1905surf->u.gfx9.color.cmask_level0.offset = meta_mip_info[0].offset;1906surf->u.gfx9.color.cmask_level0.size = meta_mip_info[0].sliceSize;1907}1908}19091910return 0;1911}19121913static int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,1914const struct ac_surf_config *config, enum radeon_surf_mode mode,1915struct radeon_surf *surf)1916{1917bool compressed;1918ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};1919int r;19201921AddrSurfInfoIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);19221923compressed = surf->blk_w == 4 && surf->blk_h == 4;19241925/* The format must be set correctly for the allocation of compressed1926* textures to work. In other cases, setting the bpp is sufficient. 
/* Compute the layout of a GFX9+ surface and store the result in "surf".
 *
 * The function fills an ADDR2_COMPUTE_SURFACE_INFO_INPUT from "config",
 * "mode" and surf->flags, selects a swizzle mode, runs
 * gfx9_compute_miptree() for the color/depth plane and, when
 * RADEON_SURF_SBUFFER is set, once more for the stencil plane, and finally
 * derives is_linear, is_displayable and micro_tile_mode.
 *
 * Returns 0 on success, otherwise the non-zero value returned by
 * gfx9_get_preferred_swizzle_mode(), gfx9_compute_miptree() or
 * Addr2IsValidDisplaySwizzleMode().
 */
static int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
                                const struct ac_surf_config *config, enum radeon_surf_mode mode,
                                struct radeon_surf *surf)
{
   bool compressed;
   ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
   int r;

   AddrSurfInfoIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);

   /* Surfaces made of 4x4 blocks are handled as block-compressed formats. */
   compressed = surf->blk_w == 4 && surf->blk_h == 4;

   /* The format must be set correctly for the allocation of compressed
    * textures to work. In other cases, setting the bpp is sufficient. */
   if (compressed) {
      switch (surf->bpe) {
      case 8:
         AddrSurfInfoIn.format = ADDR_FMT_BC1;
         break;
      case 16:
         AddrSurfInfoIn.format = ADDR_FMT_BC3;
         break;
      default:
         assert(0);
      }
   } else {
      switch (surf->bpe) {
      case 1:
         assert(!(surf->flags & RADEON_SURF_ZBUFFER));
         AddrSurfInfoIn.format = ADDR_FMT_8;
         break;
      case 2:
         assert(surf->flags & RADEON_SURF_ZBUFFER || !(surf->flags & RADEON_SURF_SBUFFER));
         AddrSurfInfoIn.format = ADDR_FMT_16;
         break;
      case 4:
         assert(surf->flags & RADEON_SURF_ZBUFFER || !(surf->flags & RADEON_SURF_SBUFFER));
         AddrSurfInfoIn.format = ADDR_FMT_32;
         break;
      case 8:
         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
         AddrSurfInfoIn.format = ADDR_FMT_32_32;
         break;
      case 12:
         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
         AddrSurfInfoIn.format = ADDR_FMT_32_32_32;
         break;
      case 16:
         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
         AddrSurfInfoIn.format = ADDR_FMT_32_32_32_32;
         break;
      default:
         assert(0);
      }
      AddrSurfInfoIn.bpp = surf->bpe * 8;
   }

   bool is_color_surface = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
   AddrSurfInfoIn.flags.color = is_color_surface && !(surf->flags & RADEON_SURF_NO_RENDER_TARGET);
   AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
   AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
   /* flags.texture currently refers to TC-compatible HTILE */
   AddrSurfInfoIn.flags.texture = is_color_surface || surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
   AddrSurfInfoIn.flags.opt4space = 1;
   AddrSurfInfoIn.flags.prt = (surf->flags & RADEON_SURF_PRT) != 0;

   AddrSurfInfoIn.numMipLevels = config->info.levels;
   AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);
   AddrSurfInfoIn.numFrags = AddrSurfInfoIn.numSamples;

   /* Color surfaces take their fragment count from storage_samples. */
   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER))
      AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples);

   /* GFX9 doesn't support 1D depth textures, so allocate all 1D textures
    * as 2D to avoid having shader variants for 1D vs 2D, so all shaders
    * must sample 1D textures as 2D. */
   if (config->is_3d)
      AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D;
   else if (info->chip_class != GFX9 && config->is_1d)
      AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_1D;
   else
      AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_2D;

   AddrSurfInfoIn.width = config->info.width;
   AddrSurfInfoIn.height = config->info.height;

   if (config->is_3d)
      AddrSurfInfoIn.numSlices = config->info.depth;
   else if (config->is_cube)
      AddrSurfInfoIn.numSlices = 6;
   else
      AddrSurfInfoIn.numSlices = config->info.array_size;

   /* This is propagated to DCC. It must be 0 for HTILE and CMASK. */
   AddrSurfInfoIn.flags.metaPipeUnaligned = 0;
   AddrSurfInfoIn.flags.metaRbUnaligned = 0;

   if (ac_modifier_has_dcc(surf->modifier)) {
      ac_modifier_fill_dcc_params(surf->modifier, surf, &AddrSurfInfoIn);
   } else if (!AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.stencil) {
      /* Optimal values for the L2 cache. */
      if (info->chip_class == GFX9) {
         surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
         surf->u.gfx9.color.dcc.independent_128B_blocks = 0;
         surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
      } else if (info->chip_class >= GFX10) {
         surf->u.gfx9.color.dcc.independent_64B_blocks = 0;
         surf->u.gfx9.color.dcc.independent_128B_blocks = 1;
         surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
      }

      if (AddrSurfInfoIn.flags.display) {
         /* The display hardware can only read DCC with RB_ALIGNED=0 and
          * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED.
          *
          * The CB block requires RB_ALIGNED=1 except 1 RB chips.
          * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes
          * after rendering, so PIPE_ALIGNED=1 is recommended.
          */
         if (info->use_display_dcc_unaligned) {
            AddrSurfInfoIn.flags.metaPipeUnaligned = 1;
            AddrSurfInfoIn.flags.metaRbUnaligned = 1;
         }

         /* Adjust DCC settings to meet DCN requirements. */
         if (info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit) {
            /* Only Navi12/14 support independent 64B blocks in L2,
             * but without DCC image stores.
             */
            if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14) {
               surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
               surf->u.gfx9.color.dcc.independent_128B_blocks = 0;
               surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
            }

            if (info->chip_class >= GFX10_3) {
               surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
               surf->u.gfx9.color.dcc.independent_128B_blocks = 1;
               surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
            }
         }
      }
   }

   /* Without a modifier the swizzle mode comes from "mode" (or from the
    * value already stored in surf for imported / forced surfaces); with a
    * modifier it is dictated by the modifier itself.
    */
   if (surf->modifier == DRM_FORMAT_MOD_INVALID) {
      switch (mode) {
      case RADEON_SURF_MODE_LINEAR_ALIGNED:
         assert(config->info.samples <= 1);
         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
         AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR;
         break;

      case RADEON_SURF_MODE_1D:
      case RADEON_SURF_MODE_2D:
         if (surf->flags & RADEON_SURF_IMPORTED ||
             (info->chip_class >= GFX10 && surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE)) {
            AddrSurfInfoIn.swizzleMode = surf->u.gfx9.swizzle_mode;
            break;
         }

         r = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, &AddrSurfInfoIn, false,
                                             &AddrSurfInfoIn.swizzleMode);
         if (r)
            return r;
         break;

      default:
         assert(0);
      }
   } else {
      /* We have a valid and required modifier here. */

      assert(!compressed);
      assert(!ac_modifier_has_dcc(surf->modifier) ||
             !(surf->flags & RADEON_SURF_DISABLE_DCC));

      AddrSurfInfoIn.swizzleMode = ac_modifier_gfx9_swizzle_mode(surf->modifier);
   }

   surf->u.gfx9.resource_type = AddrSurfInfoIn.resourceType;
   surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);

   /* Reset every size/offset that gfx9_compute_miptree() accumulates into. */
   surf->num_meta_levels = 0;
   surf->surf_size = 0;
   surf->fmask_size = 0;
   surf->meta_size = 0;
   surf->meta_slice_size = 0;
   surf->u.gfx9.surf_offset = 0;
   /* NOTE(review): flags.stencil is only assigned further down in this
    * function, so unless ac_modifier_fill_dcc_params() sets it this reset
    * looks like it never executes - confirm whether surf->has_stencil was
    * intended.
    */
   if (AddrSurfInfoIn.flags.stencil)
      surf->u.gfx9.zs.stencil_offset = 0;
   surf->cmask_size = 0;

   const bool only_stencil =
      (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER);

   /* Calculate texture layout information. */
   if (!only_stencil) {
      r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn);
      if (r)
         return r;
   }

   /* Calculate texture layout information for stencil. */
   if (surf->flags & RADEON_SURF_SBUFFER) {
      AddrSurfInfoIn.flags.stencil = 1;
      AddrSurfInfoIn.bpp = 8;
      AddrSurfInfoIn.format = ADDR_FMT_8;

      /* A stencil-only surface needs its own swizzle mode; a combined
       * depth+stencil surface reuses the one chosen above and only clears
       * the depth flag for the second miptree pass.
       */
      if (!AddrSurfInfoIn.flags.depth) {
         r = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, &AddrSurfInfoIn, false,
                                             &AddrSurfInfoIn.swizzleMode);
         if (r)
            return r;
      } else
         AddrSurfInfoIn.flags.depth = 0;

      r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn);
      if (r)
         return r;
   }

   surf->is_linear = surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR;

   /* Query whether the surface is displayable. */
   /* This is only useful for surfaces that are allocated without SCANOUT. */
   BOOL_32 displayable = false;
   if (!config->is_3d && !config->is_cube) {
      r = Addr2IsValidDisplaySwizzleMode(addrlib->handle, surf->u.gfx9.swizzle_mode,
                                         surf->bpe * 8, &displayable);
      if (r)
         return r;

      /* Display needs unaligned DCC. */
      if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
          surf->num_meta_levels &&
          (!is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned,
                                    surf->u.gfx9.color.dcc.pipe_aligned) ||
           /* Don't set is_displayable if displayable DCC is missing. */
           (info->use_display_dcc_with_retile_blit && !surf->u.gfx9.color.dcc.display_equation_valid)))
         displayable = false;
   }
   surf->is_displayable = displayable;

   /* Validate that we allocated a displayable surface if requested. */
   assert(!AddrSurfInfoIn.flags.display || surf->is_displayable);

   /* Validate that DCC is set up correctly. */
   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->num_meta_levels) {
      assert(is_dcc_supported_by_L2(info, surf));
      if (AddrSurfInfoIn.flags.color)
         assert(is_dcc_supported_by_CB(info, surf->u.gfx9.swizzle_mode));
      if (AddrSurfInfoIn.flags.display) {
         assert(is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned,
                                        surf->u.gfx9.color.dcc.pipe_aligned));
      }
   }

   /* Debug-only sanity checks: the block below contains nothing but asserts. */
   if (info->has_graphics && !compressed && !config->is_3d && config->info.levels == 1 &&
       AddrSurfInfoIn.flags.color && !surf->is_linear &&
       (1 << surf->surf_alignment_log2) >= 64 * 1024 && /* 64KB tiling */
       !(surf->flags & (RADEON_SURF_DISABLE_DCC | RADEON_SURF_FORCE_SWIZZLE_MODE |
                        RADEON_SURF_FORCE_MICRO_TILE_MODE)) &&
       (surf->modifier == DRM_FORMAT_MOD_INVALID ||
        ac_modifier_has_dcc(surf->modifier))) {
      /* Validate that DCC is enabled if DCN can do it. */
      if ((info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit) &&
          AddrSurfInfoIn.flags.display && surf->bpe == 4) {
         assert(surf->num_meta_levels);
      }

      /* Validate that non-scanout DCC is always enabled. */
      if (!AddrSurfInfoIn.flags.display)
         assert(surf->num_meta_levels);
   }

   if (!surf->meta_size) {
      /* Unset this if HTILE is not present. */
      surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
   }

   /* Derive the micro tile mode from the letter of the final swizzle mode. */
   switch (surf->u.gfx9.swizzle_mode) {
   /* S = standard. */
   case ADDR_SW_256B_S:
   case ADDR_SW_4KB_S:
   case ADDR_SW_64KB_S:
   case ADDR_SW_64KB_S_T:
   case ADDR_SW_4KB_S_X:
   case ADDR_SW_64KB_S_X:
      surf->micro_tile_mode = RADEON_MICRO_MODE_STANDARD;
      break;

   /* D = display. */
   case ADDR_SW_LINEAR:
   case ADDR_SW_256B_D:
   case ADDR_SW_4KB_D:
   case ADDR_SW_64KB_D:
   case ADDR_SW_64KB_D_T:
   case ADDR_SW_4KB_D_X:
   case ADDR_SW_64KB_D_X:
      surf->micro_tile_mode = RADEON_MICRO_MODE_DISPLAY;
      break;

   /* R = rotated (gfx9), render target (gfx10). */
   case ADDR_SW_256B_R:
   case ADDR_SW_4KB_R:
   case ADDR_SW_64KB_R:
   case ADDR_SW_64KB_R_T:
   case ADDR_SW_4KB_R_X:
   case ADDR_SW_64KB_R_X:
   case ADDR_SW_VAR_R_X:
      /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
       * used at the same time. We currently do not use rotated
       * in gfx9.
       */
      assert(info->chip_class >= GFX10 || !"rotate micro tile mode is unsupported");
      surf->micro_tile_mode = RADEON_MICRO_MODE_RENDER;
      break;

   /* Z = depth. */
   case ADDR_SW_4KB_Z:
   case ADDR_SW_64KB_Z:
   case ADDR_SW_64KB_Z_T:
   case ADDR_SW_4KB_Z_X:
   case ADDR_SW_64KB_Z_X:
   case ADDR_SW_VAR_Z_X:
      surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH;
      break;

   default:
      assert(0);
   }

   return 0;
}
*/2190surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;2191}21922193switch (surf->u.gfx9.swizzle_mode) {2194/* S = standard. */2195case ADDR_SW_256B_S:2196case ADDR_SW_4KB_S:2197case ADDR_SW_64KB_S:2198case ADDR_SW_64KB_S_T:2199case ADDR_SW_4KB_S_X:2200case ADDR_SW_64KB_S_X:2201surf->micro_tile_mode = RADEON_MICRO_MODE_STANDARD;2202break;22032204/* D = display. */2205case ADDR_SW_LINEAR:2206case ADDR_SW_256B_D:2207case ADDR_SW_4KB_D:2208case ADDR_SW_64KB_D:2209case ADDR_SW_64KB_D_T:2210case ADDR_SW_4KB_D_X:2211case ADDR_SW_64KB_D_X:2212surf->micro_tile_mode = RADEON_MICRO_MODE_DISPLAY;2213break;22142215/* R = rotated (gfx9), render target (gfx10). */2216case ADDR_SW_256B_R:2217case ADDR_SW_4KB_R:2218case ADDR_SW_64KB_R:2219case ADDR_SW_64KB_R_T:2220case ADDR_SW_4KB_R_X:2221case ADDR_SW_64KB_R_X:2222case ADDR_SW_VAR_R_X:2223/* The rotated micro tile mode doesn't work if both CMASK and RB+ are2224* used at the same time. We currently do not use rotated2225* in gfx9.2226*/2227assert(info->chip_class >= GFX10 || !"rotate micro tile mode is unsupported");2228surf->micro_tile_mode = RADEON_MICRO_MODE_RENDER;2229break;22302231/* Z = depth. */2232case ADDR_SW_4KB_Z:2233case ADDR_SW_64KB_Z:2234case ADDR_SW_64KB_Z_T:2235case ADDR_SW_4KB_Z_X:2236case ADDR_SW_64KB_Z_X:2237case ADDR_SW_VAR_Z_X:2238surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH;2239break;22402241default:2242assert(0);2243}22442245return 0;2246}22472248int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,2249const struct ac_surf_config *config, enum radeon_surf_mode mode,2250struct radeon_surf *surf)2251{2252int r;22532254r = surf_config_sanity(config, surf->flags);2255if (r)2256return r;22572258if (info->family_id >= FAMILY_AI)2259r = gfx9_compute_surface(addrlib, info, config, mode, surf);2260else2261r = gfx6_compute_surface(addrlib->handle, info, config, mode, surf);22622263if (r)2264return r;22652266/* Determine the memory layout of multiple allocations in one buffer. 
*/2267surf->total_size = surf->surf_size;2268surf->alignment_log2 = surf->surf_alignment_log2;22692270/* Ensure the offsets are always 0 if not available. */2271surf->meta_offset = surf->display_dcc_offset = surf->fmask_offset = surf->cmask_offset = 0;22722273if (surf->fmask_size) {2274assert(config->info.samples >= 2);2275surf->fmask_offset = align64(surf->total_size, 1 << surf->fmask_alignment_log2);2276surf->total_size = surf->fmask_offset + surf->fmask_size;2277surf->alignment_log2 = MAX2(surf->alignment_log2, surf->fmask_alignment_log2);2278}22792280/* Single-sample CMASK is in a separate buffer. */2281if (surf->cmask_size && config->info.samples >= 2) {2282surf->cmask_offset = align64(surf->total_size, 1 << surf->cmask_alignment_log2);2283surf->total_size = surf->cmask_offset + surf->cmask_size;2284surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);2285}22862287if (surf->is_displayable)2288surf->flags |= RADEON_SURF_SCANOUT;22892290if (surf->meta_size &&2291/* dcc_size is computed on GFX9+ only if it's displayable. */2292(info->chip_class >= GFX9 || !get_display_flag(config, surf))) {2293/* It's better when displayable DCC is immediately after2294* the image due to hw-specific reasons.2295*/2296if (info->chip_class >= GFX9 &&2297!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&2298surf->u.gfx9.color.dcc.display_equation_valid) {2299/* Add space for the displayable DCC buffer. */2300surf->display_dcc_offset = align64(surf->total_size, 1 << surf->u.gfx9.color.display_dcc_alignment_log2);2301surf->total_size = surf->display_dcc_offset + surf->u.gfx9.color.display_dcc_size;2302}23032304surf->meta_offset = align64(surf->total_size, 1 << surf->meta_alignment_log2);2305surf->total_size = surf->meta_offset + surf->meta_size;2306surf->alignment_log2 = MAX2(surf->alignment_log2, surf->meta_alignment_log2);2307}23082309return 0;2310}23112312/* This is meant to be used for disabling DCC. 
/* Decode a TILE_SPLIT field value into a tile split size.
 * Encodings 0..6 map to 64, 128, ..., 4096; any other value yields 1024.
 */
static unsigned eg_tile_split(unsigned tile_split)
{
   if (tile_split > 6)
      return 1024;

   return 64u << tile_split;
}

/* Inverse of eg_tile_split(): map 64..4096 (powers of two) to 0..6.
 * Any size that is not one of those seven values maps to 4.
 */
static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   for (unsigned code = 0; code <= 6; code++) {
      if (eg_tile_split == (64u << code))
         return code;
   }

   return 4;
}

#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK  0x3
*/2380void ac_surface_set_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,2381uint64_t tiling_flags, enum radeon_surf_mode *mode)2382{2383bool scanout;23842385if (info->chip_class >= GFX9) {2386surf->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);2387surf->u.gfx9.color.dcc.independent_64B_blocks =2388AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B);2389surf->u.gfx9.color.dcc.independent_128B_blocks =2390AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_128B);2391surf->u.gfx9.color.dcc.max_compressed_block_size =2392AMDGPU_TILING_GET(tiling_flags, DCC_MAX_COMPRESSED_BLOCK_SIZE);2393surf->u.gfx9.color.display_dcc_pitch_max = AMDGPU_TILING_GET(tiling_flags, DCC_PITCH_MAX);2394scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);2395*mode =2396surf->u.gfx9.swizzle_mode > 0 ? RADEON_SURF_MODE_2D : RADEON_SURF_MODE_LINEAR_ALIGNED;2397} else {2398surf->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);2399surf->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);2400surf->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);2401surf->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));2402surf->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);2403surf->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);2404scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */24052406if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */2407*mode = RADEON_SURF_MODE_2D;2408else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */2409*mode = RADEON_SURF_MODE_1D;2410else2411*mode = RADEON_SURF_MODE_LINEAR_ALIGNED;2412}24132414if (scanout)2415surf->flags |= RADEON_SURF_SCANOUT;2416else2417surf->flags &= ~RADEON_SURF_SCANOUT;2418}24192420void ac_surface_get_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,2421uint64_t 
*tiling_flags)2422{2423*tiling_flags = 0;24242425if (info->chip_class >= GFX9) {2426uint64_t dcc_offset = 0;24272428if (surf->meta_offset) {2429dcc_offset = surf->display_dcc_offset ? surf->display_dcc_offset : surf->meta_offset;2430assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24));2431}24322433*tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, surf->u.gfx9.swizzle_mode);2434*tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, dcc_offset >> 8);2435*tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, surf->u.gfx9.color.display_dcc_pitch_max);2436*tiling_flags |=2437AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, surf->u.gfx9.color.dcc.independent_64B_blocks);2438*tiling_flags |=2439AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, surf->u.gfx9.color.dcc.independent_128B_blocks);2440*tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE,2441surf->u.gfx9.color.dcc.max_compressed_block_size);2442*tiling_flags |= AMDGPU_TILING_SET(SCANOUT, (surf->flags & RADEON_SURF_SCANOUT) != 0);2443} else {2444if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D)2445*tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */2446else if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D)2447*tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */2448else2449*tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */24502451*tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, surf->u.legacy.pipe_config);2452*tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(surf->u.legacy.bankw));2453*tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(surf->u.legacy.bankh));2454if (surf->u.legacy.tile_split)2455*tiling_flags |=2456AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(surf->u.legacy.tile_split));2457*tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(surf->u.legacy.mtilea));2458*tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(surf->u.legacy.num_banks) - 1);24592460if (surf->flags & 
RADEON_SURF_SCANOUT)2461*tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */2462else2463*tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */2464}2465}24662467static uint32_t ac_get_umd_metadata_word1(const struct radeon_info *info)2468{2469return (ATI_VENDOR_ID << 16) | info->pci_id;2470}24712472/* This should be called after ac_compute_surface. */2473bool ac_surface_set_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,2474unsigned num_storage_samples, unsigned num_mipmap_levels,2475unsigned size_metadata, const uint32_t metadata[64])2476{2477const uint32_t *desc = &metadata[2];2478uint64_t offset;24792480if (surf->modifier != DRM_FORMAT_MOD_INVALID)2481return true;24822483if (info->chip_class >= GFX9)2484offset = surf->u.gfx9.surf_offset;2485else2486offset = (uint64_t)surf->u.legacy.level[0].offset_256B * 256;24872488if (offset || /* Non-zero planes ignore metadata. */2489size_metadata < 10 * 4 || /* at least 2(header) + 8(desc) dwords */2490metadata[0] == 0 || /* invalid version number */2491metadata[1] != ac_get_umd_metadata_word1(info)) /* invalid PCI ID */ {2492/* Disable DCC because it might not be enabled. */2493ac_surface_zero_dcc_fields(surf);24942495/* Don't report an error if the texture comes from an incompatible driver,2496* but this might not work.2497*/2498return true;2499}25002501/* Validate that sample counts and the number of mipmap levels match. 
/* This should be called after ac_compute_surface.
 *
 * Apply imported UMD metadata to "surf": validate the sample count or mip
 * level count recorded in the embedded image descriptor (metadata[2..9])
 * against the caller's values and read the DCC offset and alignment bits
 * from it.
 *
 * Returns false only when the descriptor disagrees with
 * num_storage_samples / num_mipmap_levels or the chip class is not handled.
 * Surfaces with a modifier, and metadata that is absent or written for
 * another device, return true (in the latter case with DCC disabled).
 */
bool ac_surface_set_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
                                 unsigned num_storage_samples, unsigned num_mipmap_levels,
                                 unsigned size_metadata, const uint32_t metadata[64])
{
   /* The image descriptor starts after the two header dwords. */
   const uint32_t *desc = &metadata[2];
   uint64_t offset;

   /* Surfaces described by a modifier do not use this metadata. */
   if (surf->modifier != DRM_FORMAT_MOD_INVALID)
      return true;

   if (info->chip_class >= GFX9)
      offset = surf->u.gfx9.surf_offset;
   else
      offset = (uint64_t)surf->u.legacy.level[0].offset_256B * 256;

   if (offset || /* Non-zero planes ignore metadata. */
       size_metadata < 10 * 4 || /* at least 2(header) + 8(desc) dwords */
       metadata[0] == 0 || /* invalid version number */
       metadata[1] != ac_get_umd_metadata_word1(info)) /* invalid PCI ID */ {
      /* Disable DCC because it might not be enabled. */
      ac_surface_zero_dcc_fields(surf);

      /* Don't report an error if the texture comes from an incompatible driver,
       * but this might not work.
       */
      return true;
   }

   /* Validate that sample counts and the number of mipmap levels match. */
   unsigned desc_last_level = G_008F1C_LAST_LEVEL(desc[3]);
   unsigned type = G_008F1C_TYPE(desc[3]);

   /* For MSAA resource types LAST_LEVEL is compared against log2(samples);
    * for everything else against the index of the last mip level.
    */
   if (type == V_008F1C_SQ_RSRC_IMG_2D_MSAA || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
      unsigned log_samples = util_logbase2(MAX2(1, num_storage_samples));

      if (desc_last_level != log_samples) {
         fprintf(stderr,
                 "amdgpu: invalid MSAA texture import, "
                 "metadata has log2(samples) = %u, the caller set %u\n",
                 desc_last_level, log_samples);
         return false;
      }
   } else {
      if (desc_last_level != num_mipmap_levels - 1) {
         fprintf(stderr,
                 "amdgpu: invalid mipmapped texture import, "
                 "metadata has last_level = %u, the caller set %u\n",
                 desc_last_level, num_mipmap_levels - 1);
         return false;
      }
   }

   if (info->chip_class >= GFX8 && G_008F28_COMPRESSION_EN(desc[6])) {
      /* Read DCC information. */
      switch (info->chip_class) {
      case GFX8:
         surf->meta_offset = (uint64_t)desc[7] << 8;
         break;

      case GFX9:
         /* desc[7] supplies the offset bits starting at bit 8, desc[5] the
          * bits starting at bit 40.
          */
         surf->meta_offset =
            ((uint64_t)desc[7] << 8) | ((uint64_t)G_008F24_META_DATA_ADDRESS(desc[5]) << 40);
         surf->u.gfx9.color.dcc.pipe_aligned = G_008F24_META_PIPE_ALIGNED(desc[5]);
         surf->u.gfx9.color.dcc.rb_aligned = G_008F24_META_RB_ALIGNED(desc[5]);

         /* If DCC is unaligned, this can only be a displayable image. */
         if (!surf->u.gfx9.color.dcc.pipe_aligned && !surf->u.gfx9.color.dcc.rb_aligned)
            assert(surf->is_displayable);
         break;

      case GFX10:
      case GFX10_3:
         /* desc[6] supplies the offset bits starting at bit 8, desc[7] the
          * bits starting at bit 16.
          */
         surf->meta_offset =
            ((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc[6]) << 8) | ((uint64_t)desc[7] << 16);
         surf->u.gfx9.color.dcc.pipe_aligned = G_00A018_META_PIPE_ALIGNED(desc[6]);
         break;

      default:
         assert(0);
         return false;
      }
   } else {
      /* Disable DCC. dcc_offset is always set by texture_from_handle
       * and must be cleared here.
       */
      ac_surface_zero_dcc_fields(surf);
   }

   return true;
}
dcc_offset is always set by texture_from_handle2556* and must be cleared here.2557*/2558ac_surface_zero_dcc_fields(surf);2559}25602561return true;2562}25632564void ac_surface_get_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,2565unsigned num_mipmap_levels, uint32_t desc[8],2566unsigned *size_metadata, uint32_t metadata[64])2567{2568/* Clear the base address and set the relative DCC offset. */2569desc[0] = 0;2570desc[1] &= C_008F14_BASE_ADDRESS_HI;25712572switch (info->chip_class) {2573case GFX6:2574case GFX7:2575break;2576case GFX8:2577desc[7] = surf->meta_offset >> 8;2578break;2579case GFX9:2580desc[7] = surf->meta_offset >> 8;2581desc[5] &= C_008F24_META_DATA_ADDRESS;2582desc[5] |= S_008F24_META_DATA_ADDRESS(surf->meta_offset >> 40);2583break;2584case GFX10:2585case GFX10_3:2586desc[6] &= C_00A018_META_DATA_ADDRESS_LO;2587desc[6] |= S_00A018_META_DATA_ADDRESS_LO(surf->meta_offset >> 8);2588desc[7] = surf->meta_offset >> 16;2589break;2590default:2591assert(0);2592}25932594/* Metadata image format format version 1:2595* [0] = 1 (metadata format identifier)2596* [1] = (VENDOR_ID << 16) | PCI_ID2597* [2:9] = image descriptor for the whole resource2598* [2] is always 0, because the base address is cleared2599* [9] is the DCC offset bits [39:8] from the beginning of2600* the buffer2601* [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level2602*/26032604metadata[0] = 1; /* metadata image format version 1 */26052606/* Tiling modes are ambiguous without a PCI ID. */2607metadata[1] = ac_get_umd_metadata_word1(info);26082609/* Dwords [2:9] contain the image descriptor. */2610memcpy(&metadata[2], desc, 8 * 4);2611*size_metadata = 10 * 4;26122613/* Dwords [10:..] contain the mipmap level offsets. 
*/2614if (info->chip_class <= GFX8) {2615for (unsigned i = 0; i < num_mipmap_levels; i++)2616metadata[10 + i] = surf->u.legacy.level[i].offset_256B;26172618*size_metadata += num_mipmap_levels * 4;2619}2620}26212622static uint32_t ac_surface_get_gfx9_pitch_align(struct radeon_surf *surf)2623{2624if (surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR)2625return 256 / surf->bpe;26262627if (surf->u.gfx9.resource_type == RADEON_RESOURCE_3D)2628return 1; /* TODO */26292630unsigned bpe_shift = util_logbase2(surf->bpe) / 2;2631switch(surf->u.gfx9.swizzle_mode & ~3) {2632case ADDR_SW_LINEAR: /* 256B block. */2633return 16 >> bpe_shift;2634case ADDR_SW_4KB_Z:2635case ADDR_SW_4KB_Z_X:2636return 64 >> bpe_shift;2637case ADDR_SW_64KB_Z:2638case ADDR_SW_64KB_Z_T:2639case ADDR_SW_64KB_Z_X:2640return 256 >> bpe_shift;2641case ADDR_SW_VAR_Z_X:2642default:2643return 1; /* TODO */2644}2645}26462647bool ac_surface_override_offset_stride(const struct radeon_info *info, struct radeon_surf *surf,2648unsigned num_mipmap_levels, uint64_t offset, unsigned pitch)2649{2650/*2651* GFX10 and newer don't support custom strides. Furthermore, for2652* multiple miplevels or compression data we'd really need to rerun2653* addrlib to update all the fields in the surface. 
That, however, is a2654* software limitation and could be relaxed later.2655*/2656bool require_equal_pitch = surf->surf_size != surf->total_size ||2657num_mipmap_levels != 1 ||2658info->chip_class >= GFX10;26592660if (info->chip_class >= GFX9) {2661if (pitch) {2662if (surf->u.gfx9.surf_pitch != pitch && require_equal_pitch)2663return false;26642665if ((ac_surface_get_gfx9_pitch_align(surf) - 1) & pitch)2666return false;26672668if (pitch != surf->u.gfx9.surf_pitch) {2669unsigned slices = surf->surf_size / surf->u.gfx9.surf_slice_size;26702671surf->u.gfx9.surf_pitch = pitch;2672surf->u.gfx9.epitch = pitch - 1;2673surf->u.gfx9.surf_slice_size = (uint64_t)pitch * surf->u.gfx9.surf_height * surf->bpe;2674surf->total_size = surf->surf_size = surf->u.gfx9.surf_slice_size * slices;2675}2676}2677surf->u.gfx9.surf_offset = offset;2678if (surf->u.gfx9.zs.stencil_offset)2679surf->u.gfx9.zs.stencil_offset += offset;2680} else {2681if (pitch) {2682if (surf->u.legacy.level[0].nblk_x != pitch && require_equal_pitch)2683return false;26842685surf->u.legacy.level[0].nblk_x = pitch;2686surf->u.legacy.level[0].slice_size_dw =2687((uint64_t)pitch * surf->u.legacy.level[0].nblk_y * surf->bpe) / 4;2688}26892690if (offset) {2691for (unsigned i = 0; i < ARRAY_SIZE(surf->u.legacy.level); ++i)2692surf->u.legacy.level[i].offset_256B += offset / 256;2693}2694}26952696if (offset & ((1 << surf->alignment_log2) - 1) ||2697offset >= UINT64_MAX - surf->total_size)2698return false;26992700if (surf->meta_offset)2701surf->meta_offset += offset;2702if (surf->fmask_offset)2703surf->fmask_offset += offset;2704if (surf->cmask_offset)2705surf->cmask_offset += offset;2706if (surf->display_dcc_offset)2707surf->display_dcc_offset += offset;2708return true;2709}27102711unsigned ac_surface_get_nplanes(const struct radeon_surf *surf)2712{2713if (surf->modifier == DRM_FORMAT_MOD_INVALID)2714return 1;2715else if (surf->display_dcc_offset)2716return 3;2717else if (surf->meta_offset)2718return 2;2719else2720return 
1;2721}27222723uint64_t ac_surface_get_plane_offset(enum chip_class chip_class,2724const struct radeon_surf *surf,2725unsigned plane, unsigned layer)2726{2727switch (plane) {2728case 0:2729if (chip_class >= GFX9) {2730return surf->u.gfx9.surf_offset +2731layer * surf->u.gfx9.surf_slice_size;2732} else {2733return (uint64_t)surf->u.legacy.level[0].offset_256B * 256 +2734layer * (uint64_t)surf->u.legacy.level[0].slice_size_dw * 4;2735}2736case 1:2737assert(!layer);2738return surf->display_dcc_offset ?2739surf->display_dcc_offset : surf->meta_offset;2740case 2:2741assert(!layer);2742return surf->meta_offset;2743default:2744unreachable("Invalid plane index");2745}2746}27472748uint64_t ac_surface_get_plane_stride(enum chip_class chip_class,2749const struct radeon_surf *surf,2750unsigned plane)2751{2752switch (plane) {2753case 0:2754if (chip_class >= GFX9) {2755return surf->u.gfx9.surf_pitch * surf->bpe;2756} else {2757return surf->u.legacy.level[0].nblk_x * surf->bpe;2758}2759case 1:2760return 1 + (surf->display_dcc_offset ?2761surf->u.gfx9.color.display_dcc_pitch_max : surf->u.gfx9.color.dcc_pitch_max);2762case 2:2763return surf->u.gfx9.color.dcc_pitch_max + 1;2764default:2765unreachable("Invalid plane index");2766}2767}27682769uint64_t ac_surface_get_plane_size(const struct radeon_surf *surf,2770unsigned plane)2771{2772switch (plane) {2773case 0:2774return surf->surf_size;2775case 1:2776return surf->display_dcc_offset ?2777surf->u.gfx9.color.display_dcc_size : surf->meta_size;2778case 2:2779return surf->meta_size;2780default:2781unreachable("Invalid plane index");2782}2783}27842785void ac_surface_print_info(FILE *out, const struct radeon_info *info,2786const struct radeon_surf *surf)2787{2788if (info->chip_class >= GFX9) {2789fprintf(out,2790" Surf: size=%" PRIu64 ", slice_size=%" PRIu64 ", "2791"alignment=%u, swmode=%u, epitch=%u, pitch=%u, blk_w=%u, "2792"blk_h=%u, bpe=%u, flags=0x%"PRIx64"\n",2793surf->surf_size, surf->u.gfx9.surf_slice_size,27941 << 
/* Print a human-readable description of "surf" to "out".
 *
 * One line is always printed for the main surface (plus a "Layout" line on
 * pre-GFX9 chips); FMask, CMask, HTile/DCC and stencil lines are printed
 * only when the corresponding offset is non-zero or has_stencil is set.
 * The fields printed differ between GFX9+ and the legacy layout.
 */
void ac_surface_print_info(FILE *out, const struct radeon_info *info,
                           const struct radeon_surf *surf)
{
   if (info->chip_class >= GFX9) {
      fprintf(out,
              " Surf: size=%" PRIu64 ", slice_size=%" PRIu64 ", "
              "alignment=%u, swmode=%u, epitch=%u, pitch=%u, blk_w=%u, "
              "blk_h=%u, bpe=%u, flags=0x%"PRIx64"\n",
              surf->surf_size, surf->u.gfx9.surf_slice_size,
              1 << surf->surf_alignment_log2, surf->u.gfx9.swizzle_mode,
              surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch,
              surf->blk_w, surf->blk_h, surf->bpe, surf->flags);

      if (surf->fmask_offset)
         fprintf(out,
                 " FMask: offset=%" PRIu64 ", size=%" PRIu64 ", "
                 "alignment=%u, swmode=%u, epitch=%u\n",
                 surf->fmask_offset, surf->fmask_size,
                 1 << surf->fmask_alignment_log2, surf->u.gfx9.color.fmask_swizzle_mode,
                 surf->u.gfx9.color.fmask_epitch);

      if (surf->cmask_offset)
         fprintf(out,
                 " CMask: offset=%" PRIu64 ", size=%u, "
                 "alignment=%u\n",
                 surf->cmask_offset, surf->cmask_size,
                 1 << surf->cmask_alignment_log2);

      /* The metadata of a depth/stencil surface is reported as HTile ... */
      if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset)
         fprintf(out,
                 " HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n",
                 surf->meta_offset, surf->meta_size,
                 1 << surf->meta_alignment_log2);

      /* ... and the metadata of a color surface as DCC. */
      if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
         fprintf(out,
                 " DCC: offset=%" PRIu64 ", size=%u, "
                 "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n",
                 surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2,
                 surf->u.gfx9.color.display_dcc_pitch_max, surf->num_meta_levels);

      if (surf->has_stencil)
         fprintf(out,
                 " Stencil: offset=%" PRIu64 ", swmode=%u, epitch=%u\n",
                 surf->u.gfx9.zs.stencil_offset,
                 surf->u.gfx9.zs.stencil_swizzle_mode,
                 surf->u.gfx9.zs.stencil_epitch);
   } else {
      fprintf(out,
              " Surf: size=%" PRIu64 ", alignment=%u, blk_w=%u, blk_h=%u, "
              "bpe=%u, flags=0x%"PRIx64"\n",
              surf->surf_size, 1 << surf->surf_alignment_log2, surf->blk_w,
              surf->blk_h, surf->bpe, surf->flags);

      fprintf(out,
              " Layout: size=%" PRIu64 ", alignment=%u, bankw=%u, bankh=%u, "
              "nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
              surf->surf_size, 1 << surf->surf_alignment_log2,
              surf->u.legacy.bankw, surf->u.legacy.bankh,
              surf->u.legacy.num_banks, surf->u.legacy.mtilea,
              surf->u.legacy.tile_split, surf->u.legacy.pipe_config,
              (surf->flags & RADEON_SURF_SCANOUT) != 0);

      if (surf->fmask_offset)
         fprintf(out,
                 " FMask: offset=%" PRIu64 ", size=%" PRIu64 ", "
                 "alignment=%u, pitch_in_pixels=%u, bankh=%u, "
                 "slice_tile_max=%u, tile_mode_index=%u\n",
                 surf->fmask_offset, surf->fmask_size,
                 1 << surf->fmask_alignment_log2, surf->u.legacy.color.fmask.pitch_in_pixels,
                 surf->u.legacy.color.fmask.bankh,
                 surf->u.legacy.color.fmask.slice_tile_max,
                 surf->u.legacy.color.fmask.tiling_index);

      if (surf->cmask_offset)
         fprintf(out,
                 " CMask: offset=%" PRIu64 ", size=%u, alignment=%u, "
                 "slice_tile_max=%u\n",
                 surf->cmask_offset, surf->cmask_size,
                 1 << surf->cmask_alignment_log2, surf->u.legacy.color.cmask_slice_tile_max);

      if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset)
         fprintf(out, " HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n",
                 surf->meta_offset, surf->meta_size,
                 1 << surf->meta_alignment_log2);

      if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
         fprintf(out, " DCC: offset=%" PRIu64 ", size=%u, alignment=%u\n",
                 surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2);

      if (surf->has_stencil)
         fprintf(out, " StencilLayout: tilesplit=%u\n",
                 surf->u.legacy.stencil_tile_split);
   }
}
surf->u.legacy.pipe_config,2846(surf->flags & RADEON_SURF_SCANOUT) != 0);28472848if (surf->fmask_offset)2849fprintf(out,2850" FMask: offset=%" PRIu64 ", size=%" PRIu64 ", "2851"alignment=%u, pitch_in_pixels=%u, bankh=%u, "2852"slice_tile_max=%u, tile_mode_index=%u\n",2853surf->fmask_offset, surf->fmask_size,28541 << surf->fmask_alignment_log2, surf->u.legacy.color.fmask.pitch_in_pixels,2855surf->u.legacy.color.fmask.bankh,2856surf->u.legacy.color.fmask.slice_tile_max,2857surf->u.legacy.color.fmask.tiling_index);28582859if (surf->cmask_offset)2860fprintf(out,2861" CMask: offset=%" PRIu64 ", size=%u, alignment=%u, "2862"slice_tile_max=%u\n",2863surf->cmask_offset, surf->cmask_size,28641 << surf->cmask_alignment_log2, surf->u.legacy.color.cmask_slice_tile_max);28652866if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset)2867fprintf(out, " HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n",2868surf->meta_offset, surf->meta_size,28691 << surf->meta_alignment_log2);28702871if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)2872fprintf(out, " DCC: offset=%" PRIu64 ", size=%u, alignment=%u\n",2873surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2);28742875if (surf->has_stencil)2876fprintf(out, " StencilLayout: tilesplit=%u\n",2877surf->u.legacy.stencil_tile_split);2878}2879}28802881static nir_ssa_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,2882struct gfx9_meta_equation *equation,2883int blkSizeBias, unsigned blkStart,2884nir_ssa_def *meta_pitch, nir_ssa_def *meta_slice_size,2885nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,2886nir_ssa_def *pipe_xor)2887{2888nir_ssa_def *zero = nir_imm_int(b, 0);2889nir_ssa_def *one = nir_imm_int(b, 1);28902891assert(info->chip_class >= GFX10);28922893unsigned meta_block_width_log2 = util_logbase2(equation->meta_block_width);2894unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height);2895unsigned blkSizeLog2 = meta_block_width_log2 + 
meta_block_height_log2 + blkSizeBias;28962897nir_ssa_def *coord[] = {x, y, z, 0};2898nir_ssa_def *address = zero;28992900for (unsigned i = blkStart; i < blkSizeLog2 + 1; i++) {2901nir_ssa_def *v = zero;29022903for (unsigned c = 0; c < 4; c++) {2904unsigned index = i * 4 + c - (blkStart * 4);2905if (equation->u.gfx10_bits[index]) {2906unsigned mask = equation->u.gfx10_bits[index];2907nir_ssa_def *bits = coord[c];29082909while (mask)2910v = nir_ixor(b, v, nir_iand(b, nir_ushr_imm(b, bits, u_bit_scan(&mask)), one));2911}2912}29132914address = nir_ior(b, address, nir_ishl(b, v, nir_imm_int(b, i)));2915}29162917unsigned blkMask = (1 << blkSizeLog2) - 1;2918unsigned pipeMask = (1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) - 1;2919unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);2920nir_ssa_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);2921nir_ssa_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);2922nir_ssa_def *pb = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);2923nir_ssa_def *blkIndex = nir_iadd(b, nir_imul(b, yb, pb), xb);2924nir_ssa_def *pipeXor = nir_iand_imm(b, nir_ishl(b, nir_iand_imm(b, pipe_xor, pipeMask),2925nir_imm_int(b, m_pipeInterleaveLog2)), blkMask);29262927return nir_iadd(b, nir_iadd(b, nir_imul(b, meta_slice_size, z),2928nir_imul(b, blkIndex, nir_ishl(b, one, nir_imm_int(b, blkSizeLog2)))),2929nir_ixor(b, nir_ushr(b, address, one), pipeXor));2930}29312932nir_ssa_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info,2933unsigned bpe, struct gfx9_meta_equation *equation,2934nir_ssa_def *dcc_pitch, nir_ssa_def *dcc_height,2935nir_ssa_def *dcc_slice_size,2936nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,2937nir_ssa_def *sample, nir_ssa_def *pipe_xor)2938{2939nir_ssa_def *zero = nir_imm_int(b, 0);2940nir_ssa_def *one = nir_imm_int(b, 1);29412942if (info->chip_class >= GFX10) {2943unsigned bpp_log2 = util_logbase2(bpe);29442945return gfx10_nir_meta_addr_from_coord(b, info, 
equation, bpp_log2 - 8, 1,2946dcc_pitch, dcc_slice_size,2947x, y, z, pipe_xor);2948} else {2949assert(info->chip_class == GFX9);29502951unsigned meta_block_width_log2 = util_logbase2(equation->meta_block_width);2952unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height);2953unsigned meta_block_depth_log2 = util_logbase2(equation->meta_block_depth);29542955unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);2956unsigned numPipeBits = equation->u.gfx9.num_pipe_bits;2957nir_ssa_def *pitchInBlock = nir_ushr_imm(b, dcc_pitch, meta_block_width_log2);2958nir_ssa_def *sliceSizeInBlock = nir_imul(b, nir_ushr_imm(b, dcc_height, meta_block_height_log2),2959pitchInBlock);29602961nir_ssa_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);2962nir_ssa_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);2963nir_ssa_def *zb = nir_ushr_imm(b, z, meta_block_depth_log2);29642965nir_ssa_def *blockIndex = nir_iadd(b, nir_iadd(b, nir_imul(b, zb, sliceSizeInBlock),2966nir_imul(b, yb, pitchInBlock)), xb);2967nir_ssa_def *coords[] = {x, y, z, sample, blockIndex};29682969nir_ssa_def *address = zero;2970unsigned num_bits = equation->u.gfx9.num_bits;2971assert(num_bits <= 32);29722973/* Compute the address up until the last bit that doesn't use the block index. */2974for (unsigned i = 0; i < num_bits - 1; i++) {2975nir_ssa_def *xor = zero;29762977for (unsigned c = 0; c < 5; c++) {2978if (equation->u.gfx9.bit[i].coord[c].dim >= 5)2979continue;29802981assert(equation->u.gfx9.bit[i].coord[c].ord < 32);2982nir_ssa_def *ison =2983nir_iand(b, nir_ushr_imm(b, coords[equation->u.gfx9.bit[i].coord[c].dim],2984equation->u.gfx9.bit[i].coord[c].ord), one);29852986xor = nir_ixor(b, xor, ison);2987}2988address = nir_ior(b, address, nir_ishl(b, xor, nir_imm_int(b, i)));2989}29902991/* Fill the remaining bits with the block index. 
*/2992unsigned last = num_bits - 1;2993address = nir_ior(b, address,2994nir_ishl(b, nir_ushr_imm(b, blockIndex,2995equation->u.gfx9.bit[last].coord[0].ord),2996nir_imm_int(b, last)));29972998nir_ssa_def *pipeXor = nir_iand_imm(b, pipe_xor, (1 << numPipeBits) - 1);2999return nir_ixor(b, nir_ushr(b, address, one),3000nir_ishl(b, pipeXor, nir_imm_int(b, m_pipeInterleaveLog2)));3001}3002}30033004nir_ssa_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info,3005struct gfx9_meta_equation *equation,3006nir_ssa_def *htile_pitch,3007nir_ssa_def *htile_slice_size,3008nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,3009nir_ssa_def *pipe_xor)3010{3011return gfx10_nir_meta_addr_from_coord(b, info, equation, -4, 2,3012htile_pitch, htile_slice_size,3013x, y, z, pipe_xor);3014}301530163017