Path: blob/21.2-virgl/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp
7086 views
/*1* Copyright © 2007-2019 Advanced Micro Devices, Inc.2* All Rights Reserved.3*4* Permission is hereby granted, free of charge, to any person obtaining5* a copy of this software and associated documentation files (the6* "Software"), to deal in the Software without restriction, including7* without limitation the rights to use, copy, modify, merge, publish,8* distribute, sub license, and/or sell copies of the Software, and to9* permit persons to whom the Software is furnished to do so, subject to10* the following conditions:11*12* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,13* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES14* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND15* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS16* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER17* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE19* USE OR OTHER DEALINGS IN THE SOFTWARE.20*21* The above copyright notice and this permission notice (including the22* next paragraph) shall be included in all copies or substantial portions23* of the Software.24*/2526/**27************************************************************************************************************************28* @file gfx10addrlib.cpp29* @brief Contain the implementation for the Gfx10Lib class.30************************************************************************************************************************31*/3233#include "gfx10addrlib.h"34#include "gfx10_gb_reg.h"3536#include "amdgpu_asic_addr.h"3738////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////39////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////4041namespace Addr42{43/**44************************************************************************************************************************45* Gfx10HwlInit46*47* @brief48* Creates an Gfx10Lib object.49*50* @return51* Returns an Gfx10Lib object pointer.52************************************************************************************************************************53*/54Addr::Lib* Gfx10HwlInit(const Client* pClient)55{56return V2::Gfx10Lib::CreateObj(pClient);57}5859namespace V260{6162////////////////////////////////////////////////////////////////////////////////////////////////////63// Static Const Member64////////////////////////////////////////////////////////////////////////////////////////////////////6566const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =67{//Linear 256B 4KB 64KB Var Z Std Disp Rot XOR T RtOpt Reserved68{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR69{0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_256B_S70{0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_256B_D71{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved7273{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved74{0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_S75{0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_4KB_D76{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved7778{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved79{0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_S80{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0}, // ADDR_SW_64KB_D81{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved8283{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved84{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved85{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved86{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved8788{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved89{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_S_T90{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0}, // ADDR_SW_64KB_D_T91{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved9293{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved94{0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_S_X95{0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_4KB_D_X96{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved9798{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_Z_X99{0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_S_X100{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0}, // ADDR_SW_64KB_D_X101{0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_64KB_R_X102103{0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0}, // ADDR_SW_VAR_Z_X104{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved105{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // Reserved106{0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0}, // ADDR_SW_VAR_R_X107{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, // ADDR_SW_LINEAR_GENERAL108};109110const Dim3d Gfx10Lib::Block256_3d[] = {{8, 4, 8}, {4, 4, 8}, {4, 4, 4}, {4, 2, 4}, {2, 2, 4}};111112const Dim3d Gfx10Lib::Block64K_Log2_3d[] = {{6, 5, 5}, {5, 5, 5}, {5, 5, 4}, {5, 4, 4}, {4, 4, 4}};113const Dim3d Gfx10Lib::Block4K_Log2_3d[] = {{4, 4, 4}, {3, 4, 4}, {3, 4, 3}, {3, 3, 3}, {2, 3, 3}};114115/**116************************************************************************************************************************117* Gfx10Lib::Gfx10Lib118*119* @brief120* Constructor121*122************************************************************************************************************************123*/124Gfx10Lib::Gfx10Lib(const Client* pClient)125:126Lib(pClient),127m_numPkrLog2(0),128m_numSaLog2(0),129m_colorBaseIndex(0),130m_xmaskBaseIndex(0),131m_dccBaseIndex(0)132{133memset(&m_settings, 0, sizeof(m_settings));134memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));135}136137/**138************************************************************************************************************************139* Gfx10Lib::~Gfx10Lib140*141* @brief142* Destructor143************************************************************************************************************************144*/145Gfx10Lib::~Gfx10Lib()146{147}148149/**150************************************************************************************************************************151* Gfx10Lib::HwlComputeHtileInfo152*153* @brief154* Interface function stub of AddrComputeHtilenfo155*156* @return157* ADDR_E_RETURNCODE158************************************************************************************************************************159*/160ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(161const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn, ///< [in] input structure162ADDR2_COMPUTE_HTILE_INFO_OUTPUT* pOut ///< [out] output structure163) const164{165ADDR_E_RETURNCODE ret = ADDR_OK;166167if (((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&168((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))) ||169(pIn->hTileFlags.pipeAligned != TRUE))170{171ret = ADDR_INVALIDPARAMS;172}173else174{175Dim3d metaBlk = {};176const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataDepthStencil,177ADDR_RSRC_TEX_2D,178pIn->swizzleMode,1790,1800,181TRUE,182&metaBlk);183184pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);185pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);186pOut->baseAlign = Max(metaBlkSize, 1u << (m_pipesLog2 + 11u));187pOut->metaBlkWidth = metaBlk.w;188pOut->metaBlkHeight = metaBlk.h;189190if (pIn->numMipLevels > 1)191{192ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);193194UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;195196for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >=0; i--)197{198UINT_32 mipWidth, mipHeight;199200GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);201202mipWidth = PowTwoAlign(mipWidth, metaBlk.w);203mipHeight = PowTwoAlign(mipHeight, metaBlk.h);204205const UINT_32 pitchInM = mipWidth / metaBlk.w;206const UINT_32 heightInM = mipHeight / metaBlk.h;207const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;208209if (pOut->pMipInfo != NULL)210{211pOut->pMipInfo[i].inMiptail = FALSE;212pOut->pMipInfo[i].offset = offset;213pOut->pMipInfo[i].sliceSize = mipSliceSize;214}215216offset += mipSliceSize;217}218219pOut->sliceSize = offset;220pOut->metaBlkNumPerSlice = offset / metaBlkSize;221pOut->htileBytes = pOut->sliceSize * pIn->numSlices;222223if (pOut->pMipInfo != NULL)224{225for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)226{227pOut->pMipInfo[i].inMiptail = TRUE;228pOut->pMipInfo[i].offset = 0;229pOut->pMipInfo[i].sliceSize = 0;230}231232if (pIn->firstMipIdInTail != pIn->numMipLevels)233{234pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;235}236}237}238else239{240const UINT_32 pitchInM = pOut->pitch / metaBlk.w;241const UINT_32 heightInM = pOut->height / metaBlk.h;242243pOut->metaBlkNumPerSlice = pitchInM * heightInM;244pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;245pOut->htileBytes = pOut->sliceSize * pIn->numSlices;246247if (pOut->pMipInfo != NULL)248{249pOut->pMipInfo[0].inMiptail = FALSE;250pOut->pMipInfo[0].offset = 0;251pOut->pMipInfo[0].sliceSize = pOut->sliceSize;252}253}254255// Get the HTILE address equation (copied from HtileAddrFromCoord).256// HTILE addressing depends on the number of samples, but this code doesn't support it yet.257const UINT_32 index = m_xmaskBaseIndex;258const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;259260ADDR_C_ASSERT(sizeof(GFX10_HTILE_SW_PATTERN[patIdxTable[index]]) == 72 * 2);261pOut->equation.gfx10_bits = (UINT_16 *)GFX10_HTILE_SW_PATTERN[patIdxTable[index]];262}263264return ret;265}266267/**268************************************************************************************************************************269* Gfx10Lib::HwlComputeCmaskInfo270*271* @brief272* Interface function stub of AddrComputeCmaskInfo273*274* @return275* ADDR_E_RETURNCODE276************************************************************************************************************************277*/278ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(279const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn, ///< [in] input structure280ADDR2_COMPUTE_CMASK_INFO_OUTPUT* pOut ///< [out] output structure281) const282{283ADDR_E_RETURNCODE ret = ADDR_OK;284285if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||286(pIn->cMaskFlags.pipeAligned != TRUE) ||287((pIn->swizzleMode != ADDR_SW_64KB_Z_X) &&288((pIn->swizzleMode != ADDR_SW_VAR_Z_X) || (m_blockVarSizeLog2 == 0))))289{290ret = ADDR_INVALIDPARAMS;291}292else293{294Dim3d metaBlk = {};295const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataFmask,296ADDR_RSRC_TEX_2D,297pIn->swizzleMode,2980,2990,300TRUE,301&metaBlk);302303pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);304pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);305pOut->baseAlign = metaBlkSize;306pOut->metaBlkWidth = metaBlk.w;307pOut->metaBlkHeight = metaBlk.h;308309if (pIn->numMipLevels > 1)310{311ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);312313UINT_32 metaBlkPerSlice = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : 1;314315for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)316{317UINT_32 mipWidth, mipHeight;318319GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);320321mipWidth = PowTwoAlign(mipWidth, metaBlk.w);322mipHeight = PowTwoAlign(mipHeight, metaBlk.h);323324const UINT_32 pitchInM = mipWidth / metaBlk.w;325const UINT_32 heightInM = mipHeight / metaBlk.h;326327if (pOut->pMipInfo != NULL)328{329pOut->pMipInfo[i].inMiptail = FALSE;330pOut->pMipInfo[i].offset = metaBlkPerSlice * metaBlkSize;331pOut->pMipInfo[i].sliceSize = pitchInM * heightInM * metaBlkSize;332}333334metaBlkPerSlice += pitchInM * heightInM;335}336337pOut->metaBlkNumPerSlice = metaBlkPerSlice;338339if (pOut->pMipInfo != NULL)340{341for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)342{343pOut->pMipInfo[i].inMiptail = TRUE;344pOut->pMipInfo[i].offset = 0;345pOut->pMipInfo[i].sliceSize = 0;346}347348if (pIn->firstMipIdInTail != pIn->numMipLevels)349{350pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;351}352}353}354else355{356const UINT_32 pitchInM = pOut->pitch / metaBlk.w;357const UINT_32 heightInM = pOut->height / metaBlk.h;358359pOut->metaBlkNumPerSlice = pitchInM * heightInM;360361if (pOut->pMipInfo != NULL)362{363pOut->pMipInfo[0].inMiptail = FALSE;364pOut->pMipInfo[0].offset = 0;365pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;366}367}368369pOut->sliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;370pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;371}372373return ret;374}375376/**377************************************************************************************************************************378* Gfx10Lib::HwlComputeDccInfo379*380* @brief381* Interface function to compute DCC key info382*383* @return384* ADDR_E_RETURNCODE385************************************************************************************************************************386*/387ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(388const ADDR2_COMPUTE_DCCINFO_INPUT* pIn, ///< [in] input structure389ADDR2_COMPUTE_DCCINFO_OUTPUT* pOut ///< [out] output structure390) const391{392ADDR_E_RETURNCODE ret = ADDR_OK;393394if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode))395{396// Hardware support dcc for 256 swizzle mode, but address lib will not support it because we only397// select 256 swizzle mode for small surface, and it's not helpful to enable dcc for small surface.398ret = ADDR_INVALIDPARAMS;399}400else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))401{402// DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1403ret = ADDR_INVALIDPARAMS;404}405else406{407const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);408409{410// only SW_*_R_X surfaces may be DCC compressed when attached to the CB411ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));412413const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);414415pOut->compressBlkWidth = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;416pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;417pOut->compressBlkDepth = isThick ? Block256_3d[elemLog2].d : 1;418}419420if (ret == ADDR_OK)421{422Dim3d metaBlk = {};423const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));424const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,425pIn->resourceType,426pIn->swizzleMode,427elemLog2,428numFragLog2,429pIn->dccKeyFlags.pipeAligned,430&metaBlk);431432pOut->dccRamBaseAlign = metaBlkSize;433pOut->metaBlkWidth = metaBlk.w;434pOut->metaBlkHeight = metaBlk.h;435pOut->metaBlkDepth = metaBlk.d;436pOut->metaBlkSize = metaBlkSize;437438pOut->pitch = PowTwoAlign(pIn->unalignedWidth, metaBlk.w);439pOut->height = PowTwoAlign(pIn->unalignedHeight, metaBlk.h);440pOut->depth = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);441442if (pIn->numMipLevels > 1)443{444ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);445446UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;447448for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)449{450UINT_32 mipWidth, mipHeight;451452GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);453454mipWidth = PowTwoAlign(mipWidth, metaBlk.w);455mipHeight = PowTwoAlign(mipHeight, metaBlk.h);456457const UINT_32 pitchInM = mipWidth / metaBlk.w;458const UINT_32 heightInM = mipHeight / metaBlk.h;459const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;460461if (pOut->pMipInfo != NULL)462{463pOut->pMipInfo[i].inMiptail = FALSE;464pOut->pMipInfo[i].offset = offset;465pOut->pMipInfo[i].sliceSize = mipSliceSize;466}467468offset += mipSliceSize;469}470471pOut->dccRamSliceSize = offset;472pOut->metaBlkNumPerSlice = offset / metaBlkSize;473pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);474475if (pOut->pMipInfo != NULL)476{477for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)478{479pOut->pMipInfo[i].inMiptail = TRUE;480pOut->pMipInfo[i].offset = 0;481pOut->pMipInfo[i].sliceSize = 0;482}483484if (pIn->firstMipIdInTail != pIn->numMipLevels)485{486pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;487}488}489}490else491{492const UINT_32 pitchInM = pOut->pitch / metaBlk.w;493const UINT_32 heightInM = pOut->height / metaBlk.h;494495pOut->metaBlkNumPerSlice = pitchInM * heightInM;496pOut->dccRamSliceSize = pOut->metaBlkNumPerSlice * metaBlkSize;497pOut->dccRamSize = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);498499if (pOut->pMipInfo != NULL)500{501pOut->pMipInfo[0].inMiptail = FALSE;502pOut->pMipInfo[0].offset = 0;503pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;504}505}506507// Get the DCC address equation (copied from DccAddrFromCoord)508const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);509const UINT_32 numPipeLog2 = m_pipesLog2;510UINT_32 index = m_dccBaseIndex + elemLog2;511const UINT_8* patIdxTable;512513if (m_settings.supportRbPlus)514{515patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;516517if (pIn->dccKeyFlags.pipeAligned)518{519index += MaxNumOfBpp;520521if (m_numPkrLog2 < 2)522{523index += m_pipesLog2 * MaxNumOfBpp;524}525else526{527// 4 groups for "m_numPkrLog2 < 2" case528index += 4 * MaxNumOfBpp;529530const UINT_32 dccPipePerPkr = 3;531532index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +533(m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;534}535}536}537else538{539patIdxTable = GFX10_DCC_64K_R_X_PATIDX;540541if (pIn->dccKeyFlags.pipeAligned)542{543index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;544}545else546{547index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;548}549}550551ADDR_C_ASSERT(sizeof(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);552pOut->equation.gfx10_bits = (UINT_16*)GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]];553}554}555556return ret;557}558559/**560************************************************************************************************************************561* Gfx10Lib::HwlComputeCmaskAddrFromCoord562*563* @brief564* Interface function stub of AddrComputeCmaskAddrFromCoord565*566* @return567* ADDR_E_RETURNCODE568************************************************************************************************************************569*/570ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(571const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure572ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure573{574// Only support pipe aligned CMask575ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);576577ADDR2_COMPUTE_CMASK_INFO_INPUT input = {};578input.size = sizeof(input);579input.cMaskFlags = pIn->cMaskFlags;580input.colorFlags = pIn->colorFlags;581input.unalignedWidth = Max(pIn->unalignedWidth, 1u);582input.unalignedHeight = Max(pIn->unalignedHeight, 1u);583input.numSlices = Max(pIn->numSlices, 1u);584input.swizzleMode = pIn->swizzleMode;585input.resourceType = pIn->resourceType;586587ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {};588output.size = sizeof(output);589590ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);591592if (returnCode == ADDR_OK)593{594const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);595const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);596const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;597const UINT_32 index = m_xmaskBaseIndex + fmaskElemLog2;598const UINT_8* patIdxTable =599(pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :600(m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);601602603const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;604const UINT_32 blkMask = (1 << blkSizeLog2) - 1;605const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]],606blkSizeLog2 + 1, // +1 for nibble offset607pIn->x,608pIn->y,609pIn->slice,6100);611const UINT_32 xb = pIn->x / output.metaBlkWidth;612const UINT_32 yb = pIn->y / output.metaBlkHeight;613const UINT_32 pb = output.pitch / output.metaBlkWidth;614const UINT_32 blkIndex = (yb * pb) + xb;615const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;616617pOut->addr = (output.sliceSize * pIn->slice) +618(blkIndex * (1 << blkSizeLog2)) +619((blkOffset >> 1) ^ pipeXor);620pOut->bitPosition = (blkOffset & 1) << 2;621}622623return returnCode;624}625626/**627************************************************************************************************************************628* Gfx10Lib::HwlComputeHtileAddrFromCoord629*630* @brief631* Interface function stub of AddrComputeHtileAddrFromCoord632*633* @return634* ADDR_E_RETURNCODE635************************************************************************************************************************636*/637ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(638const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure639ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure640{641ADDR_E_RETURNCODE returnCode = ADDR_OK;642643if (pIn->numMipLevels > 1)644{645returnCode = ADDR_NOTIMPLEMENTED;646}647else648{649ADDR2_COMPUTE_HTILE_INFO_INPUT input = {};650input.size = sizeof(input);651input.hTileFlags = pIn->hTileFlags;652input.depthFlags = pIn->depthflags;653input.swizzleMode = pIn->swizzleMode;654input.unalignedWidth = Max(pIn->unalignedWidth, 1u);655input.unalignedHeight = Max(pIn->unalignedHeight, 1u);656input.numSlices = Max(pIn->numSlices, 1u);657input.numMipLevels = 1;658659ADDR2_COMPUTE_HTILE_INFO_OUTPUT output = {};660output.size = sizeof(output);661662returnCode = ComputeHtileInfo(&input, &output);663664if (returnCode == ADDR_OK)665{666const UINT_32 numSampleLog2 = Log2(pIn->numSamples);667const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;668const UINT_32 index = m_xmaskBaseIndex + numSampleLog2;669const UINT_8* patIdxTable = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;670671672const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 4;673const UINT_32 blkMask = (1 << blkSizeLog2) - 1;674const UINT_32 blkOffset = ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[patIdxTable[index]],675blkSizeLog2 + 1, // +1 for nibble offset676pIn->x,677pIn->y,678pIn->slice,6790);680const UINT_32 xb = pIn->x / output.metaBlkWidth;681const UINT_32 yb = pIn->y / output.metaBlkHeight;682const UINT_32 pb = output.pitch / output.metaBlkWidth;683const UINT_32 blkIndex = (yb * pb) + xb;684const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;685686pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +687(blkIndex * (1 << blkSizeLog2)) +688((blkOffset >> 1) ^ pipeXor);689}690}691692return returnCode;693}694695/**696************************************************************************************************************************697* Gfx10Lib::HwlComputeHtileCoordFromAddr698*699* @brief700* Interface function stub of AddrComputeHtileCoordFromAddr701*702* @return703* ADDR_E_RETURNCODE704************************************************************************************************************************705*/706ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(707const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn, ///< [in] input structure708ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT* pOut) ///< [out] output structure709{710ADDR_NOT_IMPLEMENTED();711712return ADDR_OK;713}714715/**716************************************************************************************************************************717* Gfx10Lib::HwlSupportComputeDccAddrFromCoord718*719* @brief720* Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter721*722* @return723* ADDR_E_RETURNCODE724************************************************************************************************************************725*/726ADDR_E_RETURNCODE Gfx10Lib::HwlSupportComputeDccAddrFromCoord(727const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)728{729ADDR_E_RETURNCODE returnCode = ADDR_OK;730731if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||732(pIn->swizzleMode != ADDR_SW_64KB_R_X) ||733(pIn->dccKeyFlags.linear == TRUE) ||734(pIn->numFrags > 1) ||735(pIn->numMipLevels > 1) ||736(pIn->mipId > 0))737{738returnCode = ADDR_NOTSUPPORTED;739}740else if ((pIn->pitch == 0) ||741(pIn->metaBlkWidth == 0) ||742(pIn->metaBlkHeight == 0) ||743(pIn->slice > 0 && pIn->dccRamSliceSize == 0))744{745returnCode = ADDR_NOTSUPPORTED;746}747748return returnCode;749}750751/**752************************************************************************************************************************753* Gfx10Lib::HwlComputeDccAddrFromCoord754*755* @brief756* Interface function stub of AddrComputeDccAddrFromCoord757*758* @return759* N/A760************************************************************************************************************************761*/762VOID Gfx10Lib::HwlComputeDccAddrFromCoord(763const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure764ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT* pOut) ///< [out] output structure765{766const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);767const UINT_32 numPipeLog2 = m_pipesLog2;768const UINT_32 pipeMask = (1 << numPipeLog2) - 1;769UINT_32 index = m_dccBaseIndex + elemLog2;770const UINT_8* patIdxTable;771772if (m_settings.supportRbPlus)773{774patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;775776if (pIn->dccKeyFlags.pipeAligned)777{778index += MaxNumOfBpp;779780if (m_numPkrLog2 < 2)781{782index += m_pipesLog2 * MaxNumOfBpp;783}784else785{786// 4 groups for "m_numPkrLog2 < 2" case787index += 4 * MaxNumOfBpp;788789const UINT_32 dccPipePerPkr = 3;790791index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +792(m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;793}794}795}796else797{798patIdxTable = GFX10_DCC_64K_R_X_PATIDX;799800if (pIn->dccKeyFlags.pipeAligned)801{802index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;803}804else805{806index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;807}808}809810const UINT_32 blkSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;811const UINT_32 blkMask = (1 << blkSizeLog2) - 1;812const UINT_32 blkOffset =813ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]],814blkSizeLog2 + 1, // +1 for nibble offset815pIn->x,816pIn->y,817pIn->slice,8180);819const UINT_32 xb = pIn->x / pIn->metaBlkWidth;820const UINT_32 yb = pIn->y / pIn->metaBlkHeight;821const UINT_32 pb = pIn->pitch / pIn->metaBlkWidth;822const UINT_32 blkIndex = (yb * pb) + xb;823const UINT_32 pipeXor = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;824825pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +826(blkIndex * (1 << blkSizeLog2)) +827((blkOffset >> 1) ^ pipeXor);828}829830/**831************************************************************************************************************************832* Gfx10Lib::HwlInitGlobalParams833*834* @brief835* Initializes global parameters836*837* @return838* TRUE if all settings are valid839*840************************************************************************************************************************841*/842BOOL_32 Gfx10Lib::HwlInitGlobalParams(843const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input844{845BOOL_32 valid = TRUE;846GB_ADDR_CONFIG_GFX10 gbAddrConfig;847848gbAddrConfig.u32All = pCreateIn->regValue.gbAddrConfig;849850// These values are copied from CModel code851switch (gbAddrConfig.bits.NUM_PIPES)852{853case ADDR_CONFIG_1_PIPE:854m_pipes = 1;855m_pipesLog2 = 0;856break;857case ADDR_CONFIG_2_PIPE:858m_pipes = 2;859m_pipesLog2 = 1;860break;861case ADDR_CONFIG_4_PIPE:862m_pipes = 4;863m_pipesLog2 = 2;864break;865case ADDR_CONFIG_8_PIPE:866m_pipes = 8;867m_pipesLog2 = 3;868break;869case ADDR_CONFIG_16_PIPE:870m_pipes = 16;871m_pipesLog2 = 4;872break;873case ADDR_CONFIG_32_PIPE:874m_pipes = 32;875m_pipesLog2 = 5;876break;877case ADDR_CONFIG_64_PIPE:878m_pipes = 64;879m_pipesLog2 = 6;880break;881default:882ADDR_ASSERT_ALWAYS();883valid = FALSE;884break;885}886887switch (gbAddrConfig.bits.PIPE_INTERLEAVE_SIZE)888{889case ADDR_CONFIG_PIPE_INTERLEAVE_256B:890m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;891m_pipeInterleaveLog2 = 8;892break;893case ADDR_CONFIG_PIPE_INTERLEAVE_512B:894m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;895m_pipeInterleaveLog2 = 9;896break;897case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:898m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;899m_pipeInterleaveLog2 = 10;900break;901case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:902m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;903m_pipeInterleaveLog2 = 11;904break;905default:906ADDR_ASSERT_ALWAYS();907valid = FALSE;908break;909}910911// Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and912// any larger value requires a post-process (left shift) on the output pipeBankXor bits.913// And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.914ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);915916switch (gbAddrConfig.bits.MAX_COMPRESSED_FRAGS)917{918case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS:919m_maxCompFrag = 1;920m_maxCompFragLog2 = 0;921break;922case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS:923m_maxCompFrag = 2;924m_maxCompFragLog2 = 1;925break;926case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS:927m_maxCompFrag = 4;928m_maxCompFragLog2 = 2;929break;930case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS:931m_maxCompFrag = 8;932m_maxCompFragLog2 = 3;933break;934default:935ADDR_ASSERT_ALWAYS();936valid = FALSE;937break;938}939940{941// Skip unaligned case942m_xmaskBaseIndex += MaxNumOfAA;943944m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfAA;945m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;946947if (m_settings.supportRbPlus)948{949m_numPkrLog2 = gbAddrConfig.bits.NUM_PKRS;950m_numSaLog2 = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;951952ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));953954ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) ==955sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0]));956957if (m_numPkrLog2 >= 2)958{959m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;960m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;961}962}963else964{965const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -966static_cast<UINT_32>(ADDR_CONFIG_1_PIPE) +9671;968969ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);970971ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) ==972sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]));973}974}975976if (m_settings.supportRbPlus)977{978// VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the979// corresponding SW_64KB_* mode980m_blockVarSizeLog2 = m_pipesLog2 + 14;981}982983984if (valid)985{986InitEquationTable();987}988989return valid;990}991992/**993************************************************************************************************************************994* Gfx10Lib::HwlConvertChipFamily995*996* @brief997* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision998* @return999* ChipFamily1000************************************************************************************************************************1001*/1002ChipFamily Gfx10Lib::HwlConvertChipFamily(1003UINT_32 chipFamily, ///< [in] chip family defined in atiih.h1004UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h1005{1006ChipFamily family = ADDR_CHIP_FAMILY_NAVI;10071008m_settings.dccUnsup3DSwDis = 1;1009m_settings.dsMipmapHtileFix = 1;10101011switch (chipFamily)1012{1013case FAMILY_NV:1014if (ASICREV_IS_NAVI10_P(chipRevision))1015{1016m_settings.dsMipmapHtileFix = 0;1017m_settings.isDcn20 = 1;1018}10191020if (ASICREV_IS_NAVI12_P(chipRevision))1021{1022m_settings.isDcn20 = 1;1023}10241025if (ASICREV_IS_NAVI14_M(chipRevision))1026{1027m_settings.isDcn20 = 1;1028}10291030if (ASICREV_IS_SIENNA_CICHLID(chipRevision))1031{1032m_settings.supportRbPlus = 1;1033m_settings.dccUnsup3DSwDis = 0;1034}10351036if (ASICREV_IS_NAVY_FLOUNDER(chipRevision))1037{1038m_settings.supportRbPlus = 1;1039m_settings.dccUnsup3DSwDis = 0;1040}10411042if (ASICREV_IS_DIMGREY_CAVEFISH(chipRevision))1043{1044m_settings.supportRbPlus = 1;1045m_settings.dccUnsup3DSwDis = 0;1046}10471048if (ASICREV_IS_BEIGE_GOBY(chipRevision))1049{1050m_settings.supportRbPlus = 1;1051m_settings.dccUnsup3DSwDis = 0;1052}1053break;10541055case FAMILY_VGH:1056if (ASICREV_IS_VANGOGH(chipRevision))1057{1058m_settings.supportRbPlus = 1;1059m_settings.dccUnsup3DSwDis = 0;1060}1061else1062{1063ADDR_ASSERT(!"Unknown chip revision");1064}10651066break;10671068case FAMILY_YC:1069if (ASICREV_IS_YELLOW_CARP(chipRevision))1070{1071m_settings.supportRbPlus = 1;1072m_settings.dccUnsup3DSwDis = 0;1073}1074else1075{1076ADDR_ASSERT(!"Unknown chip revision");1077}10781079break;10801081default:1082ADDR_ASSERT(!"Unknown chip family");1083break;1084}10851086m_configFlags.use32bppFor422Fmt = TRUE;10871088return family;1089}10901091/**1092************************************************************************************************************************1093* Gfx10Lib::GetBlk256SizeLog21094*1095* @brief1096* Get block 256 size1097*1098* @return1099* N/A1100************************************************************************************************************************1101*/1102void Gfx10Lib::GetBlk256SizeLog2(1103AddrResourceType resourceType, ///< [in] Resource type1104AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode1105UINT_32 elemLog2, ///< [in] element size log21106UINT_32 numSamplesLog2, ///< [in] number of samples1107Dim3d* pBlock ///< [out] block size1108) const1109{1110if (IsThin(resourceType, swizzleMode))1111{1112UINT_32 blockBits = 8 - elemLog2;11131114if (IsZOrderSwizzle(swizzleMode))1115{1116blockBits -= numSamplesLog2;1117}11181119pBlock->w = (blockBits >> 1) + (blockBits & 1);1120pBlock->h = (blockBits >> 1);1121pBlock->d = 0;1122}1123else1124{1125ADDR_ASSERT(IsThick(resourceType, swizzleMode));11261127UINT_32 blockBits = 8 - elemLog2;11281129pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);1130pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);1131pBlock->h = (blockBits / 3);1132}1133}11341135/**1136************************************************************************************************************************1137* Gfx10Lib::GetCompressedBlockSizeLog21138*1139* @brief1140* Get compress block size1141*1142* @return1143* N/A1144************************************************************************************************************************1145*/1146void Gfx10Lib::GetCompressedBlockSizeLog2(1147Gfx10DataType dataType, ///< [in] Data type1148AddrResourceType resourceType, ///< [in] Resource type1149AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode1150UINT_32 elemLog2, ///< [in] element size log21151UINT_32 numSamplesLog2, ///< [in] number of samples1152Dim3d* pBlock ///< [out] block size1153) const1154{1155if (dataType == Gfx10DataColor)1156{1157GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);1158}1159else1160{1161ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));1162pBlock->w = 3;1163pBlock->h = 3;1164pBlock->d = 0;1165}1166}11671168/**1169************************************************************************************************************************1170* Gfx10Lib::GetMetaOverlapLog21171*1172* @brief1173* Get meta block overlap1174*1175* @return1176* N/A1177************************************************************************************************************************1178*/1179INT_32 Gfx10Lib::GetMetaOverlapLog2(1180Gfx10DataType dataType, ///< [in] Data type1181AddrResourceType resourceType, ///< [in] Resource type1182AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode1183UINT_32 elemLog2, ///< [in] element size log21184UINT_32 numSamplesLog2 ///< [in] number of samples1185) const1186{1187Dim3d compBlock;1188Dim3d microBlock;11891190GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);1191GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, µBlock);11921193const INT_32 compSizeLog2 = compBlock.w + compBlock.h + compBlock.d;1194const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;1195const INT_32 maxSizeLog2 = Max(compSizeLog2, blk256SizeLog2);1196const INT_32 numPipesLog2 = GetEffectiveNumPipes();1197INT_32 overlap = numPipesLog2 - maxSizeLog2;11981199if ((numPipesLog2 > 1) && m_settings.supportRbPlus)1200{1201overlap++;1202}12031204// In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)1205if ((elemLog2 == 4) && (numSamplesLog2 == 3))1206{1207overlap--;1208}1209overlap = Max(overlap, 0);1210return overlap;1211}12121213/**1214************************************************************************************************************************1215* Gfx10Lib::Get3DMetaOverlapLog21216*1217* @brief1218* Get 3d meta block overlap1219*1220* @return1221* N/A1222************************************************************************************************************************1223*/1224INT_32 Gfx10Lib::Get3DMetaOverlapLog2(1225AddrResourceType resourceType, ///< [in] Resource type1226AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode1227UINT_32 elemLog2 ///< [in] element size log21228) const1229{1230Dim3d microBlock;1231GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, µBlock);12321233INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);12341235if (m_settings.supportRbPlus)1236{1237overlap++;1238}12391240if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))1241{1242overlap = 0;1243}1244return overlap;1245}12461247/**1248************************************************************************************************************************1249* Gfx10Lib::GetPipeRotateAmount1250*1251* @brief1252* Get pipe rotate amount1253*1254* @return1255* Pipe rotate amount1256************************************************************************************************************************1257*/12581259INT_32 Gfx10Lib::GetPipeRotateAmount(1260AddrResourceType resourceType, ///< [in] Resource type1261AddrSwizzleMode swizzleMode ///< [in] Swizzle mode1262) const1263{1264INT_32 amount = 0;12651266if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))1267{1268amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?12691 : m_pipesLog2 - (m_numSaLog2 + 1);1270}12711272return amount;1273}12741275/**1276************************************************************************************************************************1277* Gfx10Lib::GetMetaBlkSize1278*1279* @brief1280* Get metadata block size1281*1282* @return1283* Meta block size1284************************************************************************************************************************1285*/1286UINT_32 Gfx10Lib::GetMetaBlkSize(1287Gfx10DataType dataType, ///< [in] Data type1288AddrResourceType resourceType, ///< [in] Resource type1289AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode1290UINT_32 elemLog2, ///< [in] element size log21291UINT_32 numSamplesLog2, ///< [in] number of samples1292BOOL_32 pipeAlign, ///< [in] pipe align1293Dim3d* pBlock ///< [out] block size1294) const1295{1296INT_32 metablkSizeLog2;12971298{1299const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType);1300const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType);1301const INT_32 compBlkSizeLog2 = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;1302const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?1303numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);1304const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode);1305INT_32 numPipesLog2 = m_pipesLog2;13061307if (IsThin(resourceType, swizzleMode))1308{1309if ((pipeAlign == FALSE) ||1310(IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||1311(IsDisplaySwizzle(resourceType, swizzleMode) == TRUE))1312{1313if (pipeAlign)1314{1315metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);1316metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);1317}1318else1319{1320metablkSizeLog2 = Min(dataBlkSizeLog2, 12);1321}1322}1323else1324{1325if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))1326{1327numPipesLog2++;1328}13291330INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);13311332if (numPipesLog2 >= 4)1333{1334INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);13351336// In 16Bpe 8xaa, we have an extra overlap bit1337if ((pipeRotateLog2 > 0) &&1338(elemLog2 == 4) &&1339(numSamplesLog2 == 3) &&1340(IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))1341{1342overlapLog2++;1343}13441345metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;1346metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);13471348if (m_settings.supportRbPlus &&1349IsRtOptSwizzle(swizzleMode) &&1350(numPipesLog2 == 6) &&1351(numSamplesLog2 == 3) &&1352(m_maxCompFragLog2 == 3) &&1353(metablkSizeLog2 < 15))1354{1355metablkSizeLog2 = 15;1356}1357}1358else1359{1360metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);1361}13621363if (dataType == Gfx10DataDepthStencil)1364{1365// For htile surfaces, pad meta block size to 2K * num_pipes1366metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);1367}13681369const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);13701371if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))1372{1373const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);13741375metablkSizeLog2 = Max(metablkSizeLog2, tmp);1376}1377}13781379const INT_32 metablkBitsLog2 =1380metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;1381pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));1382pBlock->h = 1 << (metablkBitsLog2 >> 1);1383pBlock->d = 1;1384}1385else1386{1387ADDR_ASSERT(IsThick(resourceType, swizzleMode));13881389if (pipeAlign)1390{1391if (m_settings.supportRbPlus &&1392(m_pipesLog2 == m_numSaLog2 + 1) &&1393(m_pipesLog2 > 1) &&1394IsRbAligned(resourceType, swizzleMode))1395{1396numPipesLog2++;1397}13981399const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);14001401metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;1402metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);1403metablkSizeLog2 = Max(metablkSizeLog2, 12);1404}1405else1406{1407metablkSizeLog2 = 12;1408}14091410const INT_32 metablkBitsLog2 =1411metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;1412pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));1413pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));1414pBlock->d = 1 << (metablkBitsLog2 / 3);1415}1416}14171418return (1 << static_cast<UINT_32>(metablkSizeLog2));1419}14201421/**1422************************************************************************************************************************1423* Gfx10Lib::ConvertSwizzlePatternToEquation1424*1425* @brief1426* Convert swizzle pattern to equation.1427*1428* @return1429* N/A1430************************************************************************************************************************1431*/1432VOID Gfx10Lib::ConvertSwizzlePatternToEquation(1433UINT_32 elemLog2, ///< [in] element bytes log21434AddrResourceType rsrcType, ///< [in] resource type1435AddrSwizzleMode swMode, ///< [in] swizzle mode1436const ADDR_SW_PATINFO* pPatInfo, ///< [in] swizzle pattern infor1437ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern1438const1439{1440ADDR_BIT_SETTING fullSwizzlePattern[20];1441GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);14421443const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern;1444const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);14451446pEquation->numBits = blockSizeLog2;1447pEquation->stackedDepthSlices = FALSE;14481449for (UINT_32 i = 0; i < elemLog2; i++)1450{1451pEquation->addr[i].channel = 0;1452pEquation->addr[i].valid = 1;1453pEquation->addr[i].index = i;1454}14551456if (IsXor(swMode) == FALSE)1457{1458for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)1459{1460ADDR_ASSERT(IsPow2(pSwizzle[i].value));14611462if (pSwizzle[i].x != 0)1463{1464ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));14651466pEquation->addr[i].channel = 0;1467pEquation->addr[i].valid = 1;1468pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;1469}1470else if (pSwizzle[i].y != 0)1471{1472ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));14731474pEquation->addr[i].channel = 1;1475pEquation->addr[i].valid = 1;1476pEquation->addr[i].index = Log2(pSwizzle[i].y);1477}1478else1479{1480ADDR_ASSERT(pSwizzle[i].z != 0);1481ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));14821483pEquation->addr[i].channel = 2;1484pEquation->addr[i].valid = 1;1485pEquation->addr[i].index = Log2(pSwizzle[i].z);1486}14871488pEquation->xor1[i].value = 0;1489pEquation->xor2[i].value = 0;1490}1491}1492else if (IsThin(rsrcType, swMode))1493{1494Dim3d dim;1495ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);14961497const UINT_32 blkXLog2 = Log2(dim.w);1498const UINT_32 blkYLog2 = Log2(dim.h);1499const UINT_32 blkXMask = dim.w - 1;1500const UINT_32 blkYMask = dim.h - 1;15011502ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};1503UINT_32 xMask = 0;1504UINT_32 yMask = 0;1505UINT_32 bMask = (1 << elemLog2) - 1;15061507for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)1508{1509if (IsPow2(pSwizzle[i].value))1510{1511if (pSwizzle[i].x != 0)1512{1513ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);1514xMask |= pSwizzle[i].x;15151516const UINT_32 xLog2 = Log2(pSwizzle[i].x);15171518ADDR_ASSERT(xLog2 < blkXLog2);15191520pEquation->addr[i].channel = 0;1521pEquation->addr[i].valid = 1;1522pEquation->addr[i].index = xLog2 + elemLog2;1523}1524else1525{1526ADDR_ASSERT(pSwizzle[i].y != 0);1527ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);1528yMask |= pSwizzle[i].y;15291530pEquation->addr[i].channel = 1;1531pEquation->addr[i].valid = 1;1532pEquation->addr[i].index = Log2(pSwizzle[i].y);15331534ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);1535}15361537swizzle[i].value = 0;1538bMask |= 1 << i;1539}1540else1541{1542if (pSwizzle[i].z != 0)1543{1544ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));15451546pEquation->xor2[i].channel = 2;1547pEquation->xor2[i].valid = 1;1548pEquation->xor2[i].index = Log2(pSwizzle[i].z);1549}15501551swizzle[i].x = pSwizzle[i].x;1552swizzle[i].y = pSwizzle[i].y;1553swizzle[i].z = swizzle[i].s = 0;15541555ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);15561557const UINT_32 xHi = swizzle[i].x & (~blkXMask);15581559if (xHi != 0)1560{1561ADDR_ASSERT(IsPow2(xHi));1562ADDR_ASSERT(pEquation->xor1[i].value == 0);15631564pEquation->xor1[i].channel = 0;1565pEquation->xor1[i].valid = 1;1566pEquation->xor1[i].index = Log2(xHi) + elemLog2;15671568swizzle[i].x &= blkXMask;1569}15701571const UINT_32 yHi = swizzle[i].y & (~blkYMask);15721573if (yHi != 0)1574{1575ADDR_ASSERT(IsPow2(yHi));15761577if (xHi == 0)1578{1579ADDR_ASSERT(pEquation->xor1[i].value == 0);1580pEquation->xor1[i].channel = 1;1581pEquation->xor1[i].valid = 1;1582pEquation->xor1[i].index = Log2(yHi);1583}1584else1585{1586ADDR_ASSERT(pEquation->xor2[i].value == 0);1587pEquation->xor2[i].channel = 1;1588pEquation->xor2[i].valid = 1;1589pEquation->xor2[i].index = Log2(yHi);1590}15911592swizzle[i].y &= blkYMask;1593}15941595if (swizzle[i].value == 0)1596{1597bMask |= 1 << i;1598}1599}1600}16011602const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;1603const UINT_32 blockMask = (1 << blockSizeLog2) - 1;16041605ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);16061607while (bMask != blockMask)1608{1609for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)1610{1611if ((bMask & (1 << i)) == 0)1612{1613if (IsPow2(swizzle[i].value))1614{1615if (swizzle[i].x != 0)1616{1617ADDR_ASSERT((xMask & swizzle[i].x) == 0);1618xMask |= swizzle[i].x;16191620const UINT_32 xLog2 = Log2(swizzle[i].x);16211622ADDR_ASSERT(xLog2 < blkXLog2);16231624pEquation->addr[i].channel = 0;1625pEquation->addr[i].valid = 1;1626pEquation->addr[i].index = xLog2 + elemLog2;1627}1628else1629{1630ADDR_ASSERT(swizzle[i].y != 0);1631ADDR_ASSERT((yMask & swizzle[i].y) == 0);1632yMask |= swizzle[i].y;16331634pEquation->addr[i].channel = 1;1635pEquation->addr[i].valid = 1;1636pEquation->addr[i].index = Log2(swizzle[i].y);16371638ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);1639}16401641swizzle[i].value = 0;1642bMask |= 1 << i;1643}1644else1645{1646const UINT_32 x = swizzle[i].x & xMask;1647const UINT_32 y = swizzle[i].y & yMask;16481649if (x != 0)1650{1651ADDR_ASSERT(IsPow2(x));16521653if (pEquation->xor1[i].value == 0)1654{1655pEquation->xor1[i].channel = 0;1656pEquation->xor1[i].valid = 1;1657pEquation->xor1[i].index = Log2(x) + elemLog2;1658}1659else1660{1661ADDR_ASSERT(pEquation->xor2[i].value == 0);1662pEquation->xor2[i].channel = 0;1663pEquation->xor2[i].valid = 1;1664pEquation->xor2[i].index = Log2(x) + elemLog2;1665}1666}16671668if (y != 0)1669{1670ADDR_ASSERT(IsPow2(y));16711672if (pEquation->xor1[i].value == 0)1673{1674pEquation->xor1[i].channel = 1;1675pEquation->xor1[i].valid = 1;1676pEquation->xor1[i].index = Log2(y);1677}1678else1679{1680ADDR_ASSERT(pEquation->xor2[i].value == 0);1681pEquation->xor2[i].channel = 1;1682pEquation->xor2[i].valid = 1;1683pEquation->xor2[i].index = Log2(y);1684}1685}16861687swizzle[i].x &= ~x;1688swizzle[i].y &= ~y;1689}1690}1691}1692}16931694ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));1695}1696else1697{1698const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;1699const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;1700const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;1701const UINT_32 blkXMask = (1 << blkXLog2) - 1;1702const UINT_32 blkYMask = (1 << blkYLog2) - 1;1703const UINT_32 blkZMask = (1 << blkZLog2) - 1;17041705ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};1706UINT_32 xMask = 0;1707UINT_32 yMask = 0;1708UINT_32 zMask = 0;1709UINT_32 bMask = (1 << elemLog2) - 1;17101711for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)1712{1713if (IsPow2(pSwizzle[i].value))1714{1715if (pSwizzle[i].x != 0)1716{1717ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);1718xMask |= pSwizzle[i].x;17191720const UINT_32 xLog2 = Log2(pSwizzle[i].x);17211722ADDR_ASSERT(xLog2 < blkXLog2);17231724pEquation->addr[i].channel = 0;1725pEquation->addr[i].valid = 1;1726pEquation->addr[i].index = xLog2 + elemLog2;1727}1728else if (pSwizzle[i].y != 0)1729{1730ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);1731yMask |= pSwizzle[i].y;17321733pEquation->addr[i].channel = 1;1734pEquation->addr[i].valid = 1;1735pEquation->addr[i].index = Log2(pSwizzle[i].y);17361737ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);1738}1739else1740{1741ADDR_ASSERT(pSwizzle[i].z != 0);1742ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);1743zMask |= pSwizzle[i].z;17441745pEquation->addr[i].channel = 2;1746pEquation->addr[i].valid = 1;1747pEquation->addr[i].index = Log2(pSwizzle[i].z);17481749ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);1750}17511752swizzle[i].value = 0;1753bMask |= 1 << i;1754}1755else1756{1757swizzle[i].x = pSwizzle[i].x;1758swizzle[i].y = pSwizzle[i].y;1759swizzle[i].z = pSwizzle[i].z;1760swizzle[i].s = 0;17611762ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);17631764const UINT_32 xHi = swizzle[i].x & (~blkXMask);1765const UINT_32 yHi = swizzle[i].y & (~blkYMask);1766const UINT_32 zHi = swizzle[i].z & (~blkZMask);17671768ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));17691770if (xHi != 0)1771{1772ADDR_ASSERT(IsPow2(xHi));1773ADDR_ASSERT(pEquation->xor1[i].value == 0);17741775pEquation->xor1[i].channel = 0;1776pEquation->xor1[i].valid = 1;1777pEquation->xor1[i].index = Log2(xHi) + elemLog2;17781779swizzle[i].x &= blkXMask;1780}17811782if (yHi != 0)1783{1784ADDR_ASSERT(IsPow2(yHi));17851786if (pEquation->xor1[i].value == 0)1787{1788pEquation->xor1[i].channel = 1;1789pEquation->xor1[i].valid = 1;1790pEquation->xor1[i].index = Log2(yHi);1791}1792else1793{1794ADDR_ASSERT(pEquation->xor2[i].value == 0);1795pEquation->xor2[i].channel = 1;1796pEquation->xor2[i].valid = 1;1797pEquation->xor2[i].index = Log2(yHi);1798}17991800swizzle[i].y &= blkYMask;1801}18021803if (zHi != 0)1804{1805ADDR_ASSERT(IsPow2(zHi));18061807if (pEquation->xor1[i].value == 0)1808{1809pEquation->xor1[i].channel = 2;1810pEquation->xor1[i].valid = 1;1811pEquation->xor1[i].index = Log2(zHi);1812}1813else1814{1815ADDR_ASSERT(pEquation->xor2[i].value == 0);1816pEquation->xor2[i].channel = 2;1817pEquation->xor2[i].valid = 1;1818pEquation->xor2[i].index = Log2(zHi);1819}18201821swizzle[i].z &= blkZMask;1822}18231824if (swizzle[i].value == 0)1825{1826bMask |= 1 << i;1827}1828}1829}18301831const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;1832const UINT_32 blockMask = (1 << blockSizeLog2) - 1;18331834ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);18351836while (bMask != blockMask)1837{1838for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)1839{1840if ((bMask & (1 << i)) == 0)1841{1842if (IsPow2(swizzle[i].value))1843{1844if (swizzle[i].x != 0)1845{1846ADDR_ASSERT((xMask & swizzle[i].x) == 0);1847xMask |= swizzle[i].x;18481849const UINT_32 xLog2 = Log2(swizzle[i].x);18501851ADDR_ASSERT(xLog2 < blkXLog2);18521853pEquation->addr[i].channel = 0;1854pEquation->addr[i].valid = 1;1855pEquation->addr[i].index = xLog2 + elemLog2;1856}1857else if (swizzle[i].y != 0)1858{1859ADDR_ASSERT((yMask & swizzle[i].y) == 0);1860yMask |= swizzle[i].y;18611862pEquation->addr[i].channel = 1;1863pEquation->addr[i].valid = 1;1864pEquation->addr[i].index = Log2(swizzle[i].y);18651866ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);1867}1868else1869{1870ADDR_ASSERT(swizzle[i].z != 0);1871ADDR_ASSERT((zMask & swizzle[i].z) == 0);1872zMask |= swizzle[i].z;18731874pEquation->addr[i].channel = 2;1875pEquation->addr[i].valid = 1;1876pEquation->addr[i].index = Log2(swizzle[i].z);18771878ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);1879}18801881swizzle[i].value = 0;1882bMask |= 1 << i;1883}1884else1885{1886const UINT_32 x = swizzle[i].x & xMask;1887const UINT_32 y = swizzle[i].y & yMask;1888const UINT_32 z = swizzle[i].z & zMask;18891890if (x != 0)1891{1892ADDR_ASSERT(IsPow2(x));18931894if (pEquation->xor1[i].value == 0)1895{1896pEquation->xor1[i].channel = 0;1897pEquation->xor1[i].valid = 1;1898pEquation->xor1[i].index = Log2(x) + elemLog2;1899}1900else1901{1902ADDR_ASSERT(pEquation->xor2[i].value == 0);1903pEquation->xor2[i].channel = 0;1904pEquation->xor2[i].valid = 1;1905pEquation->xor2[i].index = Log2(x) + elemLog2;1906}1907}19081909if (y != 0)1910{1911ADDR_ASSERT(IsPow2(y));19121913if (pEquation->xor1[i].value == 0)1914{1915pEquation->xor1[i].channel = 1;1916pEquation->xor1[i].valid = 1;1917pEquation->xor1[i].index = Log2(y);1918}1919else1920{1921ADDR_ASSERT(pEquation->xor2[i].value == 0);1922pEquation->xor2[i].channel = 1;1923pEquation->xor2[i].valid = 1;1924pEquation->xor2[i].index = Log2(y);1925}1926}19271928if (z != 0)1929{1930ADDR_ASSERT(IsPow2(z));19311932if (pEquation->xor1[i].value == 0)1933{1934pEquation->xor1[i].channel = 2;1935pEquation->xor1[i].valid = 1;1936pEquation->xor1[i].index = Log2(z);1937}1938else1939{1940ADDR_ASSERT(pEquation->xor2[i].value == 0);1941pEquation->xor2[i].channel = 2;1942pEquation->xor2[i].valid = 1;1943pEquation->xor2[i].index = Log2(z);1944}1945}19461947swizzle[i].x &= ~x;1948swizzle[i].y &= ~y;1949swizzle[i].z &= ~z;1950}1951}1952}1953}19541955ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));1956}1957}19581959/**1960************************************************************************************************************************1961* Gfx10Lib::InitEquationTable1962*1963* @brief1964* Initialize Equation table.1965*1966* @return1967* N/A1968************************************************************************************************************************1969*/1970VOID Gfx10Lib::InitEquationTable()1971{1972memset(m_equationTable, 0, sizeof(m_equationTable));19731974for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)1975{1976const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);19771978for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)1979{1980const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);19811982for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)1983{1984UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;1985const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);19861987if (pPatInfo != NULL)1988{1989ADDR_ASSERT(IsValidSwMode(swMode));19901991if (pPatInfo->maxItemCount <= 3)1992{1993ADDR_EQUATION equation = {};19941995ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);19961997equationIndex = m_numEquations;1998ADDR_ASSERT(equationIndex < EquationTableSize);19992000m_equationTable[equationIndex] = equation;20012002m_numEquations++;2003}2004else2005{2006// We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case2007ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));2008ADDR_ASSERT(rsrcTypeIdx == 1);2009ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);2010ADDR_ASSERT(m_settings.supportRbPlus == 1);2011}2012}20132014m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;2015}2016}2017}2018}20192020/**2021************************************************************************************************************************2022* Gfx10Lib::HwlGetEquationIndex2023*2024* @brief2025* Interface function stub of GetEquationIndex2026*2027* @return2028* ADDR_E_RETURNCODE2029************************************************************************************************************************2030*/2031UINT_32 Gfx10Lib::HwlGetEquationIndex(2032const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure2033ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure2034) const2035{2036UINT_32 equationIdx = ADDR_INVALID_EQUATION_INDEX;20372038if ((pIn->resourceType == ADDR_RSRC_TEX_2D) ||2039(pIn->resourceType == ADDR_RSRC_TEX_3D))2040{2041const UINT_32 rsrcTypeIdx = static_cast<UINT_32>(pIn->resourceType) - 1;2042const UINT_32 swModeIdx = static_cast<UINT_32>(pIn->swizzleMode);2043const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);20442045equationIdx = m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2];2046}20472048if (pOut->pMipInfo != NULL)2049{2050for (UINT_32 i = 0; i < pIn->numMipLevels; i++)2051{2052pOut->pMipInfo[i].equationIndex = equationIdx;2053}2054}20552056return equationIdx;2057}20582059/**2060************************************************************************************************************************2061* Gfx10Lib::GetValidDisplaySwizzleModes2062*2063* @brief2064* Get valid swizzle modes mask for displayable surface2065*2066* @return2067* Valid swizzle modes mask for displayable surface2068************************************************************************************************************************2069*/2070UINT_32 Gfx10Lib::GetValidDisplaySwizzleModes(2071UINT_32 bpp2072) const2073{2074UINT_32 swModeMask = 0;20752076if (bpp <= 64)2077{2078if (m_settings.isDcn20)2079{2080swModeMask = (bpp == 64) ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask;2081}2082else2083{2084swModeMask = (bpp == 64) ? Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask;2085}2086}20872088return swModeMask;2089}20902091/**2092************************************************************************************************************************2093* Gfx10Lib::IsValidDisplaySwizzleMode2094*2095* @brief2096* Check if a swizzle mode is supported by display engine2097*2098* @return2099* TRUE is swizzle mode is supported by display engine2100************************************************************************************************************************2101*/2102BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(2103const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure2104) const2105{2106ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);21072108return (GetValidDisplaySwizzleModes(pIn->bpp) & (1 << pIn->swizzleMode)) ? TRUE : FALSE;2109}21102111/**2112************************************************************************************************************************2113* Gfx10Lib::GetMaxNumMipsInTail2114*2115* @brief2116* Return max number of mips in tails2117*2118* @return2119* Max number of mips in tails2120************************************************************************************************************************2121*/2122UINT_32 Gfx10Lib::GetMaxNumMipsInTail(2123UINT_32 blockSizeLog2, ///< block size log22124BOOL_32 isThin ///< is thin or thick2125) const2126{2127UINT_32 effectiveLog2 = blockSizeLog2;21282129if (isThin == FALSE)2130{2131effectiveLog2 -= (blockSizeLog2 - 8) / 3;2132}21332134return (effectiveLog2 <= 11) ? (1 + (1 << (effectiveLog2 - 9))) : (effectiveLog2 - 4);2135}21362137/**2138************************************************************************************************************************2139* Gfx10Lib::HwlComputePipeBankXor2140*2141* @brief2142* Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address2143*2144* @return2145* PipeBankXor value2146************************************************************************************************************************2147*/2148ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(2149const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure2150ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure2151) const2152{2153if (IsNonPrtXor(pIn->swizzleMode))2154{2155const UINT_32 bankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode));21562157// No pipe xor...2158const UINT_32 pipeXor = 0;2159UINT_32 bankXor = 0;21602161const UINT_32 XorPatternLen = 8;2162static const UINT_32 XorBankRot1b[XorPatternLen] = {0, 1, 0, 1, 0, 1, 0, 1};2163static const UINT_32 XorBankRot2b[XorPatternLen] = {0, 2, 1, 3, 2, 0, 3, 1};2164static const UINT_32 XorBankRot3b[XorPatternLen] = {0, 4, 2, 6, 1, 5, 3, 7};2165static const UINT_32 XorBankRot4b[XorPatternLen] = {0, 8, 4, 12, 2, 10, 6, 14};2166static const UINT_32* XorBankRotPat[] = {XorBankRot1b, XorBankRot2b, XorBankRot3b, XorBankRot4b};21672168switch (bankBits)2169{2170case 1:2171case 2:2172case 3:2173case 4:2174bankXor = XorBankRotPat[bankBits - 1][pIn->surfIndex % XorPatternLen] << (m_pipesLog2 + ColumnBits);2175break;2176default:2177// valid bank bits should be 0~42178ADDR_ASSERT_ALWAYS();2179case 0:2180break;2181}21822183pOut->pipeBankXor = bankXor | pipeXor;2184}2185else2186{2187pOut->pipeBankXor = 0;2188}21892190return ADDR_OK;2191}21922193/**2194************************************************************************************************************************2195* Gfx10Lib::HwlComputeSlicePipeBankXor2196*2197* @brief2198* Generate slice PipeBankXor value based on base PipeBankXor value and slice id2199*2200* @return2201* PipeBankXor value2202************************************************************************************************************************2203*/2204ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(2205const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn, ///< [in] input structure2206ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT* pOut ///< [out] output structure2207) const2208{2209if (IsNonPrtXor(pIn->swizzleMode))2210{2211const UINT_32 blockBits = GetBlockSizeLog2(pIn->swizzleMode);2212const UINT_32 pipeBits = GetPipeXorBits(blockBits);2213const UINT_32 pipeXor = ReverseBitVector(pIn->slice, pipeBits);22142215pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeXor;22162217if (pIn->bpe != 0)2218{2219const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,2220pIn->resourceType,2221Log2(pIn->bpe >> 3),22221);22232224if (pPatInfo != NULL)2225{2226ADDR_BIT_SETTING fullSwizzlePattern[20];2227GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);22282229const UINT_32 pipeBankXorOffset =2230ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),2231blockBits,22320,22330,2234pIn->slice,22350);22362237const UINT_32 pipeBankXor = pipeBankXorOffset >> m_pipeInterleaveLog2;22382239// Should have no bit set under pipe interleave2240ADDR_ASSERT((pipeBankXor << m_pipeInterleaveLog2) == pipeBankXorOffset);22412242// This assertion firing means old approach doesn't calculate a correct sliceXor value...2243ADDR_ASSERT(pipeBankXor == pipeXor);22442245pOut->pipeBankXor = pIn->basePipeBankXor ^ pipeBankXor;2246}2247}2248}2249else2250{2251pOut->pipeBankXor = 0;2252}22532254return ADDR_OK;2255}22562257/**2258************************************************************************************************************************2259* Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern2260*2261* @brief2262* Compute sub resource offset to support swizzle pattern2263*2264* @return2265* Offset2266************************************************************************************************************************2267*/2268ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(2269const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn, ///< [in] input structure2270ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT* pOut ///< [out] output structure2271) const2272{2273ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));22742275pOut->offset = pIn->slice * pIn->sliceSize + pIn->macroBlockOffset;22762277return ADDR_OK;2278}22792280/**2281************************************************************************************************************************2282* Gfx10Lib::HwlComputeNonBlockCompressedView2283*2284* @brief2285* Compute non-block-compressed view for a given mipmap level/slice.2286*2287* @return2288* ADDR_E_RETURNCODE2289************************************************************************************************************************2290*/2291ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView(2292const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn, ///< [in] input structure2293ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT* pOut ///< [out] output structure2294) const2295{2296ADDR_E_RETURNCODE returnCode = ADDR_OK;22972298if (pIn->resourceType != ADDR_RSRC_TEX_2D)2299{2300// Only 2D resource can have a NonBC view...2301returnCode = ADDR_INVALIDPARAMS;2302}2303else if ((pIn->format != ADDR_FMT_ASTC_8x8) &&2304((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))2305{2306// Only support BC1~BC7 or ASTC_8x8 for now...2307returnCode = ADDR_NOTSUPPORTED;2308}2309else2310{2311UINT_32 bcWidth, bcHeight;2312UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);23132314ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};2315infoIn.flags = pIn->flags;2316infoIn.swizzleMode = pIn->swizzleMode;2317infoIn.resourceType = pIn->resourceType;2318infoIn.bpp = bpp;2319infoIn.width = PowTwoAlign(pIn->width, bcWidth) / bcWidth;2320infoIn.height = PowTwoAlign(pIn->height, bcHeight) / bcHeight;2321infoIn.numSlices = pIn->numSlices;2322infoIn.numMipLevels = pIn->numMipLevels;2323infoIn.numSamples = 1;2324infoIn.numFrags = 1;23252326ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};23272328ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};2329infoOut.pMipInfo = mipInfo;23302331const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;23322333if (tiled)2334{2335returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);2336}2337else2338{2339returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);2340}23412342if (returnCode == ADDR_OK)2343{2344ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};2345subOffIn.swizzleMode = infoIn.swizzleMode;2346subOffIn.resourceType = infoIn.resourceType;2347subOffIn.slice = pIn->slice;2348subOffIn.sliceSize = infoOut.sliceSize;2349subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;2350subOffIn.mipTailOffset = mipInfo[pIn->mipId].mipTailOffset;23512352ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};23532354// For any mipmap level, move nonBc view base address by offset2355HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);2356pOut->offset = subOffOut.offset;23572358ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};2359slicePbXorIn.bpe = infoIn.bpp;2360slicePbXorIn.swizzleMode = infoIn.swizzleMode;2361slicePbXorIn.resourceType = infoIn.resourceType;2362slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;2363slicePbXorIn.slice = pIn->slice;23642365ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};23662367// For any mipmap level, nonBc view should use computed pbXor2368HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);2369pOut->pipeBankXor = slicePbXorOut.pipeBankXor;23702371const BOOL_32 inTail = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;2372const UINT_32 requestMipWidth = PowTwoAlign(Max(pIn->width >> pIn->mipId, 1u), bcWidth) / bcWidth;2373const UINT_32 requestMipHeight = PowTwoAlign(Max(pIn->height >> pIn->mipId, 1u), bcHeight) / bcHeight;23742375if (inTail)2376{2377// For mipmap level that is in mip tail block, hack a lot of things...2378// Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels2379// are fit in tail block:23802381// - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)2382pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;23832384// - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)2385pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);23862387// - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold2388pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);23892390// - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold2391pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);2392}2393// This check should cover at least mipId == 02394else if (requestMipWidth << pIn->mipId == infoIn.width)2395{2396// For mipmap level [N] that is not in mip tail block and downgraded without losing element:2397// - only one mipmap level and mipId = 02398pOut->mipId = 0;2399pOut->numMipLevels = 1;24002401// (mip0) width = requestMipWidth2402pOut->unalignedWidth = requestMipWidth;24032404// (mip0) height = requestMipHeight2405pOut->unalignedHeight = requestMipHeight;2406}2407else2408{2409// For mipmap level [N] that is not in mip tail block and downgraded with element losing,2410// We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,2411// because single mip view may have different pitch value than original (multiple) mip view...2412// A simple case would be:2413// - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]2414// - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view2415// mip0 width = 0x101/mip1 width = 0x802416// By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in2417// GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.24182419// - 2 levels and mipId = 12420pOut->mipId = 1;2421pOut->numMipLevels = 2;24222423const UINT_32 upperMipWidth =2424PowTwoAlign(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth) / bcWidth;2425const UINT_32 upperMipHeight =2426PowTwoAlign(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight) / bcHeight;24272428const BOOL_32 needToAvoidInTail =2429tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?2430TRUE : FALSE;24312432const UINT_32 hwMipWidth = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);2433const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);24342435const BOOL_32 needExtraWidth =2436((upperMipWidth < requestMipWidth * 2) ||2437((upperMipWidth == requestMipWidth * 2) &&2438((needToAvoidInTail == TRUE) ||2439(hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;24402441const BOOL_32 needExtraHeight =2442((upperMipHeight < requestMipHeight * 2) ||2443((upperMipHeight == requestMipHeight * 2) &&2444((needToAvoidInTail == TRUE) ||2445(hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;24462447// (mip0) width = requestLastMipLevelWidth2448pOut->unalignedWidth = upperMipWidth + (needExtraWidth ? 1: 0);24492450// (mip0) height = requestLastMipLevelHeight2451pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);2452}24532454// Assert the downgrading from this mip[0] width would still generate correct mip[N] width2455ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);2456// Assert the downgrading from this mip[0] height would still generate correct mip[N] height2457ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);2458}2459}24602461return returnCode;2462}24632464/**2465************************************************************************************************************************2466* Gfx10Lib::ValidateNonSwModeParams2467*2468* @brief2469* Validate compute surface info params except swizzle mode2470*2471* @return2472* TRUE if parameters are valid, FALSE otherwise2473************************************************************************************************************************2474*/2475BOOL_32 Gfx10Lib::ValidateNonSwModeParams(2476const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const2477{2478BOOL_32 valid = TRUE;24792480if ((pIn->bpp == 0) || (pIn->bpp > 128) || (pIn->width == 0) || (pIn->numFrags > 8) || (pIn->numSamples > 16))2481{2482ADDR_ASSERT_ALWAYS();2483valid = FALSE;2484}24852486if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)2487{2488ADDR_ASSERT_ALWAYS();2489valid = FALSE;2490}24912492const ADDR2_SURFACE_FLAGS flags = pIn->flags;2493const AddrResourceType rsrcType = pIn->resourceType;2494const BOOL_32 mipmap = (pIn->numMipLevels > 1);2495const BOOL_32 msaa = (pIn->numFrags > 1);2496const BOOL_32 display = flags.display;2497const BOOL_32 tex3d = IsTex3d(rsrcType);2498const BOOL_32 tex2d = IsTex2d(rsrcType);2499const BOOL_32 tex1d = IsTex1d(rsrcType);2500const BOOL_32 stereo = flags.qbStereo;250125022503// Resource type check2504if (tex1d)2505{2506if (msaa || display || stereo)2507{2508ADDR_ASSERT_ALWAYS();2509valid = FALSE;2510}2511}2512else if (tex2d)2513{2514if ((msaa && mipmap) || (stereo && msaa) || (stereo && mipmap))2515{2516ADDR_ASSERT_ALWAYS();2517valid = FALSE;2518}2519}2520else if (tex3d)2521{2522if (msaa || display || stereo)2523{2524ADDR_ASSERT_ALWAYS();2525valid = FALSE;2526}2527}2528else2529{2530ADDR_ASSERT_ALWAYS();2531valid = FALSE;2532}25332534return valid;2535}25362537/**2538************************************************************************************************************************2539* Gfx10Lib::ValidateSwModeParams2540*2541* @brief2542* Validate compute surface info related to swizzle mode2543*2544* @return2545* TRUE if parameters are valid, FALSE otherwise2546************************************************************************************************************************2547*/2548BOOL_32 Gfx10Lib::ValidateSwModeParams(2549const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const2550{2551BOOL_32 valid = TRUE;25522553if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)2554{2555ADDR_ASSERT_ALWAYS();2556valid = FALSE;2557}2558else if (IsValidSwMode(pIn->swizzleMode) == FALSE)2559{2560{2561ADDR_ASSERT_ALWAYS();2562valid = FALSE;2563}2564}25652566const ADDR2_SURFACE_FLAGS flags = pIn->flags;2567const AddrResourceType rsrcType = pIn->resourceType;2568const AddrSwizzleMode swizzle = pIn->swizzleMode;2569const BOOL_32 msaa = (pIn->numFrags > 1);2570const BOOL_32 zbuffer = flags.depth || flags.stencil;2571const BOOL_32 color = flags.color;2572const BOOL_32 display = flags.display;2573const BOOL_32 tex3d = IsTex3d(rsrcType);2574const BOOL_32 tex2d = IsTex2d(rsrcType);2575const BOOL_32 tex1d = IsTex1d(rsrcType);2576const BOOL_32 thin3d = flags.view3dAs2dArray;2577const BOOL_32 linear = IsLinear(swizzle);2578const BOOL_32 blk256B = IsBlock256b(swizzle);2579const BOOL_32 blkVar = IsBlockVariable(swizzle);2580const BOOL_32 isNonPrtXor = IsNonPrtXor(swizzle);2581const BOOL_32 prt = flags.prt;2582const BOOL_32 fmask = flags.fmask;25832584// Misc check2585if ((pIn->numFrags > 1) &&2586(GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))2587{2588// MSAA surface must have blk_bytes/pipe_interleave >= num_samples2589ADDR_ASSERT_ALWAYS();2590valid = FALSE;2591}25922593if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))2594{2595ADDR_ASSERT_ALWAYS();2596valid = FALSE;2597}25982599if ((pIn->bpp == 96) && (linear == FALSE))2600{2601ADDR_ASSERT_ALWAYS();2602valid = FALSE;2603}26042605const UINT_32 swizzleMask = 1 << swizzle;26062607// Resource type check2608if (tex1d)2609{2610if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)2611{2612ADDR_ASSERT_ALWAYS();2613valid = FALSE;2614}2615}2616else if (tex2d)2617{2618if ((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0)2619{2620{2621ADDR_ASSERT_ALWAYS();2622valid = FALSE;2623}2624}2625else if ((prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||2626(fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))2627{2628ADDR_ASSERT_ALWAYS();2629valid = FALSE;2630}26312632}2633else if (tex3d)2634{2635if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||2636(prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||2637(thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0)))2638{2639ADDR_ASSERT_ALWAYS();2640valid = FALSE;2641}2642}26432644// Swizzle type check2645if (linear)2646{2647if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))2648{2649ADDR_ASSERT_ALWAYS();2650valid = FALSE;2651}2652}2653else if (IsZOrderSwizzle(swizzle))2654{2655if ((pIn->bpp > 64) ||2656(msaa && (color || (pIn->bpp > 32))) ||2657ElemLib::IsBlockCompressed(pIn->format) ||2658ElemLib::IsMacroPixelPacked(pIn->format))2659{2660ADDR_ASSERT_ALWAYS();2661valid = FALSE;2662}2663}2664else if (IsStandardSwizzle(rsrcType, swizzle))2665{2666if (zbuffer || msaa)2667{2668ADDR_ASSERT_ALWAYS();2669valid = FALSE;2670}2671}2672else if (IsDisplaySwizzle(rsrcType, swizzle))2673{2674if (zbuffer || msaa)2675{2676ADDR_ASSERT_ALWAYS();2677valid = FALSE;2678}2679}2680else if (IsRtOptSwizzle(swizzle))2681{2682if (zbuffer)2683{2684ADDR_ASSERT_ALWAYS();2685valid = FALSE;2686}2687}2688else2689{2690{2691ADDR_ASSERT_ALWAYS();2692valid = FALSE;2693}2694}26952696// Block type check2697if (blk256B)2698{2699if (zbuffer || tex3d || msaa)2700{2701ADDR_ASSERT_ALWAYS();2702valid = FALSE;2703}2704}2705else if (blkVar)2706{2707if (m_blockVarSizeLog2 == 0)2708{2709ADDR_ASSERT_ALWAYS();2710valid = FALSE;2711}2712}27132714return valid;2715}27162717/**2718************************************************************************************************************************2719* Gfx10Lib::HwlComputeSurfaceInfoSanityCheck2720*2721* @brief2722* Compute surface info sanity check2723*2724* @return2725* Offset2726************************************************************************************************************************2727*/2728ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(2729const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn ///< [in] input structure2730) const2731{2732return ValidateNonSwModeParams(pIn) && ValidateSwModeParams(pIn) ? ADDR_OK : ADDR_INVALIDPARAMS;2733}27342735/**2736************************************************************************************************************************2737* Gfx10Lib::HwlGetPreferredSurfaceSetting2738*2739* @brief2740* Internal function to get suggested surface information for cliet to use2741*2742* @return2743* ADDR_E_RETURNCODE2744************************************************************************************************************************2745*/2746ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(2747const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure2748ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure2749) const2750{2751ADDR_E_RETURNCODE returnCode = ADDR_OK;27522753if (pIn->flags.fmask)2754{2755const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;2756const BOOL_32 forbidVarBlockType = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));27572758if (forbid64KbBlockType && forbidVarBlockType)2759{2760// Invalid combination...2761ADDR_ASSERT_ALWAYS();2762returnCode = ADDR_INVALIDPARAMS;2763}2764else2765{2766pOut->resourceType = ADDR_RSRC_TEX_2D;2767pOut->validBlockSet.value = 0;2768pOut->validBlockSet.macroThin64KB = forbid64KbBlockType ? 0 : 1;2769pOut->validBlockSet.var = forbidVarBlockType ? 0 : 1;2770pOut->validSwModeSet.value = 0;2771pOut->validSwModeSet.sw64KB_Z_X = forbid64KbBlockType ? 0 : 1;2772pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType ? 0 : 1;2773pOut->canXor = TRUE;2774pOut->validSwTypeSet.value = AddrSwSetZ;2775pOut->clientPreferredSwSet = pOut->validSwTypeSet;27762777BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);27782779if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))2780{2781const UINT_8 maxFmaskSwizzleModeType = 2;2782const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);2783const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);2784const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);2785const UINT_32 numSlices = Max(pIn->numSlices, 1u);2786const UINT_32 width = Max(pIn->width, 1u);2787const UINT_32 height = Max(pIn->height, 1u);2788const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);27892790AddrSwizzleMode swMode[maxFmaskSwizzleModeType] = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};2791Dim3d blkDim[maxFmaskSwizzleModeType] = {{}, {}};2792Dim3d padDim[maxFmaskSwizzleModeType] = {{}, {}};2793UINT_64 padSize[maxFmaskSwizzleModeType] = {};27942795for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)2796{2797ComputeBlockDimensionForSurf(&blkDim[i].w,2798&blkDim[i].h,2799&blkDim[i].d,2800fmaskBpp,28011,2802pOut->resourceType,2803swMode[i]);28042805padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);2806padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);2807}28082809if (BlockTypeWithinMemoryBudget(padSize[0],2810padSize[1],2811ratioLow,2812ratioHi,2813pIn->memoryBudget,2814GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0])))2815{2816use64KbBlockType = FALSE;2817}2818}2819else if (forbidVarBlockType)2820{2821use64KbBlockType = TRUE;2822}28232824if (use64KbBlockType)2825{2826pOut->swizzleMode = ADDR_SW_64KB_Z_X;2827}2828else2829{2830pOut->swizzleMode = ADDR_SW_VAR_Z_X;2831}2832}2833}2834else2835{2836UINT_32 bpp = pIn->bpp;2837UINT_32 width = Max(pIn->width, 1u);2838UINT_32 height = Max(pIn->height, 1u);28392840// Set format to INVALID will skip this conversion2841if (pIn->format != ADDR_FMT_INVALID)2842{2843ElemMode elemMode = ADDR_UNCOMPRESSED;2844UINT_32 expandX, expandY;28452846// Get compression/expansion factors and element mode which indicates compression/expansion2847bpp = GetElemLib()->GetBitsPerPixel(pIn->format,2848&elemMode,2849&expandX,2850&expandY);28512852UINT_32 basePitch = 0;2853GetElemLib()->AdjustSurfaceInfo(elemMode,2854expandX,2855expandY,2856&bpp,2857&basePitch,2858&width,2859&height);2860}28612862const UINT_32 numSlices = Max(pIn->numSlices, 1u);2863const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);2864const UINT_32 numSamples = Max(pIn->numSamples, 1u);2865const UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;2866const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);28672868// Pre sanity check on non swizzle mode parameters2869ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};2870localIn.flags = pIn->flags;2871localIn.resourceType = pIn->resourceType;2872localIn.format = pIn->format;2873localIn.bpp = bpp;2874localIn.width = width;2875localIn.height = height;2876localIn.numSlices = numSlices;2877localIn.numMipLevels = numMipLevels;2878localIn.numSamples = numSamples;2879localIn.numFrags = numFrags;28802881if (ValidateNonSwModeParams(&localIn))2882{2883// Forbid swizzle mode(s) by client setting2884ADDR2_SWMODE_SET allowedSwModeSet = {};2885allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;2886allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask;2887allowedSwModeSet.value |=2888pIn->forbiddenBlock.macroThin4KB ? 0 :2889((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);2890allowedSwModeSet.value |=2891pIn->forbiddenBlock.macroThick4KB ? 0 :2892((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);2893allowedSwModeSet.value |=2894pIn->forbiddenBlock.macroThin64KB ? 0 :2895((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);2896allowedSwModeSet.value |=2897pIn->forbiddenBlock.macroThick64KB ? 0 :2898((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);2899allowedSwModeSet.value |=2900pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);29012902if (pIn->preferredSwSet.value != 0)2903{2904allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;2905allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;2906allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;2907allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;2908}29092910if (pIn->noXor)2911{2912allowedSwModeSet.value &= ~Gfx10XorSwModeMask;2913}29142915if (pIn->maxAlign > 0)2916{2917if (pIn->maxAlign < (1u << m_blockVarSizeLog2))2918{2919allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;2920}29212922if (pIn->maxAlign < Size64K)2923{2924allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;2925}29262927if (pIn->maxAlign < Size4K)2928{2929allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;2930}29312932if (pIn->maxAlign < Size256)2933{2934allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;2935}2936}29372938// Filter out invalid swizzle mode(s) by image attributes and HW restrictions2939switch (pIn->resourceType)2940{2941case ADDR_RSRC_TEX_1D:2942allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;2943break;29442945case ADDR_RSRC_TEX_2D:2946allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;29472948break;29492950case ADDR_RSRC_TEX_3D:2951allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;29522953if (pIn->flags.view3dAs2dArray)2954{2955allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask;2956}2957break;29582959default:2960ADDR_ASSERT_ALWAYS();2961allowedSwModeSet.value = 0;2962break;2963}29642965if (ElemLib::IsBlockCompressed(pIn->format) ||2966ElemLib::IsMacroPixelPacked(pIn->format) ||2967(bpp > 64) ||2968(msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))2969{2970allowedSwModeSet.value &= ~Gfx10ZSwModeMask;2971}29722973if (pIn->format == ADDR_FMT_32_32_32)2974{2975allowedSwModeSet.value &= Gfx10LinearSwModeMask;2976}29772978if (msaa)2979{2980allowedSwModeSet.value &= Gfx10MsaaSwModeMask;2981}29822983if (pIn->flags.depth || pIn->flags.stencil)2984{2985allowedSwModeSet.value &= Gfx10ZSwModeMask;2986}29872988if (pIn->flags.display)2989{2990allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);2991}29922993if (allowedSwModeSet.value != 0)2994{2995#if DEBUG2996// Post sanity check, at least AddrLib should accept the output generated by its own2997UINT_32 validateSwModeSet = allowedSwModeSet.value;29982999for (UINT_32 i = 0; validateSwModeSet != 0; i++)3000{3001if (validateSwModeSet & 1)3002{3003localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);3004ADDR_ASSERT(ValidateSwModeParams(&localIn));3005}30063007validateSwModeSet >>= 1;3008}3009#endif30103011pOut->resourceType = pIn->resourceType;3012pOut->validSwModeSet = allowedSwModeSet;3013pOut->canXor = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;3014pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);3015pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);30163017pOut->clientPreferredSwSet = pIn->preferredSwSet;30183019if (pOut->clientPreferredSwSet.value == 0)3020{3021pOut->clientPreferredSwSet.value = AddrSwSetAll;3022}30233024// Apply optional restrictions3025if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)3026{3027if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)3028{3029// MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from3030// the GL2 in VAR mode, so it should be avoided.3031allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;3032}3033else3034{3035// We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.3036// But we have to suffer from low performance because there is no other choice...3037ADDR_ASSERT_ALWAYS();3038}3039}30403041if (pIn->flags.needEquation)3042{3043FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));3044}30453046if (allowedSwModeSet.value == Gfx10LinearSwModeMask)3047{3048pOut->swizzleMode = ADDR_SW_LINEAR;3049}3050else3051{3052const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);30533054if ((height > 1) && (computeMinSize == FALSE))3055{3056// Always ignore linear swizzle mode if:3057// 1. This is a (2D/3D) resource with height > 13058// 2. Client doesn't require computing minimize size3059allowedSwModeSet.swLinear = 0;3060}30613062ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);30633064// Determine block size if there are 2 or more block type candidates3065if (IsPow2(allowedBlockSet.value) == FALSE)3066{3067AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};30683069swMode[AddrBlockLinear] = ADDR_SW_LINEAR;30703071if (m_blockVarSizeLog2 != 0)3072{3073swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X;3074}30753076if (pOut->resourceType == ADDR_RSRC_TEX_3D)3077{3078swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;3079swMode[AddrBlockThin64KB] = ADDR_SW_64KB_R_X;3080swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;3081}3082else3083{3084swMode[AddrBlockMicro] = ADDR_SW_256B_S;3085swMode[AddrBlockThin4KB] = ADDR_SW_4KB_S;3086swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;3087}30883089UINT_64 padSize[AddrBlockMaxTiledType] = {};30903091const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);3092const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);3093const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);3094UINT_32 minSizeBlk = AddrBlockMicro;3095UINT_64 minSize = 0;30963097ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};30983099for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)3100{3101if (IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))3102{3103localIn.swizzleMode = swMode[i];31043105if (localIn.swizzleMode == ADDR_SW_LINEAR)3106{3107returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);3108}3109else3110{3111returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);3112}31133114if (returnCode == ADDR_OK)3115{3116padSize[i] = localOut.surfSize;31173118if (minSize == 0)3119{3120minSize = padSize[i];3121minSizeBlk = i;3122}3123else3124{3125if (BlockTypeWithinMemoryBudget(3126minSize,3127padSize[i],3128ratioLow,3129ratioHi,31300.0,3131GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])))3132{3133minSize = padSize[i];3134minSizeBlk = i;3135}3136}3137}3138else3139{3140ADDR_ASSERT_ALWAYS();3141break;3142}3143}3144}31453146if (pIn->memoryBudget > 1.0)3147{3148// If minimum size is given by swizzle mode with bigger-block type, then don't ever check3149// smaller-block type again in coming loop3150switch (minSizeBlk)3151{3152case AddrBlockThick64KB:3153allowedBlockSet.macroThin64KB = 0;3154case AddrBlockThinVar:3155case AddrBlockThin64KB:3156allowedBlockSet.macroThick4KB = 0;3157case AddrBlockThick4KB:3158allowedBlockSet.macroThin4KB = 0;3159case AddrBlockThin4KB:3160allowedBlockSet.micro = 0;3161case AddrBlockMicro:3162allowedBlockSet.linear = 0;3163case AddrBlockLinear:3164break;31653166default:3167ADDR_ASSERT_ALWAYS();3168break;3169}31703171for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)3172{3173if ((i != minSizeBlk) &&3174IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))3175{3176if (BlockTypeWithinMemoryBudget(3177minSize,3178padSize[i],31790,31800,3181pIn->memoryBudget,3182GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])) == FALSE)3183{3184// Clear the block type if the memory waste is unacceptable3185allowedBlockSet.value &= ~(1u << (i - 1));3186}3187}3188}31893190// Remove VAR block type if bigger block type is allowed3191if (GetBlockSizeLog2(swMode[AddrBlockThinVar]) < GetBlockSizeLog2(ADDR_SW_64KB_R_X))3192{3193if (allowedBlockSet.macroThick64KB || allowedBlockSet.macroThin64KB)3194{3195allowedBlockSet.var = 0;3196}3197}31983199// Remove linear block type if 2 or more block types are allowed3200if (IsPow2(allowedBlockSet.value) == FALSE)3201{3202allowedBlockSet.linear = 0;3203}32043205// Select the biggest allowed block type3206minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;32073208if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))3209{3210minSizeBlk = AddrBlockLinear;3211}3212}32133214switch (minSizeBlk)3215{3216case AddrBlockLinear:3217allowedSwModeSet.value &= Gfx10LinearSwModeMask;3218break;32193220case AddrBlockMicro:3221ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);3222allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;3223break;32243225case AddrBlockThin4KB:3226ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);3227allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;3228break;32293230case AddrBlockThick4KB:3231ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);3232allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;3233break;32343235case AddrBlockThin64KB:3236allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?3237Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;3238break;32393240case AddrBlockThick64KB:3241ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);3242allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;3243break;32443245case AddrBlockThinVar:3246allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;3247break;32483249default:3250ADDR_ASSERT_ALWAYS();3251allowedSwModeSet.value = 0;3252break;3253}3254}32553256// Block type should be determined.3257ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));32583259ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);32603261// Determine swizzle type if there are 2 or more swizzle type candidates3262if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))3263{3264if (ElemLib::IsBlockCompressed(pIn->format))3265{3266if (allowedSwSet.sw_D)3267{3268allowedSwModeSet.value &= Gfx10DisplaySwModeMask;3269}3270else if (allowedSwSet.sw_S)3271{3272allowedSwModeSet.value &= Gfx10StandardSwModeMask;3273}3274else3275{3276ADDR_ASSERT(allowedSwSet.sw_R);3277allowedSwModeSet.value &= Gfx10RenderSwModeMask;3278}3279}3280else if (ElemLib::IsMacroPixelPacked(pIn->format))3281{3282if (allowedSwSet.sw_S)3283{3284allowedSwModeSet.value &= Gfx10StandardSwModeMask;3285}3286else if (allowedSwSet.sw_D)3287{3288allowedSwModeSet.value &= Gfx10DisplaySwModeMask;3289}3290else3291{3292ADDR_ASSERT(allowedSwSet.sw_R);3293allowedSwModeSet.value &= Gfx10RenderSwModeMask;3294}3295}3296else if (pIn->resourceType == ADDR_RSRC_TEX_3D)3297{3298if (pIn->flags.color &&3299GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&3300allowedSwSet.sw_D)3301{3302allowedSwModeSet.value &= Gfx10DisplaySwModeMask;3303}3304else if (allowedSwSet.sw_S)3305{3306allowedSwModeSet.value &= Gfx10StandardSwModeMask;3307}3308else if (allowedSwSet.sw_R)3309{3310allowedSwModeSet.value &= Gfx10RenderSwModeMask;3311}3312else3313{3314ADDR_ASSERT(allowedSwSet.sw_Z);3315allowedSwModeSet.value &= Gfx10ZSwModeMask;3316}3317}3318else3319{3320if (allowedSwSet.sw_R)3321{3322allowedSwModeSet.value &= Gfx10RenderSwModeMask;3323}3324else if (allowedSwSet.sw_D)3325{3326allowedSwModeSet.value &= Gfx10DisplaySwModeMask;3327}3328else if (allowedSwSet.sw_S)3329{3330allowedSwModeSet.value &= Gfx10StandardSwModeMask;3331}3332else3333{3334ADDR_ASSERT(allowedSwSet.sw_Z);3335allowedSwModeSet.value &= Gfx10ZSwModeMask;3336}3337}33383339// Swizzle type should be determined.3340ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));3341}33423343// Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +3344// swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's3345// available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).3346pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));3347}3348}3349else3350{3351// Invalid combination...3352ADDR_ASSERT_ALWAYS();3353returnCode = ADDR_INVALIDPARAMS;3354}3355}3356else3357{3358// Invalid combination...3359ADDR_ASSERT_ALWAYS();3360returnCode = ADDR_INVALIDPARAMS;3361}3362}33633364return returnCode;3365}33663367/**3368************************************************************************************************************************3369* Gfx10Lib::ComputeStereoInfo3370*3371* @brief3372* Compute height alignment and right eye pipeBankXor for stereo surface3373*3374* @return3375* Error code3376*3377************************************************************************************************************************3378*/3379ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(3380const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< Compute surface info3381UINT_32* pAlignY, ///< Stereo requested additional alignment in Y3382UINT_32* pRightXor ///< Right eye xor3383) const3384{3385ADDR_E_RETURNCODE ret = ADDR_OK;33863387*pRightXor = 0;33883389if (IsNonPrtXor(pIn->swizzleMode))3390{3391const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);3392const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);3393const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;3394const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);3395const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];33963397if (eqIndex != ADDR_INVALID_EQUATION_INDEX)3398{3399UINT_32 yMax = 0;3400UINT_32 yPosMask = 0;34013402// First get "max y bit"3403for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)3404{3405ADDR_ASSERT(m_equationTable[eqIndex].addr[i].valid == 1);34063407if ((m_equationTable[eqIndex].addr[i].channel == 1) &&3408(m_equationTable[eqIndex].addr[i].index > yMax))3409{3410yMax = m_equationTable[eqIndex].addr[i].index;3411}34123413if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&3414(m_equationTable[eqIndex].xor1[i].channel == 1) &&3415(m_equationTable[eqIndex].xor1[i].index > yMax))3416{3417yMax = m_equationTable[eqIndex].xor1[i].index;3418}34193420if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&3421(m_equationTable[eqIndex].xor2[i].channel == 1) &&3422(m_equationTable[eqIndex].xor2[i].index > yMax))3423{3424yMax = m_equationTable[eqIndex].xor2[i].index;3425}3426}34273428// Then loop again for populating a position mask of "max Y bit"3429for (UINT_32 i = m_pipeInterleaveLog2; i < blkSizeLog2; i++)3430{3431if ((m_equationTable[eqIndex].addr[i].channel == 1) &&3432(m_equationTable[eqIndex].addr[i].index == yMax))3433{3434yPosMask |= 1u << i;3435}3436else if ((m_equationTable[eqIndex].xor1[i].valid == 1) &&3437(m_equationTable[eqIndex].xor1[i].channel == 1) &&3438(m_equationTable[eqIndex].xor1[i].index == yMax))3439{3440yPosMask |= 1u << i;3441}3442else if ((m_equationTable[eqIndex].xor2[i].valid == 1) &&3443(m_equationTable[eqIndex].xor2[i].channel == 1) &&3444(m_equationTable[eqIndex].xor2[i].index == yMax))3445{3446yPosMask |= 1u << i;3447}3448}34493450const UINT_32 additionalAlign = 1 << yMax;34513452if (additionalAlign >= *pAlignY)3453{3454*pAlignY = additionalAlign;34553456const UINT_32 alignedHeight = PowTwoAlign(pIn->height, additionalAlign);34573458if ((alignedHeight >> yMax) & 1)3459{3460*pRightXor = yPosMask >> m_pipeInterleaveLog2;3461}3462}3463}3464else3465{3466ret = ADDR_INVALIDPARAMS;3467}3468}34693470return ret;3471}34723473/**3474************************************************************************************************************************3475* Gfx10Lib::HwlComputeSurfaceInfoTiled3476*3477* @brief3478* Internal function to calculate alignment for tiled surface3479*3480* @return3481* ADDR_E_RETURNCODE3482************************************************************************************************************************3483*/3484ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(3485const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure3486ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure3487) const3488{3489ADDR_E_RETURNCODE ret;34903491// Mip chain dimesion and epitch has no meaning in GFX10, set to default value3492pOut->mipChainPitch = 0;3493pOut->mipChainHeight = 0;3494pOut->mipChainSlice = 0;3495pOut->epitchIsHeight = FALSE;34963497// Following information will be provided in ComputeSurfaceInfoMacroTiled() if necessary3498pOut->mipChainInTail = FALSE;3499pOut->firstMipIdInTail = pIn->numMipLevels;35003501if (IsBlock256b(pIn->swizzleMode))3502{3503ret = ComputeSurfaceInfoMicroTiled(pIn, pOut);3504}3505else3506{3507ret = ComputeSurfaceInfoMacroTiled(pIn, pOut);3508}35093510return ret;3511}351235133514/**3515************************************************************************************************************************3516* Gfx10Lib::ComputeSurfaceInfoMicroTiled3517*3518* @brief3519* Internal function to calculate alignment for micro tiled surface3520*3521* @return3522* ADDR_E_RETURNCODE3523************************************************************************************************************************3524*/3525ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(3526const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure3527ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure3528) const3529{3530ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,3531&pOut->blockHeight,3532&pOut->blockSlices,3533pIn->bpp,3534pIn->numFrags,3535pIn->resourceType,3536pIn->swizzleMode);35373538if (ret == ADDR_OK)3539{3540const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);35413542pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);3543pOut->height = PowTwoAlign(pIn->height, pOut->blockHeight);3544pOut->numSlices = pIn->numSlices;3545pOut->baseAlign = blockSize;35463547if (pIn->numMipLevels > 1)3548{3549const UINT_32 mip0Width = pIn->width;3550const UINT_32 mip0Height = pIn->height;3551UINT_64 mipSliceSize = 0;35523553for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)3554{3555UINT_32 mipWidth, mipHeight;35563557GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);35583559const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pOut->blockWidth);3560const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);35613562if (pOut->pMipInfo != NULL)3563{3564pOut->pMipInfo[i].pitch = mipActualWidth;3565pOut->pMipInfo[i].height = mipActualHeight;3566pOut->pMipInfo[i].depth = 1;3567pOut->pMipInfo[i].offset = mipSliceSize;3568pOut->pMipInfo[i].mipTailOffset = 0;3569pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;3570}35713572mipSliceSize += mipActualWidth * mipActualHeight * (pIn->bpp >> 3);3573}35743575pOut->sliceSize = mipSliceSize;3576pOut->surfSize = mipSliceSize * pOut->numSlices;3577}3578else3579{3580pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);3581pOut->surfSize = pOut->sliceSize * pOut->numSlices;35823583if (pOut->pMipInfo != NULL)3584{3585pOut->pMipInfo[0].pitch = pOut->pitch;3586pOut->pMipInfo[0].height = pOut->height;3587pOut->pMipInfo[0].depth = 1;3588pOut->pMipInfo[0].offset = 0;3589pOut->pMipInfo[0].mipTailOffset = 0;3590pOut->pMipInfo[0].macroBlockOffset = 0;3591}3592}35933594}35953596return ret;3597}35983599/**3600************************************************************************************************************************3601* Gfx10Lib::ComputeSurfaceInfoMacroTiled3602*3603* @brief3604* Internal function to calculate alignment for macro tiled surface3605*3606* @return3607* ADDR_E_RETURNCODE3608************************************************************************************************************************3609*/3610ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(3611const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure3612ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure3613) const3614{3615ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,3616&pOut->blockHeight,3617&pOut->blockSlices,3618pIn->bpp,3619pIn->numFrags,3620pIn->resourceType,3621pIn->swizzleMode);36223623if (returnCode == ADDR_OK)3624{3625UINT_32 heightAlign = pOut->blockHeight;36263627if (pIn->flags.qbStereo)3628{3629UINT_32 rightXor = 0;36303631returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);36323633if (returnCode == ADDR_OK)3634{3635pOut->pStereoInfo->rightSwizzle = rightXor;3636}3637}36383639if (returnCode == ADDR_OK)3640{3641const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);3642const UINT_32 blockSize = 1 << blockSizeLog2;36433644pOut->pitch = PowTwoAlign(pIn->width, pOut->blockWidth);3645pOut->height = PowTwoAlign(pIn->height, heightAlign);3646pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);3647pOut->baseAlign = blockSize;36483649if (pIn->numMipLevels > 1)3650{3651const Dim3d tailMaxDim = GetMipTailDim(pIn->resourceType,3652pIn->swizzleMode,3653pOut->blockWidth,3654pOut->blockHeight,3655pOut->blockSlices);3656const UINT_32 mip0Width = pIn->width;3657const UINT_32 mip0Height = pIn->height;3658const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);3659const UINT_32 mip0Depth = isThin ? 1 : pIn->numSlices;3660const UINT_32 maxMipsInTail = GetMaxNumMipsInTail(blockSizeLog2, isThin);3661const UINT_32 index = Log2(pIn->bpp >> 3);3662UINT_32 firstMipInTail = pIn->numMipLevels;3663UINT_64 mipChainSliceSize = 0;3664UINT_64 mipSize[MaxMipLevels];3665UINT_64 mipSliceSize[MaxMipLevels];36663667Dim3d fixedTailMaxDim = tailMaxDim;36683669if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))3670{3671fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;3672fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;3673}36743675for (UINT_32 i = 0; i < pIn->numMipLevels; i++)3676{3677UINT_32 mipWidth, mipHeight, mipDepth;36783679GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);36803681if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))3682{3683firstMipInTail = i;3684mipChainSliceSize += blockSize / pOut->blockSlices;3685break;3686}3687else3688{3689const UINT_32 pitch = PowTwoAlign(mipWidth, pOut->blockWidth);3690const UINT_32 height = PowTwoAlign(mipHeight, pOut->blockHeight);3691const UINT_32 depth = PowTwoAlign(mipDepth, pOut->blockSlices);3692const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);36933694mipSize[i] = sliceSize * depth;3695mipSliceSize[i] = sliceSize * pOut->blockSlices;3696mipChainSliceSize += sliceSize;36973698if (pOut->pMipInfo != NULL)3699{3700pOut->pMipInfo[i].pitch = pitch;3701pOut->pMipInfo[i].height = height;3702pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;3703}3704}3705}37063707pOut->sliceSize = mipChainSliceSize;3708pOut->surfSize = mipChainSliceSize * pOut->numSlices;3709pOut->mipChainInTail = (firstMipInTail == 0) ? TRUE : FALSE;3710pOut->firstMipIdInTail = firstMipInTail;37113712if (pOut->pMipInfo != NULL)3713{3714UINT_64 offset = 0;3715UINT_64 macroBlkOffset = 0;3716UINT_32 tailMaxDepth = 0;37173718if (firstMipInTail != pIn->numMipLevels)3719{3720UINT_32 mipWidth, mipHeight;37213722GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,3723&mipWidth, &mipHeight, &tailMaxDepth);37243725offset = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;3726macroBlkOffset = blockSize;3727}37283729for (INT_32 i = firstMipInTail - 1; i >= 0; i--)3730{3731pOut->pMipInfo[i].offset = offset;3732pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;3733pOut->pMipInfo[i].mipTailOffset = 0;37343735offset += mipSize[i];3736macroBlkOffset += mipSliceSize[i];3737}37383739UINT_32 pitch = tailMaxDim.w;3740UINT_32 height = tailMaxDim.h;3741UINT_32 depth = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);37423743tailMaxDepth = isThin ? 1 : (depth / Block256_3d[index].d);37443745for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)3746{3747const UINT_32 m = maxMipsInTail - 1 - (i - firstMipInTail);3748const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);37493750pOut->pMipInfo[i].offset = mipOffset * tailMaxDepth;3751pOut->pMipInfo[i].mipTailOffset = mipOffset;3752pOut->pMipInfo[i].macroBlockOffset = 0;37533754pOut->pMipInfo[i].pitch = pitch;3755pOut->pMipInfo[i].height = height;3756pOut->pMipInfo[i].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;37573758UINT_32 mipX = ((mipOffset >> 9) & 1) |3759((mipOffset >> 10) & 2) |3760((mipOffset >> 11) & 4) |3761((mipOffset >> 12) & 8) |3762((mipOffset >> 13) & 16) |3763((mipOffset >> 14) & 32);3764UINT_32 mipY = ((mipOffset >> 8) & 1) |3765((mipOffset >> 9) & 2) |3766((mipOffset >> 10) & 4) |3767((mipOffset >> 11) & 8) |3768((mipOffset >> 12) & 16) |3769((mipOffset >> 13) & 32);37703771if (blockSizeLog2 & 1)3772{3773const UINT_32 temp = mipX;3774mipX = mipY;3775mipY = temp;37763777if (index & 1)3778{3779mipY = (mipY << 1) | (mipX & 1);3780mipX = mipX >> 1;3781}3782}37833784if (isThin)3785{3786pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;3787pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;3788pOut->pMipInfo[i].mipTailCoordZ = 0;37893790pitch = Max(pitch >> 1, Block256_2d[index].w);3791height = Max(height >> 1, Block256_2d[index].h);3792}3793else3794{3795pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;3796pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;3797pOut->pMipInfo[i].mipTailCoordZ = 0;37983799pitch = Max(pitch >> 1, Block256_3d[index].w);3800height = Max(height >> 1, Block256_3d[index].h);3801}3802}3803}3804}3805else3806{3807pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;3808pOut->surfSize = pOut->sliceSize * pOut->numSlices;38093810if (pOut->pMipInfo != NULL)3811{3812pOut->pMipInfo[0].pitch = pOut->pitch;3813pOut->pMipInfo[0].height = pOut->height;3814pOut->pMipInfo[0].depth = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;3815pOut->pMipInfo[0].offset = 0;3816pOut->pMipInfo[0].mipTailOffset = 0;3817pOut->pMipInfo[0].macroBlockOffset = 0;3818pOut->pMipInfo[0].mipTailCoordX = 0;3819pOut->pMipInfo[0].mipTailCoordY = 0;3820pOut->pMipInfo[0].mipTailCoordZ = 0;3821}3822}3823}3824}38253826return returnCode;3827}38283829/**3830************************************************************************************************************************3831* Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled3832*3833* @brief3834* Internal function to calculate address from coord for tiled swizzle surface3835*3836* @return3837* ADDR_E_RETURNCODE3838************************************************************************************************************************3839*/3840ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(3841const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure3842ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure3843) const3844{3845ADDR_E_RETURNCODE ret;38463847if (IsBlock256b(pIn->swizzleMode))3848{3849ret = ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut);3850}3851else3852{3853ret = ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);3854}38553856return ret;3857}38583859/**3860************************************************************************************************************************3861* Gfx10Lib::ComputeOffsetFromEquation3862*3863* @brief3864* Compute offset from equation3865*3866* @return3867* Offset3868************************************************************************************************************************3869*/3870UINT_32 Gfx10Lib::ComputeOffsetFromEquation(3871const ADDR_EQUATION* pEq, ///< Equation3872UINT_32 x, ///< x coord in bytes3873UINT_32 y, ///< y coord in pixel3874UINT_32 z ///< z coord in slice3875) const3876{3877UINT_32 offset = 0;38783879for (UINT_32 i = 0; i < pEq->numBits; i++)3880{3881UINT_32 v = 0;38823883if (pEq->addr[i].valid)3884{3885if (pEq->addr[i].channel == 0)3886{3887v ^= (x >> pEq->addr[i].index) & 1;3888}3889else if (pEq->addr[i].channel == 1)3890{3891v ^= (y >> pEq->addr[i].index) & 1;3892}3893else3894{3895ADDR_ASSERT(pEq->addr[i].channel == 2);3896v ^= (z >> pEq->addr[i].index) & 1;3897}3898}38993900if (pEq->xor1[i].valid)3901{3902if (pEq->xor1[i].channel == 0)3903{3904v ^= (x >> pEq->xor1[i].index) & 1;3905}3906else if (pEq->xor1[i].channel == 1)3907{3908v ^= (y >> pEq->xor1[i].index) & 1;3909}3910else3911{3912ADDR_ASSERT(pEq->xor1[i].channel == 2);3913v ^= (z >> pEq->xor1[i].index) & 1;3914}3915}39163917if (pEq->xor2[i].valid)3918{3919if (pEq->xor2[i].channel == 0)3920{3921v ^= (x >> pEq->xor2[i].index) & 1;3922}3923else if (pEq->xor2[i].channel == 1)3924{3925v ^= (y >> pEq->xor2[i].index) & 1;3926}3927else3928{3929ADDR_ASSERT(pEq->xor2[i].channel == 2);3930v ^= (z >> pEq->xor2[i].index) & 1;3931}3932}39333934offset |= (v << i);3935}39363937return offset;3938}39393940/**3941************************************************************************************************************************3942* Gfx10Lib::ComputeOffsetFromSwizzlePattern3943*3944* @brief3945* Compute offset from swizzle pattern3946*3947* @return3948* Offset3949************************************************************************************************************************3950*/3951UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(3952const UINT_64* pPattern, ///< Swizzle pattern3953UINT_32 numBits, ///< Number of bits in pattern3954UINT_32 x, ///< x coord in pixel3955UINT_32 y, ///< y coord in pixel3956UINT_32 z, ///< z coord in slice3957UINT_32 s ///< sample id3958) const3959{3960UINT_32 offset = 0;3961const ADDR_BIT_SETTING* pSwizzlePattern = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);39623963for (UINT_32 i = 0; i < numBits; i++)3964{3965UINT_32 v = 0;39663967if (pSwizzlePattern[i].x != 0)3968{3969UINT_16 mask = pSwizzlePattern[i].x;3970UINT_32 xBits = x;39713972while (mask != 0)3973{3974if (mask & 1)3975{3976v ^= xBits & 1;3977}39783979xBits >>= 1;3980mask >>= 1;3981}3982}39833984if (pSwizzlePattern[i].y != 0)3985{3986UINT_16 mask = pSwizzlePattern[i].y;3987UINT_32 yBits = y;39883989while (mask != 0)3990{3991if (mask & 1)3992{3993v ^= yBits & 1;3994}39953996yBits >>= 1;3997mask >>= 1;3998}3999}40004001if (pSwizzlePattern[i].z != 0)4002{4003UINT_16 mask = pSwizzlePattern[i].z;4004UINT_32 zBits = z;40054006while (mask != 0)4007{4008if (mask & 1)4009{4010v ^= zBits & 1;4011}40124013zBits >>= 1;4014mask >>= 1;4015}4016}40174018if (pSwizzlePattern[i].s != 0)4019{4020UINT_16 mask = pSwizzlePattern[i].s;4021UINT_32 sBits = s;40224023while (mask != 0)4024{4025if (mask & 1)4026{4027v ^= sBits & 1;4028}40294030sBits >>= 1;4031mask >>= 1;4032}4033}40344035offset |= (v << i);4036}40374038return offset;4039}40404041/**4042************************************************************************************************************************4043* Gfx10Lib::GetSwizzlePatternInfo4044*4045* @brief4046* Get swizzle pattern4047*4048* @return4049* Swizzle pattern information4050************************************************************************************************************************4051*/4052const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(4053AddrSwizzleMode swizzleMode, ///< Swizzle mode4054AddrResourceType resourceType, ///< Resource type4055UINT_32 elemLog2, ///< Element size in bytes log24056UINT_32 numFrag ///< Number of fragment4057) const4058{4059const UINT_32 index = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;4060const ADDR_SW_PATINFO* patInfo = NULL;4061const UINT_32 swizzleMask = 1 << swizzleMode;40624063if (IsBlockVariable(swizzleMode))4064{4065if (m_blockVarSizeLog2 != 0)4066{4067ADDR_ASSERT(m_settings.supportRbPlus);40684069if (IsRtOptSwizzle(swizzleMode))4070{4071if (numFrag == 1)4072{4073patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO;4074}4075else if (numFrag == 2)4076{4077patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO;4078}4079else if (numFrag == 4)4080{4081patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO;4082}4083else4084{4085ADDR_ASSERT(numFrag == 8);4086patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO;4087}4088}4089else if (IsZOrderSwizzle(swizzleMode))4090{4091if (numFrag == 1)4092{4093patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;4094}4095else if (numFrag == 2)4096{4097patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;4098}4099else if (numFrag == 4)4100{4101patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;4102}4103else4104{4105ADDR_ASSERT(numFrag == 8);4106patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;4107}4108}4109}4110}4111else if (IsLinear(swizzleMode) == FALSE)4112{4113if (resourceType == ADDR_RSRC_TEX_3D)4114{4115ADDR_ASSERT(numFrag == 1);41164117if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)4118{4119if (IsRtOptSwizzle(swizzleMode))4120{4121patInfo = m_settings.supportRbPlus ?4122GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;4123}4124else if (IsZOrderSwizzle(swizzleMode))4125{4126patInfo = m_settings.supportRbPlus ?4127GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;4128}4129else if (IsDisplaySwizzle(resourceType, swizzleMode))4130{4131ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);4132patInfo = m_settings.supportRbPlus ?4133GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO;4134}4135else4136{4137ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));41384139if (IsBlock4kb(swizzleMode))4140{4141if (swizzleMode == ADDR_SW_4KB_S)4142{4143patInfo = m_settings.supportRbPlus ?4144GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO;4145}4146else4147{4148ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);4149patInfo = m_settings.supportRbPlus ?4150GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO;4151}4152}4153else4154{4155if (swizzleMode == ADDR_SW_64KB_S)4156{4157patInfo = m_settings.supportRbPlus ?4158GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO;4159}4160else if (swizzleMode == ADDR_SW_64KB_S_X)4161{4162patInfo = m_settings.supportRbPlus ?4163GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO;4164}4165else4166{4167ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);4168patInfo = m_settings.supportRbPlus ?4169GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO;4170}4171}4172}4173}4174}4175else4176{4177if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)4178{4179if (IsBlock256b(swizzleMode))4180{4181if (swizzleMode == ADDR_SW_256B_S)4182{4183patInfo = m_settings.supportRbPlus ?4184GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO;4185}4186else4187{4188ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);4189patInfo = m_settings.supportRbPlus ?4190GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO;4191}4192}4193else if (IsBlock4kb(swizzleMode))4194{4195if (IsStandardSwizzle(resourceType, swizzleMode))4196{4197if (swizzleMode == ADDR_SW_4KB_S)4198{4199patInfo = m_settings.supportRbPlus ?4200GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO;4201}4202else4203{4204ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);4205patInfo = m_settings.supportRbPlus ?4206GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO;4207}4208}4209else4210{4211if (swizzleMode == ADDR_SW_4KB_D)4212{4213patInfo = m_settings.supportRbPlus ?4214GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO;4215}4216else4217{4218ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);4219patInfo = m_settings.supportRbPlus ?4220GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO;4221}4222}4223}4224else4225{4226if (IsRtOptSwizzle(swizzleMode))4227{4228if (numFrag == 1)4229{4230patInfo = m_settings.supportRbPlus ?4231GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;4232}4233else if (numFrag == 2)4234{4235patInfo = m_settings.supportRbPlus ?4236GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO;4237}4238else if (numFrag == 4)4239{4240patInfo = m_settings.supportRbPlus ?4241GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO;4242}4243else4244{4245ADDR_ASSERT(numFrag == 8);4246patInfo = m_settings.supportRbPlus ?4247GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO;4248}4249}4250else if (IsZOrderSwizzle(swizzleMode))4251{4252if (numFrag == 1)4253{4254patInfo = m_settings.supportRbPlus ?4255GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;4256}4257else if (numFrag == 2)4258{4259patInfo = m_settings.supportRbPlus ?4260GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO;4261}4262else if (numFrag == 4)4263{4264patInfo = m_settings.supportRbPlus ?4265GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO;4266}4267else4268{4269ADDR_ASSERT(numFrag == 8);4270patInfo = m_settings.supportRbPlus ?4271GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO;4272}4273}4274else if (IsDisplaySwizzle(resourceType, swizzleMode))4275{4276if (swizzleMode == ADDR_SW_64KB_D)4277{4278patInfo = m_settings.supportRbPlus ?4279GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO;4280}4281else if (swizzleMode == ADDR_SW_64KB_D_X)4282{4283patInfo = m_settings.supportRbPlus ?4284GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO;4285}4286else4287{4288ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);4289patInfo = m_settings.supportRbPlus ?4290GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO;4291}4292}4293else4294{4295if (swizzleMode == ADDR_SW_64KB_S)4296{4297patInfo = m_settings.supportRbPlus ?4298GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO;4299}4300else if (swizzleMode == ADDR_SW_64KB_S_X)4301{4302patInfo = m_settings.supportRbPlus ?4303GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO;4304}4305else4306{4307ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);4308patInfo = m_settings.supportRbPlus ?4309GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO;4310}4311}4312}4313}4314}4315}43164317return (patInfo != NULL) ? &patInfo[index] : NULL;4318}431943204321/**4322************************************************************************************************************************4323* Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled4324*4325* @brief4326* Internal function to calculate address from coord for micro tiled swizzle surface4327*4328* @return4329* ADDR_E_RETURNCODE4330************************************************************************************************************************4331*/4332ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(4333const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure4334ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure4335) const4336{4337ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};4338ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};4339ADDR2_MIP_INFO mipInfo[MaxMipLevels];43404341localIn.swizzleMode = pIn->swizzleMode;4342localIn.flags = pIn->flags;4343localIn.resourceType = pIn->resourceType;4344localIn.bpp = pIn->bpp;4345localIn.width = Max(pIn->unalignedWidth, 1u);4346localIn.height = Max(pIn->unalignedHeight, 1u);4347localIn.numSlices = Max(pIn->numSlices, 1u);4348localIn.numMipLevels = Max(pIn->numMipLevels, 1u);4349localIn.numSamples = Max(pIn->numSamples, 1u);4350localIn.numFrags = Max(pIn->numFrags, 1u);4351localOut.pMipInfo = mipInfo;43524353ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);43544355if (ret == ADDR_OK)4356{4357const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);4358const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;4359const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);4360const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];43614362if (eqIndex != ADDR_INVALID_EQUATION_INDEX)4363{4364const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;4365const UINT_32 yb = pIn->y / localOut.blockHeight;4366const UINT_32 xb = pIn->x / localOut.blockWidth;4367const UINT_32 blockIndex = yb * pb + xb;4368const UINT_32 blockSize = 256;4369const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],4370pIn->x << elemLog2,4371pIn->y,43720);4373pOut->addr = localOut.sliceSize * pIn->slice +4374mipInfo[pIn->mipId].macroBlockOffset +4375(blockIndex * blockSize) +4376blk256Offset;4377}4378else4379{4380ret = ADDR_INVALIDPARAMS;4381}4382}43834384return ret;4385}43864387/**4388************************************************************************************************************************4389* Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled4390*4391* @brief4392* Internal function to calculate address from coord for macro tiled swizzle surface4393*4394* @return4395* ADDR_E_RETURNCODE4396************************************************************************************************************************4397*/4398ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(4399const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure4400ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure4401) const4402{4403ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};4404ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};4405ADDR2_MIP_INFO mipInfo[MaxMipLevels];44064407localIn.swizzleMode = pIn->swizzleMode;4408localIn.flags = pIn->flags;4409localIn.resourceType = pIn->resourceType;4410localIn.bpp = pIn->bpp;4411localIn.width = Max(pIn->unalignedWidth, 1u);4412localIn.height = Max(pIn->unalignedHeight, 1u);4413localIn.numSlices = Max(pIn->numSlices, 1u);4414localIn.numMipLevels = Max(pIn->numMipLevels, 1u);4415localIn.numSamples = Max(pIn->numSamples, 1u);4416localIn.numFrags = Max(pIn->numFrags, 1u);4417localOut.pMipInfo = mipInfo;44184419ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);44204421if (ret == ADDR_OK)4422{4423const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);4424const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);4425const UINT_32 blkMask = (1 << blkSizeLog2) - 1;4426const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;4427const UINT_32 bankMask = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);4428const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?4429(((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;44304431if (localIn.numFrags > 1)4432{4433const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,4434pIn->resourceType,4435elemLog2,4436localIn.numFrags);44374438if (pPatInfo != NULL)4439{4440const UINT_32 pb = localOut.pitch / localOut.blockWidth;4441const UINT_32 yb = pIn->y / localOut.blockHeight;4442const UINT_32 xb = pIn->x / localOut.blockWidth;4443const UINT_64 blkIdx = yb * pb + xb;44444445ADDR_BIT_SETTING fullSwizzlePattern[20];4446GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);44474448const UINT_32 blkOffset =4449ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),4450blkSizeLog2,4451pIn->x,4452pIn->y,4453pIn->slice,4454pIn->sample);44554456pOut->addr = (localOut.sliceSize * pIn->slice) +4457(blkIdx << blkSizeLog2) +4458(blkOffset ^ pipeBankXor);4459}4460else4461{4462ret = ADDR_INVALIDPARAMS;4463}4464}4465else4466{4467const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;4468const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);4469const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];44704471if (eqIndex != ADDR_INVALID_EQUATION_INDEX)4472{4473const BOOL_32 inTail = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;4474const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);4475const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);4476const UINT_32 sliceId = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);4477const UINT_32 x = inTail ? (pIn->x + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;4478const UINT_32 y = inTail ? (pIn->y + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;4479const UINT_32 z = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;4480const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;4481const UINT_32 yb = pIn->y / localOut.blockHeight;4482const UINT_32 xb = pIn->x / localOut.blockWidth;4483const UINT_64 blkIdx = yb * pb + xb;4484const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],4485x << elemLog2,4486y,4487z);4488pOut->addr = sliceSize * sliceId +4489mipInfo[pIn->mipId].macroBlockOffset +4490(blkIdx << blkSizeLog2) +4491(blkOffset ^ pipeBankXor);4492}4493else4494{4495ret = ADDR_INVALIDPARAMS;4496}4497}4498}44994500return ret;4501}45024503/**4504************************************************************************************************************************4505* Gfx10Lib::HwlComputeMaxBaseAlignments4506*4507* @brief4508* Gets maximum alignments4509* @return4510* maximum alignments4511************************************************************************************************************************4512*/4513UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const4514{4515return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K;4516}45174518/**4519************************************************************************************************************************4520* Gfx10Lib::HwlComputeMaxMetaBaseAlignments4521*4522* @brief4523* Gets maximum alignments for metadata4524* @return4525* maximum alignments for metadata4526************************************************************************************************************************4527*/4528UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const4529{4530Dim3d metaBlk;45314532const AddrSwizzleMode ValidSwizzleModeForXmask[] =4533{4534ADDR_SW_64KB_Z_X,4535m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X,4536};45374538UINT_32 maxBaseAlignHtile = 0;4539UINT_32 maxBaseAlignCmask = 0;45404541for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++)4542{4543for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)4544{4545for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)4546{4547// Max base alignment for Htile4548const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil,4549ADDR_RSRC_TEX_2D,4550ValidSwizzleModeForXmask[swIdx],4551bppLog2,4552numFragLog2,4553TRUE,4554&metaBlk);45554556maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);4557}4558}45594560// Max base alignment for Cmask4561const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask,4562ADDR_RSRC_TEX_2D,4563ValidSwizzleModeForXmask[swIdx],45640,45650,4566TRUE,4567&metaBlk);45684569maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask);4570}45714572// Max base alignment for 2D Dcc4573const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =4574{4575ADDR_SW_64KB_S_X,4576ADDR_SW_64KB_D_X,4577ADDR_SW_64KB_R_X,4578m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,4579};45804581UINT_32 maxBaseAlignDcc2D = 0;45824583for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)4584{4585for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)4586{4587for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)4588{4589const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,4590ADDR_RSRC_TEX_2D,4591ValidSwizzleModeForDcc2D[swIdx],4592bppLog2,4593numFragLog2,4594TRUE,4595&metaBlk);45964597maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);4598}4599}4600}46014602// Max base alignment for 3D Dcc4603const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =4604{4605ADDR_SW_64KB_Z_X,4606ADDR_SW_64KB_S_X,4607ADDR_SW_64KB_D_X,4608ADDR_SW_64KB_R_X,4609m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,4610};46114612UINT_32 maxBaseAlignDcc3D = 0;46134614for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)4615{4616for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)4617{4618const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,4619ADDR_RSRC_TEX_3D,4620ValidSwizzleModeForDcc3D[swIdx],4621bppLog2,46220,4623TRUE,4624&metaBlk);46254626maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);4627}4628}46294630return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));4631}46324633/**4634************************************************************************************************************************4635* Gfx10Lib::GetMetaElementSizeLog24636*4637* @brief4638* Gets meta data element size log24639* @return4640* Meta data element size log24641************************************************************************************************************************4642*/4643INT_32 Gfx10Lib::GetMetaElementSizeLog2(4644Gfx10DataType dataType) ///< Data surface type4645{4646INT_32 elemSizeLog2 = 0;46474648if (dataType == Gfx10DataColor)4649{4650elemSizeLog2 = 0;4651}4652else if (dataType == Gfx10DataDepthStencil)4653{4654elemSizeLog2 = 2;4655}4656else4657{4658ADDR_ASSERT(dataType == Gfx10DataFmask);4659elemSizeLog2 = -1;4660}46614662return elemSizeLog2;4663}46644665/**4666************************************************************************************************************************4667* Gfx10Lib::GetMetaCacheSizeLog24668*4669* @brief4670* Gets meta data cache line size log24671* @return4672* Meta data cache line size log24673************************************************************************************************************************4674*/4675INT_32 Gfx10Lib::GetMetaCacheSizeLog2(4676Gfx10DataType dataType) ///< Data surface type4677{4678INT_32 cacheSizeLog2 = 0;46794680if (dataType == Gfx10DataColor)4681{4682cacheSizeLog2 = 6;4683}4684else if (dataType == Gfx10DataDepthStencil)4685{4686cacheSizeLog2 = 8;4687}4688else4689{4690ADDR_ASSERT(dataType == Gfx10DataFmask);4691cacheSizeLog2 = 8;4692}4693return cacheSizeLog2;4694}46954696/**4697************************************************************************************************************************4698* Gfx10Lib::HwlComputeSurfaceInfoLinear4699*4700* @brief4701* Internal function to calculate alignment for linear surface4702*4703* @return4704* ADDR_E_RETURNCODE4705************************************************************************************************************************4706*/4707ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(4708const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure4709ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure4710) const4711{4712ADDR_E_RETURNCODE returnCode = ADDR_OK;47134714if (IsTex1d(pIn->resourceType) && (pIn->height > 1))4715{4716returnCode = ADDR_INVALIDPARAMS;4717}4718else4719{4720const UINT_32 elementBytes = pIn->bpp >> 3;4721const UINT_32 pitchAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);4722const UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;4723UINT_32 pitch = PowTwoAlign(pIn->width, pitchAlign);4724UINT_32 actualHeight = pIn->height;4725UINT_64 sliceSize = 0;47264727if (pIn->numMipLevels > 1)4728{4729for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)4730{4731UINT_32 mipWidth, mipHeight;47324733GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);47344735const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);47364737if (pOut->pMipInfo != NULL)4738{4739pOut->pMipInfo[i].pitch = mipActualWidth;4740pOut->pMipInfo[i].height = mipHeight;4741pOut->pMipInfo[i].depth = mipDepth;4742pOut->pMipInfo[i].offset = sliceSize;4743pOut->pMipInfo[i].mipTailOffset = 0;4744pOut->pMipInfo[i].macroBlockOffset = sliceSize;4745}47464747sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;4748}4749}4750else4751{4752returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);47534754if (returnCode == ADDR_OK)4755{4756sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;47574758if (pOut->pMipInfo != NULL)4759{4760pOut->pMipInfo[0].pitch = pitch;4761pOut->pMipInfo[0].height = actualHeight;4762pOut->pMipInfo[0].depth = mipDepth;4763pOut->pMipInfo[0].offset = 0;4764pOut->pMipInfo[0].mipTailOffset = 0;4765pOut->pMipInfo[0].macroBlockOffset = 0;4766}4767}4768}47694770if (returnCode == ADDR_OK)4771{4772pOut->pitch = pitch;4773pOut->height = actualHeight;4774pOut->numSlices = pIn->numSlices;4775pOut->sliceSize = sliceSize;4776pOut->surfSize = sliceSize * pOut->numSlices;4777pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;4778pOut->blockWidth = pitchAlign;4779pOut->blockHeight = 1;4780pOut->blockSlices = 1;47814782// Following members are useless on GFX104783pOut->mipChainPitch = 0;4784pOut->mipChainHeight = 0;4785pOut->mipChainSlice = 0;4786pOut->epitchIsHeight = FALSE;47874788// Post calculation validate4789ADDR_ASSERT(pOut->sliceSize > 0);4790}4791}47924793return returnCode;4794}47954796} // V24797} // Addr479847994800