// Source: GitHub repository PojavLauncherTeam/mesa
// Path: blob/21.2-virgl/src/amd/addrlib/src/gfx10/gfx10addrlib.cpp
1
/*
2
* Copyright © 2007-2019 Advanced Micro Devices, Inc.
3
* All Rights Reserved.
4
*
5
* Permission is hereby granted, free of charge, to any person obtaining
6
* a copy of this software and associated documentation files (the
7
* "Software"), to deal in the Software without restriction, including
8
* without limitation the rights to use, copy, modify, merge, publish,
9
* distribute, sub license, and/or sell copies of the Software, and to
10
* permit persons to whom the Software is furnished to do so, subject to
11
* the following conditions:
12
*
13
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
14
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
15
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
16
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
17
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20
* USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*
22
* The above copyright notice and this permission notice (including the
23
* next paragraph) shall be included in all copies or substantial portions
24
* of the Software.
25
*/
26
27
/**
28
************************************************************************************************************************
29
* @file gfx10addrlib.cpp
30
* @brief Contains the implementation for the Gfx10Lib class.
31
************************************************************************************************************************
32
*/
33
34
#include "gfx10addrlib.h"
35
#include "gfx10_gb_reg.h"
36
37
#include "amdgpu_asic_addr.h"
38
39
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
40
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
41
42
namespace Addr
43
{
44
/**
45
************************************************************************************************************************
46
* Gfx10HwlInit
47
*
48
* @brief
49
* Creates a Gfx10Lib object.
50
*
51
* @return
52
* Returns a Gfx10Lib object pointer.
53
************************************************************************************************************************
54
*/
55
Addr::Lib* Gfx10HwlInit(const Client* pClient)
{
    // Hand the client descriptor to the Gfx10Lib factory and pass its result straight back to the caller.
    Addr::Lib* const pGfx10Lib = V2::Gfx10Lib::CreateObj(pClient);

    return pGfx10Lib;
}
59
60
namespace V2
61
{
62
63
////////////////////////////////////////////////////////////////////////////////////////////////////
64
// Static Const Member
65
////////////////////////////////////////////////////////////////////////////////////////////////////
66
67
// Per-swizzle-mode flag table. Rows appear in array order (slot number shown in brackets); the trailing
// comment names the mode each row describes. Column legend, left to right:
//   Linear, 256B, 4KB, 64KB, Var, Z, Std, Disp, Rot, XOR, T, RtOpt, Reserved
const SwizzleModeFlags Gfx10Lib::SwizzleModeTable[ADDR_SW_MAX_TYPE] =
{
    //Lin 256B 4KB 64KB Var Z  Std Disp Rot XOR T RtOpt Rsvd
    {1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, // [ 0] ADDR_SW_LINEAR
    {0,  1,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0}, // [ 1] ADDR_SW_256B_S
    {0,  1,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0}, // [ 2] ADDR_SW_256B_D
    {0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, // [ 3] Reserved

    {0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, // [ 4] Reserved
    {0,  0,  1,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0}, // [ 5] ADDR_SW_4KB_S
    {0,  0,  1,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0}, // [ 6] ADDR_SW_4KB_D
    {0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, // [ 7] Reserved

    {0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, // [ 8] Reserved
    {0,  0,  0,  1,  0,  0,  1,  0,  0,  0,  0,  0,  0}, // [ 9] ADDR_SW_64KB_S
    {0,  0,  0,  1,  0,  0,  0,  1,  0,  0,  0,  0,  0}, // [10] ADDR_SW_64KB_D
    {0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, // [11] Reserved

    {0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, // [12] Reserved
    {0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, // [13] Reserved
    {0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, // [14] Reserved
    {0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, // [15] Reserved

    {0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, // [16] Reserved
    {0,  0,  0,  1,  0,  0,  1,  0,  0,  1,  1,  0,  0}, // [17] ADDR_SW_64KB_S_T
    {0,  0,  0,  1,  0,  0,  0,  1,  0,  1,  1,  0,  0}, // [18] ADDR_SW_64KB_D_T
    {0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, // [19] Reserved

    {0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, // [20] Reserved
    {0,  0,  1,  0,  0,  0,  1,  0,  0,  1,  0,  0,  0}, // [21] ADDR_SW_4KB_S_X
    {0,  0,  1,  0,  0,  0,  0,  1,  0,  1,  0,  0,  0}, // [22] ADDR_SW_4KB_D_X
    {0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, // [23] Reserved

    {0,  0,  0,  1,  0,  1,  0,  0,  0,  1,  0,  0,  0}, // [24] ADDR_SW_64KB_Z_X
    {0,  0,  0,  1,  0,  0,  1,  0,  0,  1,  0,  0,  0}, // [25] ADDR_SW_64KB_S_X
    {0,  0,  0,  1,  0,  0,  0,  1,  0,  1,  0,  0,  0}, // [26] ADDR_SW_64KB_D_X
    {0,  0,  0,  1,  0,  0,  0,  0,  0,  1,  0,  1,  0}, // [27] ADDR_SW_64KB_R_X

    {0,  0,  0,  0,  1,  1,  0,  0,  0,  1,  0,  0,  0}, // [28] ADDR_SW_VAR_Z_X
    {0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, // [29] Reserved
    {0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, // [30] Reserved
    {0,  0,  0,  0,  1,  0,  0,  0,  0,  1,  0,  1,  0}, // [31] ADDR_SW_VAR_R_X
    {1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0}, // [32] ADDR_SW_LINEAR_GENERAL
};
110
111
// 256-byte block dimensions used for thick surfaces. HwlComputeDccInfo indexes this table with
// Log2(bpp >> 3), so there is one entry per element size.
// NOTE(review): initializer order is presumably {w, h, d} -- confirm against the Dim3d declaration.
const Dim3d Gfx10Lib::Block256_3d[] =
{
    {8, 4, 8},
    {4, 4, 8},
    {4, 4, 4},
    {4, 2, 4},
    {2, 2, 4}
};

// NOTE(review): by name these hold log2 dimensions of 64KB 3D blocks, presumably indexed the same
// way as Block256_3d -- confirm at the use sites.
const Dim3d Gfx10Lib::Block64K_Log2_3d[] =
{
    {6, 5, 5},
    {5, 5, 5},
    {5, 5, 4},
    {5, 4, 4},
    {4, 4, 4}
};

// NOTE(review): by name these hold log2 dimensions of 4KB 3D blocks, presumably indexed the same
// way as Block256_3d -- confirm at the use sites.
const Dim3d Gfx10Lib::Block4K_Log2_3d[] =
{
    {4, 4, 4},
    {3, 4, 4},
    {3, 4, 3},
    {3, 3, 3},
    {2, 3, 3}
};
115
116
/**
117
************************************************************************************************************************
118
* Gfx10Lib::Gfx10Lib
119
*
120
* @brief
121
* Constructor
122
*
123
************************************************************************************************************************
124
*/
125
Gfx10Lib::Gfx10Lib(const Client* pClient)
    : Lib(pClient)
    , m_numPkrLog2(0)
    , m_numSaLog2(0)
    , m_colorBaseIndex(0)
    , m_xmaskBaseIndex(0)
    , m_dccBaseIndex(0)
{
    // Seed this instance's swizzle-mode table from the static GFX10 table.
    memcpy(m_swizzleModeTable, SwizzleModeTable, sizeof(SwizzleModeTable));

    // Start with every chip setting cleared.
    memset(&m_settings, 0, sizeof(m_settings));
}
137
138
/**
139
************************************************************************************************************************
140
* Gfx10Lib::~Gfx10Lib
141
*
142
* @brief
143
* Destructor
144
************************************************************************************************************************
145
*/
146
Gfx10Lib::~Gfx10Lib()
{
    // Intentionally empty: the destructor body performs no work of its own.
}
149
150
/**
151
************************************************************************************************************************
152
* Gfx10Lib::HwlComputeHtileInfo
153
*
154
* @brief
155
* Interface function stub of AddrComputeHtileInfo
156
*
157
* @return
158
* ADDR_E_RETURNCODE
159
************************************************************************************************************************
160
*/
161
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileInfo(
    const ADDR2_COMPUTE_HTILE_INFO_INPUT* pIn,  ///< [in] input structure
    ADDR2_COMPUTE_HTILE_INFO_OUTPUT*      pOut  ///< [out] output structure
    ) const
{
    // Accepted inputs: pipe-aligned HTILE on SW_64KB_Z_X, or on SW_VAR_Z_X when m_blockVarSizeLog2 is non-zero.
    const BOOL_32 swizzleAccepted =
        (pIn->swizzleMode == ADDR_SW_64KB_Z_X) ||
        ((pIn->swizzleMode == ADDR_SW_VAR_Z_X) && (m_blockVarSizeLog2 != 0));

    if ((!swizzleAccepted) || (pIn->hTileFlags.pipeAligned != TRUE))
    {
        return ADDR_INVALIDPARAMS;
    }

    Dim3d         metaBlock     = {};
    const UINT_32 metaBlockSize = GetMetaBlkSize(Gfx10DataDepthStencil,
                                                 ADDR_RSRC_TEX_2D,
                                                 pIn->swizzleMode,
                                                 0,
                                                 0,
                                                 TRUE,
                                                 &metaBlock);

    // Surface dimensions rounded up to whole meta blocks.
    pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlock.w);
    pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlock.h);
    pOut->baseAlign     = Max(metaBlockSize, 1u << (m_pipesLog2 + 11u));
    pOut->metaBlkWidth  = metaBlock.w;
    pOut->metaBlkHeight = metaBlock.h;

    if (pIn->numMipLevels <= 1)
    {
        // Single level: the slice is simply the grid of meta blocks covering the aligned surface.
        const UINT_32 blocksPerRow = pOut->pitch  / metaBlock.w;
        const UINT_32 blockRows    = pOut->height / metaBlock.h;

        pOut->metaBlkNumPerSlice = blocksPerRow * blockRows;
        pOut->sliceSize          = pOut->metaBlkNumPerSlice * metaBlockSize;
        pOut->htileBytes         = pOut->sliceSize * pIn->numSlices;

        if (pOut->pMipInfo != NULL)
        {
            pOut->pMipInfo[0].inMiptail = FALSE;
            pOut->pMipInfo[0].offset    = 0;
            pOut->pMipInfo[0].sliceSize = pOut->sliceSize;
        }
    }
    else
    {
        ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);

        const BOOL_32 hasMipTail = (pIn->firstMipIdInTail != pIn->numMipLevels);

        // When a mip tail exists it takes the first meta block of the slice; the remaining levels are
        // then laid out after it, walking from the last non-tail level back to level 0.
        UINT_32 runningOffset = hasMipTail ? metaBlockSize : 0;

        for (INT_32 mip = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; mip >= 0; mip--)
        {
            UINT_32 levelWidth;
            UINT_32 levelHeight;

            GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, mip, &levelWidth, &levelHeight);

            const UINT_32 blocksPerRow = PowTwoAlign(levelWidth,  metaBlock.w) / metaBlock.w;
            const UINT_32 blockRows    = PowTwoAlign(levelHeight, metaBlock.h) / metaBlock.h;
            const UINT_32 levelBytes   = blocksPerRow * blockRows * metaBlockSize;

            if (pOut->pMipInfo != NULL)
            {
                pOut->pMipInfo[mip].inMiptail = FALSE;
                pOut->pMipInfo[mip].offset    = runningOffset;
                pOut->pMipInfo[mip].sliceSize = levelBytes;
            }

            runningOffset += levelBytes;
        }

        pOut->sliceSize          = runningOffset;
        pOut->metaBlkNumPerSlice = runningOffset / metaBlockSize;
        pOut->htileBytes         = pOut->sliceSize * pIn->numSlices;

        if (pOut->pMipInfo != NULL)
        {
            // Every tail level reports offset 0 and size 0 ...
            for (UINT_32 mip = pIn->firstMipIdInTail; mip < pIn->numMipLevels; mip++)
            {
                pOut->pMipInfo[mip].inMiptail = TRUE;
                pOut->pMipInfo[mip].offset    = 0;
                pOut->pMipInfo[mip].sliceSize = 0;
            }

            // ... except the first tail level, which is charged one meta block.
            if (hasMipTail)
            {
                pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlockSize;
            }
        }
    }

    // Get the HTILE address equation (copied from HtileAddrFromCoord).
    // HTILE addressing depends on the number of samples, but this code doesn't support it yet.
    const UINT_32 patternSlot = m_xmaskBaseIndex;
    const UINT_8* pPatternIdx = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;

    ADDR_C_ASSERT(sizeof(GFX10_HTILE_SW_PATTERN[pPatternIdx[patternSlot]]) == 72 * 2);
    pOut->equation.gfx10_bits = (UINT_16 *)GFX10_HTILE_SW_PATTERN[pPatternIdx[patternSlot]];

    return ADDR_OK;
}
267
268
/**
269
************************************************************************************************************************
270
* Gfx10Lib::HwlComputeCmaskInfo
271
*
272
* @brief
273
* Interface function stub of AddrComputeCmaskInfo
274
*
275
* @return
276
* ADDR_E_RETURNCODE
277
************************************************************************************************************************
278
*/
279
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskInfo(
    const ADDR2_COMPUTE_CMASK_INFO_INPUT* pIn,  ///< [in] input structure
    ADDR2_COMPUTE_CMASK_INFO_OUTPUT*      pOut  ///< [out] output structure
    ) const
{
    // Accepted inputs: pipe-aligned CMASK on a 2D resource using SW_64KB_Z_X, or SW_VAR_Z_X when
    // m_blockVarSizeLog2 is non-zero.
    const BOOL_32 swizzleAccepted =
        (pIn->swizzleMode == ADDR_SW_64KB_Z_X) ||
        ((pIn->swizzleMode == ADDR_SW_VAR_Z_X) && (m_blockVarSizeLog2 != 0));

    if ((pIn->resourceType != ADDR_RSRC_TEX_2D) ||
        (pIn->cMaskFlags.pipeAligned != TRUE)   ||
        (!swizzleAccepted))
    {
        return ADDR_INVALIDPARAMS;
    }

    Dim3d         metaBlock     = {};
    const UINT_32 metaBlockSize = GetMetaBlkSize(Gfx10DataFmask,
                                                 ADDR_RSRC_TEX_2D,
                                                 pIn->swizzleMode,
                                                 0,
                                                 0,
                                                 TRUE,
                                                 &metaBlock);

    // Surface dimensions rounded up to whole meta blocks.
    pOut->pitch         = PowTwoAlign(pIn->unalignedWidth,  metaBlock.w);
    pOut->height        = PowTwoAlign(pIn->unalignedHeight, metaBlock.h);
    pOut->baseAlign     = metaBlockSize;
    pOut->metaBlkWidth  = metaBlock.w;
    pOut->metaBlkHeight = metaBlock.h;

    if (pIn->numMipLevels <= 1)
    {
        // Single level: count the meta blocks covering the aligned surface.
        const UINT_32 blocksPerRow = pOut->pitch  / metaBlock.w;
        const UINT_32 blockRows    = pOut->height / metaBlock.h;

        pOut->metaBlkNumPerSlice = blocksPerRow * blockRows;

        if (pOut->pMipInfo != NULL)
        {
            pOut->pMipInfo[0].inMiptail = FALSE;
            pOut->pMipInfo[0].offset    = 0;
            pOut->pMipInfo[0].sliceSize = pOut->metaBlkNumPerSlice * metaBlockSize;
        }
    }
    else
    {
        ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);

        const BOOL_32 hasMipTail = (pIn->firstMipIdInTail != pIn->numMipLevels);

        // Running count of meta blocks in the slice; a mip tail, when present, takes the first one.
        UINT_32 blocksSoFar = hasMipTail ? 1 : 0;

        for (INT_32 mip = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; mip >= 0; mip--)
        {
            UINT_32 levelWidth;
            UINT_32 levelHeight;

            GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, mip, &levelWidth, &levelHeight);

            const UINT_32 blocksPerRow = PowTwoAlign(levelWidth,  metaBlock.w) / metaBlock.w;
            const UINT_32 blockRows    = PowTwoAlign(levelHeight, metaBlock.h) / metaBlock.h;

            if (pOut->pMipInfo != NULL)
            {
                pOut->pMipInfo[mip].inMiptail = FALSE;
                pOut->pMipInfo[mip].offset    = blocksSoFar * metaBlockSize;
                pOut->pMipInfo[mip].sliceSize = blocksPerRow * blockRows * metaBlockSize;
            }

            blocksSoFar += blocksPerRow * blockRows;
        }

        pOut->metaBlkNumPerSlice = blocksSoFar;

        if (pOut->pMipInfo != NULL)
        {
            // Every tail level reports offset 0 and size 0 ...
            for (UINT_32 mip = pIn->firstMipIdInTail; mip < pIn->numMipLevels; mip++)
            {
                pOut->pMipInfo[mip].inMiptail = TRUE;
                pOut->pMipInfo[mip].offset    = 0;
                pOut->pMipInfo[mip].sliceSize = 0;
            }

            // ... except the first tail level, which is charged one meta block.
            if (hasMipTail)
            {
                pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlockSize;
            }
        }
    }

    // Slice and total sizes follow from the block count on both paths.
    pOut->sliceSize  = pOut->metaBlkNumPerSlice * metaBlockSize;
    pOut->cmaskBytes = pOut->sliceSize * pIn->numSlices;

    return ADDR_OK;
}
376
377
/**
378
************************************************************************************************************************
379
* Gfx10Lib::HwlComputeDccInfo
380
*
381
* @brief
382
* Interface function to compute DCC key info
383
*
384
* @return
385
* ADDR_E_RETURNCODE
386
************************************************************************************************************************
387
*/
388
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeDccInfo(
    const ADDR2_COMPUTE_DCCINFO_INPUT* pIn,  ///< [in] input structure
    ADDR2_COMPUTE_DCCINFO_OUTPUT*      pOut  ///< [out] output structure
    ) const
{
    // Computes the DCC key layout (compress block, meta block, per-mip offsets/sizes, total size) and
    // selects the DCC address equation. Returns ADDR_INVALIDPARAMS for the rejected swizzle modes below.
    ADDR_E_RETURNCODE ret = ADDR_OK;

    if (IsLinear(pIn->swizzleMode) || IsBlock256b(pIn->swizzleMode))
    {
        // Hardware support dcc for 256 swizzle mode, but address lib will not support it because we only
        // select 256 swizzle mode for small surface, and it's not helpful to enable dcc for small surface.
        ret = ADDR_INVALIDPARAMS;
    }
    else if (m_settings.dccUnsup3DSwDis && IsTex3d(pIn->resourceType) && IsDisplaySwizzle(pIn->swizzleMode))
    {
        // DCC is not supported on 3D Display surfaces for GFX10.0 and GFX10.1
        ret = ADDR_INVALIDPARAMS;
    }
    else
    {
        // Log2 of the element size in bytes. Computed once and shared by the compress-block lookup,
        // the meta-block query and the equation lookup (the original redeclared it in an inner scope).
        const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);

        // only SW_*_R_X surfaces may be DCC compressed when attached to the CB
        ADDR_ASSERT(IsRtOptSwizzle(pIn->swizzleMode));

        const BOOL_32 isThick = IsThick(pIn->resourceType, pIn->swizzleMode);

        pOut->compressBlkWidth  = isThick ? Block256_3d[elemLog2].w : Block256_2d[elemLog2].w;
        pOut->compressBlkHeight = isThick ? Block256_3d[elemLog2].h : Block256_2d[elemLog2].h;
        pOut->compressBlkDepth  = isThick ? Block256_3d[elemLog2].d : 1;

        // No "ret == ADDR_OK" re-check is needed here: ret cannot have changed inside this branch.
        Dim3d         metaBlk     = {};
        const UINT_32 numFragLog2 = Log2(Max(pIn->numFrags, 1u));
        const UINT_32 metaBlkSize = GetMetaBlkSize(Gfx10DataColor,
                                                   pIn->resourceType,
                                                   pIn->swizzleMode,
                                                   elemLog2,
                                                   numFragLog2,
                                                   pIn->dccKeyFlags.pipeAligned,
                                                   &metaBlk);

        pOut->dccRamBaseAlign = metaBlkSize;
        pOut->metaBlkWidth    = metaBlk.w;
        pOut->metaBlkHeight   = metaBlk.h;
        pOut->metaBlkDepth    = metaBlk.d;
        pOut->metaBlkSize     = metaBlkSize;

        // Surface dimensions rounded up to whole meta blocks.
        pOut->pitch  = PowTwoAlign(pIn->unalignedWidth,     metaBlk.w);
        pOut->height = PowTwoAlign(pIn->unalignedHeight,    metaBlk.h);
        pOut->depth  = PowTwoAlign(Max(pIn->numSlices, 1u), metaBlk.d);

        if (pIn->numMipLevels > 1)
        {
            ADDR_ASSERT(pIn->firstMipIdInTail <= pIn->numMipLevels);

            // When a mip tail exists it takes the first meta block of the slice; the remaining levels
            // are laid out after it, walking from the last non-tail level back to level 0.
            UINT_32 offset = (pIn->firstMipIdInTail == pIn->numMipLevels) ? 0 : metaBlkSize;

            for (INT_32 i = static_cast<INT_32>(pIn->firstMipIdInTail) - 1; i >= 0; i--)
            {
                UINT_32 mipWidth, mipHeight;

                GetMipSize(pIn->unalignedWidth, pIn->unalignedHeight, 1, i, &mipWidth, &mipHeight);

                mipWidth  = PowTwoAlign(mipWidth,  metaBlk.w);
                mipHeight = PowTwoAlign(mipHeight, metaBlk.h);

                const UINT_32 pitchInM     = mipWidth  / metaBlk.w;
                const UINT_32 heightInM    = mipHeight / metaBlk.h;
                const UINT_32 mipSliceSize = pitchInM * heightInM * metaBlkSize;

                if (pOut->pMipInfo != NULL)
                {
                    pOut->pMipInfo[i].inMiptail = FALSE;
                    pOut->pMipInfo[i].offset    = offset;
                    pOut->pMipInfo[i].sliceSize = mipSliceSize;
                }

                offset += mipSliceSize;
            }

            pOut->dccRamSliceSize    = offset;
            pOut->metaBlkNumPerSlice = offset / metaBlkSize;
            pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);

            if (pOut->pMipInfo != NULL)
            {
                // Every tail level reports offset 0 and size 0 ...
                for (UINT_32 i = pIn->firstMipIdInTail; i < pIn->numMipLevels; i++)
                {
                    pOut->pMipInfo[i].inMiptail = TRUE;
                    pOut->pMipInfo[i].offset    = 0;
                    pOut->pMipInfo[i].sliceSize = 0;
                }

                // ... except the first tail level, which is charged one meta block.
                if (pIn->firstMipIdInTail != pIn->numMipLevels)
                {
                    pOut->pMipInfo[pIn->firstMipIdInTail].sliceSize = metaBlkSize;
                }
            }
        }
        else
        {
            const UINT_32 pitchInM  = pOut->pitch  / metaBlk.w;
            const UINT_32 heightInM = pOut->height / metaBlk.h;

            pOut->metaBlkNumPerSlice = pitchInM * heightInM;
            pOut->dccRamSliceSize    = pOut->metaBlkNumPerSlice * metaBlkSize;
            pOut->dccRamSize         = pOut->dccRamSliceSize * (pOut->depth / metaBlk.d);

            if (pOut->pMipInfo != NULL)
            {
                pOut->pMipInfo[0].inMiptail = FALSE;
                pOut->pMipInfo[0].offset    = 0;
                pOut->pMipInfo[0].sliceSize = pOut->dccRamSliceSize;
            }
        }

        // Get the DCC address equation (copied from DccAddrFromCoord)
        const UINT_32 numPipeLog2 = m_pipesLog2;
        UINT_32       index       = m_dccBaseIndex + elemLog2;
        const UINT_8* patIdxTable;

        if (m_settings.supportRbPlus)
        {
            patIdxTable = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;

            if (pIn->dccKeyFlags.pipeAligned)
            {
                index += MaxNumOfBpp;

                if (m_numPkrLog2 < 2)
                {
                    index += m_pipesLog2 * MaxNumOfBpp;
                }
                else
                {
                    // 4 groups for "m_numPkrLog2 < 2" case
                    index += 4 * MaxNumOfBpp;

                    const UINT_32 dccPipePerPkr = 3;

                    index += (m_numPkrLog2 - 2) * dccPipePerPkr * MaxNumOfBpp +
                             (m_pipesLog2 - m_numPkrLog2) * MaxNumOfBpp;
                }
            }
        }
        else
        {
            patIdxTable = GFX10_DCC_64K_R_X_PATIDX;

            if (pIn->dccKeyFlags.pipeAligned)
            {
                index += (numPipeLog2 + UnalignedDccType) * MaxNumOfBpp;
            }
            else
            {
                index += Min(numPipeLog2, UnalignedDccType - 1) * MaxNumOfBpp;
            }
        }

        ADDR_C_ASSERT(sizeof(GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]]) == 68 * 2);
        pOut->equation.gfx10_bits = (UINT_16*)GFX10_DCC_64K_R_X_SW_PATTERN[patIdxTable[index]];
    }

    return ret;
}
559
560
/**
561
************************************************************************************************************************
562
* Gfx10Lib::HwlComputeCmaskAddrFromCoord
563
*
564
* @brief
565
* Interface function stub of AddrComputeCmaskAddrFromCoord
566
*
567
* @return
568
* ADDR_E_RETURNCODE
569
************************************************************************************************************************
570
*/
571
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeCmaskAddrFromCoord(
    const ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_INPUT* pIn,  ///< [in] input structure
    ADDR2_COMPUTE_CMASK_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] output structure
{
    // Computes the CMASK address and bit position for (x, y, slice). The CMASK layout is obtained from
    // ComputeCmaskInfo first; its return code is passed through, and pOut is written only on ADDR_OK.

    // Only support pipe aligned CMask
    ADDR_ASSERT(pIn->cMaskFlags.pipeAligned == TRUE);

    ADDR2_COMPUTE_CMASK_INFO_INPUT input = {};
    input.size            = sizeof(input);
    input.cMaskFlags      = pIn->cMaskFlags;
    input.colorFlags      = pIn->colorFlags;
    input.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
    input.unalignedHeight = Max(pIn->unalignedHeight, 1u);
    input.numSlices       = Max(pIn->numSlices,       1u);
    input.swizzleMode     = pIn->swizzleMode;
    input.resourceType    = pIn->resourceType;

    ADDR2_COMPUTE_CMASK_INFO_OUTPUT output = {};
    output.size = sizeof(output);

    ADDR_E_RETURNCODE returnCode = ComputeCmaskInfo(&input, &output);

    if (returnCode == ADDR_OK)
    {
        const UINT_32 fmaskBpp      = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
        const UINT_32 fmaskElemLog2 = Log2(fmaskBpp >> 3);
        const UINT_32 pipeMask      = (1 << m_pipesLog2) - 1;
        const UINT_32 index         = m_xmaskBaseIndex + fmaskElemLog2;
        const UINT_8* patIdxTable   =
            (pIn->swizzleMode == ADDR_SW_VAR_Z_X) ? GFX10_CMASK_VAR_RBPLUS_PATIDX :
            (m_settings.supportRbPlus ? GFX10_CMASK_64K_RBPLUS_PATIDX : GFX10_CMASK_64K_PATIDX);

        const UINT_32 blkSizeLog2 = Log2(output.metaBlkWidth) + Log2(output.metaBlkHeight) - 7;
        const UINT_32 blkMask     = (1 << blkSizeLog2) - 1;
        const UINT_32 blkOffset   = ComputeOffsetFromSwizzlePattern(GFX10_CMASK_SW_PATTERN[patIdxTable[index]],
                                                                    blkSizeLog2 + 1, // +1 for nibble offset
                                                                    pIn->x,
                                                                    pIn->y,
                                                                    pIn->slice,
                                                                    0);
        const UINT_32 xb       = pIn->x / output.metaBlkWidth;
        const UINT_32 yb       = pIn->y / output.metaBlkHeight;
        const UINT_32 pb       = output.pitch / output.metaBlkWidth;
        const UINT_32 blkIndex = (yb * pb) + xb;
        const UINT_32 pipeXor  = ((pIn->pipeXor & pipeMask) << m_pipeInterleaveLog2) & blkMask;

        // Widen before multiplying: sliceSize * slice is otherwise evaluated in 32 bits and can wrap
        // before it is added into the address. The HTILE and DCC address routines in this file
        // already cast the slice size the same way.
        pOut->addr = (static_cast<UINT_64>(output.sliceSize) * pIn->slice) +
                     (blkIndex * (1 << blkSizeLog2)) +
                     ((blkOffset >> 1) ^ pipeXor);

        // blkOffset counts nibbles: the low bit selects the nibble within the byte (bit 0 or bit 4).
        pOut->bitPosition = (blkOffset & 1) << 2;
    }

    return returnCode;
}
626
627
/**
628
************************************************************************************************************************
629
* Gfx10Lib::HwlComputeHtileAddrFromCoord
630
*
631
* @brief
632
* Interface function stub of AddrComputeHtileAddrFromCoord
633
*
634
* @return
635
* ADDR_E_RETURNCODE
636
************************************************************************************************************************
637
*/
638
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileAddrFromCoord(
    const ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT* pIn,  ///< [in] input structure
    ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] output structure
{
    // Mip-mapped HTILE is rejected up front; only single-level surfaces are addressed here.
    if (pIn->numMipLevels > 1)
    {
        return ADDR_NOTIMPLEMENTED;
    }

    // Query the HTILE layout for a single-level surface with the caller's dimensions (clamped to >= 1).
    ADDR2_COMPUTE_HTILE_INFO_INPUT infoIn = {};
    infoIn.size            = sizeof(infoIn);
    infoIn.hTileFlags      = pIn->hTileFlags;
    infoIn.depthFlags      = pIn->depthflags;
    infoIn.swizzleMode     = pIn->swizzleMode;
    infoIn.unalignedWidth  = Max(pIn->unalignedWidth,  1u);
    infoIn.unalignedHeight = Max(pIn->unalignedHeight, 1u);
    infoIn.numSlices       = Max(pIn->numSlices,       1u);
    infoIn.numMipLevels    = 1;

    ADDR2_COMPUTE_HTILE_INFO_OUTPUT infoOut = {};
    infoOut.size = sizeof(infoOut);

    const ADDR_E_RETURNCODE returnCode = ComputeHtileInfo(&infoIn, &infoOut);

    if (returnCode == ADDR_OK)
    {
        // Select the swizzle pattern for this pipe configuration and sample count.
        const UINT_32 sampleLog2  = Log2(pIn->numSamples);
        const UINT_32 patternSlot = m_xmaskBaseIndex + sampleLog2;
        const UINT_8* pPatternIdx = m_settings.supportRbPlus ? GFX10_HTILE_RBPLUS_PATIDX : GFX10_HTILE_PATIDX;

        const UINT_32 blockSizeLog2 = Log2(infoOut.metaBlkWidth) + Log2(infoOut.metaBlkHeight) - 4;
        const UINT_32 inBlockMask   = (1 << blockSizeLog2) - 1;
        const UINT_32 nibbleOffset  =
            ComputeOffsetFromSwizzlePattern(GFX10_HTILE_SW_PATTERN[pPatternIdx[patternSlot]],
                                            blockSizeLog2 + 1, // +1 for nibble offset
                                            pIn->x,
                                            pIn->y,
                                            pIn->slice,
                                            0);

        // Index of the meta block containing (x, y) within the slice.
        const UINT_32 blockCol     = pIn->x / infoOut.metaBlkWidth;
        const UINT_32 blockRow     = pIn->y / infoOut.metaBlkHeight;
        const UINT_32 blocksPerRow = infoOut.pitch / infoOut.metaBlkWidth;
        const UINT_32 blockIndex   = (blockRow * blocksPerRow) + blockCol;

        // Pipe XOR bits, shifted above the pipe-interleave bits and limited to the block.
        const UINT_32 pipeBits = (1 << m_pipesLog2) - 1;
        const UINT_32 xorBits  = ((pIn->pipeXor & pipeBits) << m_pipeInterleaveLog2) & inBlockMask;

        pOut->addr = (static_cast<UINT_64>(infoOut.sliceSize) * pIn->slice) +
                     (blockIndex * (1 << blockSizeLog2)) +
                     ((nibbleOffset >> 1) ^ xorBits);
    }

    return returnCode;
}
695
696
/**
697
************************************************************************************************************************
698
* Gfx10Lib::HwlComputeHtileCoordFromAddr
699
*
700
* @brief
701
* Interface function stub of AddrComputeHtileCoordFromAddr
702
*
703
* @return
704
* ADDR_E_RETURNCODE
705
************************************************************************************************************************
706
*/
707
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeHtileCoordFromAddr(
    const ADDR2_COMPUTE_HTILE_COORDFROMADDR_INPUT* pIn,  ///< [in] input structure
    ADDR2_COMPUTE_HTILE_COORDFROMADDR_OUTPUT*      pOut) ///< [out] output structure
{
    // The address-to-coordinate conversion for HTILE is not implemented; neither pIn nor pOut is touched.
    ADDR_NOT_IMPLEMENTED();

    // Report the missing implementation instead of ADDR_OK, so callers do not consume an output
    // structure that was never written. This matches the code HwlComputeHtileAddrFromCoord returns
    // for its unimplemented mip-mapped path.
    return ADDR_NOTIMPLEMENTED;
}
715
716
/**
717
************************************************************************************************************************
718
* Gfx10Lib::HwlSupportComputeDccAddrFromCoord
719
*
720
* @brief
721
* Check whether HwlComputeDccAddrFromCoord() can be done for the input parameter
722
*
723
* @return
724
* ADDR_E_RETURNCODE
725
************************************************************************************************************************
726
*/
727
ADDR_E_RETURNCODE Gfx10Lib::HwlSupportComputeDccAddrFromCoord(
    const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn)
{
    // Anything other than a single-fragment, single-level, non-linear DCC key on a 2D SW_64KB_R_X
    // resource is rejected.
    const BOOL_32 unsupportedSurface = (pIn->resourceType != ADDR_RSRC_TEX_2D) ||
                                       (pIn->swizzleMode != ADDR_SW_64KB_R_X)  ||
                                       (pIn->dccKeyFlags.linear == TRUE)       ||
                                       (pIn->numFrags > 1)                     ||
                                       (pIn->numMipLevels > 1)                 ||
                                       (pIn->mipId > 0);

    // The caller must also supply the layout values the address computation reads: pitch, meta block
    // dimensions, and the slice size whenever a slice other than 0 is addressed.
    const BOOL_32 missingLayout = (pIn->pitch == 0)         ||
                                  (pIn->metaBlkWidth == 0)  ||
                                  (pIn->metaBlkHeight == 0) ||
                                  ((pIn->slice > 0) && (pIn->dccRamSliceSize == 0));

    return (unsupportedSurface || missingLayout) ? ADDR_NOTSUPPORTED : ADDR_OK;
}
751
752
/**
753
************************************************************************************************************************
754
* Gfx10Lib::HwlComputeDccAddrFromCoord
755
*
756
* @brief
757
* Interface function stub of AddrComputeDccAddrFromCoord
758
*
759
* @return
760
* N/A
761
************************************************************************************************************************
762
*/
763
VOID Gfx10Lib::HwlComputeDccAddrFromCoord(
    const ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT* pIn,  ///< [in] input structure
    ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT*      pOut) ///< [out] output structure
{
    const UINT_32 elemLog2    = Log2(pIn->bpp >> 3);
    const UINT_32 numPipeLog2 = m_pipesLog2;

    // Pick the pattern-index table and count how many groups of MaxNumOfBpp entries to skip in it
    // for this pipe/packer configuration.
    const UINT_8* pPatternIdx  = NULL;
    UINT_32       groupsToSkip = 0;

    if (m_settings.supportRbPlus)
    {
        pPatternIdx = GFX10_DCC_64K_R_X_RBPLUS_PATIDX;

        if (pIn->dccKeyFlags.pipeAligned)
        {
            // One group is skipped for every pipe-aligned key.
            groupsToSkip = 1;

            if (m_numPkrLog2 < 2)
            {
                groupsToSkip += m_pipesLog2;
            }
            else
            {
                const UINT_32 dccPipePerPkr = 3;

                // 4 groups for "m_numPkrLog2 < 2" case
                groupsToSkip += 4;
                groupsToSkip += (m_numPkrLog2 - 2) * dccPipePerPkr;
                groupsToSkip += (m_pipesLog2 - m_numPkrLog2);
            }
        }
    }
    else
    {
        pPatternIdx = GFX10_DCC_64K_R_X_PATIDX;

        if (pIn->dccKeyFlags.pipeAligned)
        {
            groupsToSkip = numPipeLog2 + UnalignedDccType;
        }
        else
        {
            groupsToSkip = Min(numPipeLog2, UnalignedDccType - 1);
        }
    }

    const UINT_32 patternSlot = m_dccBaseIndex + elemLog2 + (groupsToSkip * MaxNumOfBpp);

    const UINT_32 blockSizeLog2 = Log2(pIn->metaBlkWidth) + Log2(pIn->metaBlkHeight) + elemLog2 - 8;
    const UINT_32 inBlockMask   = (1 << blockSizeLog2) - 1;
    const UINT_32 nibbleOffset  =
        ComputeOffsetFromSwizzlePattern(GFX10_DCC_64K_R_X_SW_PATTERN[pPatternIdx[patternSlot]],
                                        blockSizeLog2 + 1, // +1 for nibble offset
                                        pIn->x,
                                        pIn->y,
                                        pIn->slice,
                                        0);

    // Index of the meta block containing (x, y) within the slice.
    const UINT_32 blockCol     = pIn->x / pIn->metaBlkWidth;
    const UINT_32 blockRow     = pIn->y / pIn->metaBlkHeight;
    const UINT_32 blocksPerRow = pIn->pitch / pIn->metaBlkWidth;
    const UINT_32 blockIndex   = (blockRow * blocksPerRow) + blockCol;

    // Pipe XOR bits, shifted above the pipe-interleave bits and limited to the block.
    const UINT_32 pipeBits = (1 << numPipeLog2) - 1;
    const UINT_32 xorBits  = ((pIn->pipeXor & pipeBits) << m_pipeInterleaveLog2) & inBlockMask;

    pOut->addr = (static_cast<UINT_64>(pIn->dccRamSliceSize) * pIn->slice) +
                 (blockIndex * (1 << blockSizeLog2)) +
                 ((nibbleOffset >> 1) ^ xorBits);
}
830
831
/**
832
************************************************************************************************************************
833
* Gfx10Lib::HwlInitGlobalParams
834
*
835
* @brief
836
* Initializes global parameters
837
*
838
* @return
839
* TRUE if all settings are valid
840
*
841
************************************************************************************************************************
842
*/
843
BOOL_32 Gfx10Lib::HwlInitGlobalParams(
    const ADDR_CREATE_INPUT* pCreateIn) ///< [in] create input
{
    // Decodes the GB_ADDR_CONFIG register value into the pipe, pipe-interleave and max-compressed-
    // fragment members, derives the pattern-table base indices, and builds the equation table when
    // every decoded field was recognised.
    BOOL_32              isValid = TRUE;
    GB_ADDR_CONFIG_GFX10 addrConfig;

    addrConfig.u32All = pCreateIn->regValue.gbAddrConfig;

    // These values are copied from CModel code
    switch (addrConfig.bits.NUM_PIPES)
    {
        case ADDR_CONFIG_1_PIPE:  m_pipes = 1;  m_pipesLog2 = 0; break;
        case ADDR_CONFIG_2_PIPE:  m_pipes = 2;  m_pipesLog2 = 1; break;
        case ADDR_CONFIG_4_PIPE:  m_pipes = 4;  m_pipesLog2 = 2; break;
        case ADDR_CONFIG_8_PIPE:  m_pipes = 8;  m_pipesLog2 = 3; break;
        case ADDR_CONFIG_16_PIPE: m_pipes = 16; m_pipesLog2 = 4; break;
        case ADDR_CONFIG_32_PIPE: m_pipes = 32; m_pipesLog2 = 5; break;
        case ADDR_CONFIG_64_PIPE: m_pipes = 64; m_pipesLog2 = 6; break;
        default:
            ADDR_ASSERT_ALWAYS();
            isValid = FALSE;
            break;
    }

    switch (addrConfig.bits.PIPE_INTERLEAVE_SIZE)
    {
        case ADDR_CONFIG_PIPE_INTERLEAVE_256B:
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_256B;
            m_pipeInterleaveLog2  = 8;
            break;
        case ADDR_CONFIG_PIPE_INTERLEAVE_512B:
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_512B;
            m_pipeInterleaveLog2  = 9;
            break;
        case ADDR_CONFIG_PIPE_INTERLEAVE_1KB:
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_1KB;
            m_pipeInterleaveLog2  = 10;
            break;
        case ADDR_CONFIG_PIPE_INTERLEAVE_2KB:
            m_pipeInterleaveBytes = ADDR_PIPEINTERLEAVE_2KB;
            m_pipeInterleaveLog2  = 11;
            break;
        default:
            ADDR_ASSERT_ALWAYS();
            isValid = FALSE;
            break;
    }

    // Addr::V2::Lib::ComputePipeBankXor()/ComputeSlicePipeBankXor() requires pipe interleave to be exactly 8 bits, and
    // any larger value requires a post-process (left shift) on the output pipeBankXor bits.
    // And more importantly, SW AddrLib doesn't support sw equation/pattern for PI != 256 case.
    ADDR_ASSERT(m_pipeInterleaveBytes == ADDR_PIPEINTERLEAVE_256B);

    switch (addrConfig.bits.MAX_COMPRESSED_FRAGS)
    {
        case ADDR_CONFIG_1_MAX_COMPRESSED_FRAGMENTS: m_maxCompFrag = 1; m_maxCompFragLog2 = 0; break;
        case ADDR_CONFIG_2_MAX_COMPRESSED_FRAGMENTS: m_maxCompFrag = 2; m_maxCompFragLog2 = 1; break;
        case ADDR_CONFIG_4_MAX_COMPRESSED_FRAGMENTS: m_maxCompFrag = 4; m_maxCompFragLog2 = 2; break;
        case ADDR_CONFIG_8_MAX_COMPRESSED_FRAGMENTS: m_maxCompFrag = 8; m_maxCompFragLog2 = 3; break;
        default:
            ADDR_ASSERT_ALWAYS();
            isValid = FALSE;
            break;
    }

    // Base indices into the pattern-index tables.
    // Skip unaligned case
    m_xmaskBaseIndex += MaxNumOfAA;

    m_xmaskBaseIndex += m_pipesLog2 * MaxNumOfAA;
    m_colorBaseIndex += m_pipesLog2 * MaxNumOfBpp;

    if (m_settings.supportRbPlus)
    {
        m_numPkrLog2 = addrConfig.bits.NUM_PKRS;
        m_numSaLog2  = (m_numPkrLog2 > 0) ? (m_numPkrLog2 - 1) : 0;

        ADDR_ASSERT((m_numPkrLog2 <= m_pipesLog2) && ((m_pipesLog2 - m_numPkrLog2) <= 2));

        ADDR_C_ASSERT(sizeof(GFX10_HTILE_RBPLUS_PATIDX) / sizeof(GFX10_HTILE_RBPLUS_PATIDX[0]) ==
                      sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX) / sizeof(GFX10_CMASK_64K_RBPLUS_PATIDX[0]));

        if (m_numPkrLog2 >= 2)
        {
            m_colorBaseIndex += (2 * m_numPkrLog2 - 2) * MaxNumOfBpp;
            m_xmaskBaseIndex += (m_numPkrLog2 - 1) * 3 * MaxNumOfAA;
        }
    }
    else
    {
        // Compile-time checks that the non-RB+ tables have the sizes the indexing above relies on.
        const UINT_32 numPipeType = static_cast<UINT_32>(ADDR_CONFIG_64_PIPE) -
                                    static_cast<UINT_32>(ADDR_CONFIG_1_PIPE)  +
                                    1;

        ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) == (numPipeType + 1) * MaxNumOfAA);

        ADDR_C_ASSERT(sizeof(GFX10_HTILE_PATIDX) / sizeof(GFX10_HTILE_PATIDX[0]) ==
                      sizeof(GFX10_CMASK_64K_PATIDX) / sizeof(GFX10_CMASK_64K_PATIDX[0]));
    }

    if (m_settings.supportRbPlus)
    {
        // VAR block size = 16K * num_pipes. For 4 pipe configuration, SW_VAR_* mode swizzle patterns are same as the
        // corresponding SW_64KB_* mode
        m_blockVarSizeLog2 = m_pipesLog2 + 14;
    }

    // The equation table is only built from a fully recognised configuration.
    if (isValid)
    {
        InitEquationTable();
    }

    return isValid;
}
992
993
/**
994
************************************************************************************************************************
995
* Gfx10Lib::HwlConvertChipFamily
996
*
997
* @brief
998
* Convert familyID defined in atiid.h to ChipFamily and set m_chipFamily/m_chipRevision
999
* @return
1000
* ChipFamily
1001
************************************************************************************************************************
1002
*/
1003
ChipFamily Gfx10Lib::HwlConvertChipFamily(
1004
UINT_32 chipFamily, ///< [in] chip family defined in atiih.h
1005
UINT_32 chipRevision) ///< [in] chip revision defined in "asic_family"_id.h
1006
{
1007
ChipFamily family = ADDR_CHIP_FAMILY_NAVI;
1008
1009
m_settings.dccUnsup3DSwDis = 1;
1010
m_settings.dsMipmapHtileFix = 1;
1011
1012
switch (chipFamily)
1013
{
1014
case FAMILY_NV:
1015
if (ASICREV_IS_NAVI10_P(chipRevision))
1016
{
1017
m_settings.dsMipmapHtileFix = 0;
1018
m_settings.isDcn20 = 1;
1019
}
1020
1021
if (ASICREV_IS_NAVI12_P(chipRevision))
1022
{
1023
m_settings.isDcn20 = 1;
1024
}
1025
1026
if (ASICREV_IS_NAVI14_M(chipRevision))
1027
{
1028
m_settings.isDcn20 = 1;
1029
}
1030
1031
if (ASICREV_IS_SIENNA_CICHLID(chipRevision))
1032
{
1033
m_settings.supportRbPlus = 1;
1034
m_settings.dccUnsup3DSwDis = 0;
1035
}
1036
1037
if (ASICREV_IS_NAVY_FLOUNDER(chipRevision))
1038
{
1039
m_settings.supportRbPlus = 1;
1040
m_settings.dccUnsup3DSwDis = 0;
1041
}
1042
1043
if (ASICREV_IS_DIMGREY_CAVEFISH(chipRevision))
1044
{
1045
m_settings.supportRbPlus = 1;
1046
m_settings.dccUnsup3DSwDis = 0;
1047
}
1048
1049
if (ASICREV_IS_BEIGE_GOBY(chipRevision))
1050
{
1051
m_settings.supportRbPlus = 1;
1052
m_settings.dccUnsup3DSwDis = 0;
1053
}
1054
break;
1055
1056
case FAMILY_VGH:
1057
if (ASICREV_IS_VANGOGH(chipRevision))
1058
{
1059
m_settings.supportRbPlus = 1;
1060
m_settings.dccUnsup3DSwDis = 0;
1061
}
1062
else
1063
{
1064
ADDR_ASSERT(!"Unknown chip revision");
1065
}
1066
1067
break;
1068
1069
case FAMILY_YC:
1070
if (ASICREV_IS_YELLOW_CARP(chipRevision))
1071
{
1072
m_settings.supportRbPlus = 1;
1073
m_settings.dccUnsup3DSwDis = 0;
1074
}
1075
else
1076
{
1077
ADDR_ASSERT(!"Unknown chip revision");
1078
}
1079
1080
break;
1081
1082
default:
1083
ADDR_ASSERT(!"Unknown chip family");
1084
break;
1085
}
1086
1087
m_configFlags.use32bppFor422Fmt = TRUE;
1088
1089
return family;
1090
}
1091
1092
/**
1093
************************************************************************************************************************
1094
* Gfx10Lib::GetBlk256SizeLog2
1095
*
1096
* @brief
1097
* Get block 256 size
1098
*
1099
* @return
1100
* N/A
1101
************************************************************************************************************************
1102
*/
1103
void Gfx10Lib::GetBlk256SizeLog2(
1104
AddrResourceType resourceType, ///< [in] Resource type
1105
AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1106
UINT_32 elemLog2, ///< [in] element size log2
1107
UINT_32 numSamplesLog2, ///< [in] number of samples
1108
Dim3d* pBlock ///< [out] block size
1109
) const
1110
{
1111
if (IsThin(resourceType, swizzleMode))
1112
{
1113
UINT_32 blockBits = 8 - elemLog2;
1114
1115
if (IsZOrderSwizzle(swizzleMode))
1116
{
1117
blockBits -= numSamplesLog2;
1118
}
1119
1120
pBlock->w = (blockBits >> 1) + (blockBits & 1);
1121
pBlock->h = (blockBits >> 1);
1122
pBlock->d = 0;
1123
}
1124
else
1125
{
1126
ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1127
1128
UINT_32 blockBits = 8 - elemLog2;
1129
1130
pBlock->d = (blockBits / 3) + (((blockBits % 3) > 0) ? 1 : 0);
1131
pBlock->w = (blockBits / 3) + (((blockBits % 3) > 1) ? 1 : 0);
1132
pBlock->h = (blockBits / 3);
1133
}
1134
}
1135
1136
/**
1137
************************************************************************************************************************
1138
* Gfx10Lib::GetCompressedBlockSizeLog2
1139
*
1140
* @brief
1141
* Get compress block size
1142
*
1143
* @return
1144
* N/A
1145
************************************************************************************************************************
1146
*/
1147
void Gfx10Lib::GetCompressedBlockSizeLog2(
1148
Gfx10DataType dataType, ///< [in] Data type
1149
AddrResourceType resourceType, ///< [in] Resource type
1150
AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1151
UINT_32 elemLog2, ///< [in] element size log2
1152
UINT_32 numSamplesLog2, ///< [in] number of samples
1153
Dim3d* pBlock ///< [out] block size
1154
) const
1155
{
1156
if (dataType == Gfx10DataColor)
1157
{
1158
GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, pBlock);
1159
}
1160
else
1161
{
1162
ADDR_ASSERT((dataType == Gfx10DataDepthStencil) || (dataType == Gfx10DataFmask));
1163
pBlock->w = 3;
1164
pBlock->h = 3;
1165
pBlock->d = 0;
1166
}
1167
}
1168
1169
/**
1170
************************************************************************************************************************
1171
* Gfx10Lib::GetMetaOverlapLog2
1172
*
1173
* @brief
1174
* Get meta block overlap
1175
*
1176
* @return
1177
* N/A
1178
************************************************************************************************************************
1179
*/
1180
INT_32 Gfx10Lib::GetMetaOverlapLog2(
1181
Gfx10DataType dataType, ///< [in] Data type
1182
AddrResourceType resourceType, ///< [in] Resource type
1183
AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1184
UINT_32 elemLog2, ///< [in] element size log2
1185
UINT_32 numSamplesLog2 ///< [in] number of samples
1186
) const
1187
{
1188
Dim3d compBlock;
1189
Dim3d microBlock;
1190
1191
GetCompressedBlockSizeLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2, &compBlock);
1192
GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, numSamplesLog2, &microBlock);
1193
1194
const INT_32 compSizeLog2 = compBlock.w + compBlock.h + compBlock.d;
1195
const INT_32 blk256SizeLog2 = microBlock.w + microBlock.h + microBlock.d;
1196
const INT_32 maxSizeLog2 = Max(compSizeLog2, blk256SizeLog2);
1197
const INT_32 numPipesLog2 = GetEffectiveNumPipes();
1198
INT_32 overlap = numPipesLog2 - maxSizeLog2;
1199
1200
if ((numPipesLog2 > 1) && m_settings.supportRbPlus)
1201
{
1202
overlap++;
1203
}
1204
1205
// In 16Bpp 8xaa, we lose 1 overlap bit because the block size reduction eats into a pipe anchor bit (y4)
1206
if ((elemLog2 == 4) && (numSamplesLog2 == 3))
1207
{
1208
overlap--;
1209
}
1210
overlap = Max(overlap, 0);
1211
return overlap;
1212
}
1213
1214
/**
1215
************************************************************************************************************************
1216
* Gfx10Lib::Get3DMetaOverlapLog2
1217
*
1218
* @brief
1219
* Get 3d meta block overlap
1220
*
1221
* @return
1222
* N/A
1223
************************************************************************************************************************
1224
*/
1225
INT_32 Gfx10Lib::Get3DMetaOverlapLog2(
1226
AddrResourceType resourceType, ///< [in] Resource type
1227
AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1228
UINT_32 elemLog2 ///< [in] element size log2
1229
) const
1230
{
1231
Dim3d microBlock;
1232
GetBlk256SizeLog2(resourceType, swizzleMode, elemLog2, 0, &microBlock);
1233
1234
INT_32 overlap = GetEffectiveNumPipes() - static_cast<INT_32>(microBlock.w);
1235
1236
if (m_settings.supportRbPlus)
1237
{
1238
overlap++;
1239
}
1240
1241
if ((overlap < 0) || (IsStandardSwizzle(resourceType, swizzleMode) == TRUE))
1242
{
1243
overlap = 0;
1244
}
1245
return overlap;
1246
}
1247
1248
/**
1249
************************************************************************************************************************
1250
* Gfx10Lib::GetPipeRotateAmount
1251
*
1252
* @brief
1253
* Get pipe rotate amount
1254
*
1255
* @return
1256
* Pipe rotate amount
1257
************************************************************************************************************************
1258
*/
1259
1260
INT_32 Gfx10Lib::GetPipeRotateAmount(
1261
AddrResourceType resourceType, ///< [in] Resource type
1262
AddrSwizzleMode swizzleMode ///< [in] Swizzle mode
1263
) const
1264
{
1265
INT_32 amount = 0;
1266
1267
if (m_settings.supportRbPlus && (m_pipesLog2 >= (m_numSaLog2 + 1)) && (m_pipesLog2 > 1))
1268
{
1269
amount = ((m_pipesLog2 == (m_numSaLog2 + 1)) && IsRbAligned(resourceType, swizzleMode)) ?
1270
1 : m_pipesLog2 - (m_numSaLog2 + 1);
1271
}
1272
1273
return amount;
1274
}
1275
1276
/**
1277
************************************************************************************************************************
1278
* Gfx10Lib::GetMetaBlkSize
1279
*
1280
* @brief
1281
* Get metadata block size
1282
*
1283
* @return
1284
* Meta block size
1285
************************************************************************************************************************
1286
*/
1287
UINT_32 Gfx10Lib::GetMetaBlkSize(
1288
Gfx10DataType dataType, ///< [in] Data type
1289
AddrResourceType resourceType, ///< [in] Resource type
1290
AddrSwizzleMode swizzleMode, ///< [in] Swizzle mode
1291
UINT_32 elemLog2, ///< [in] element size log2
1292
UINT_32 numSamplesLog2, ///< [in] number of samples
1293
BOOL_32 pipeAlign, ///< [in] pipe align
1294
Dim3d* pBlock ///< [out] block size
1295
) const
1296
{
1297
INT_32 metablkSizeLog2;
1298
1299
{
1300
const INT_32 metaElemSizeLog2 = GetMetaElementSizeLog2(dataType);
1301
const INT_32 metaCacheSizeLog2 = GetMetaCacheSizeLog2(dataType);
1302
const INT_32 compBlkSizeLog2 = (dataType == Gfx10DataColor) ? 8 : 6 + numSamplesLog2 + elemLog2;
1303
const INT_32 metaBlkSamplesLog2 = (dataType == Gfx10DataDepthStencil) ?
1304
numSamplesLog2 : Min(numSamplesLog2, m_maxCompFragLog2);
1305
const INT_32 dataBlkSizeLog2 = GetBlockSizeLog2(swizzleMode);
1306
INT_32 numPipesLog2 = m_pipesLog2;
1307
1308
if (IsThin(resourceType, swizzleMode))
1309
{
1310
if ((pipeAlign == FALSE) ||
1311
(IsStandardSwizzle(resourceType, swizzleMode) == TRUE) ||
1312
(IsDisplaySwizzle(resourceType, swizzleMode) == TRUE))
1313
{
1314
if (pipeAlign)
1315
{
1316
metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1317
metablkSizeLog2 = Min(metablkSizeLog2, dataBlkSizeLog2);
1318
}
1319
else
1320
{
1321
metablkSizeLog2 = Min(dataBlkSizeLog2, 12);
1322
}
1323
}
1324
else
1325
{
1326
if (m_settings.supportRbPlus && (m_pipesLog2 == m_numSaLog2 + 1) && (m_pipesLog2 > 1))
1327
{
1328
numPipesLog2++;
1329
}
1330
1331
INT_32 pipeRotateLog2 = GetPipeRotateAmount(resourceType, swizzleMode);
1332
1333
if (numPipesLog2 >= 4)
1334
{
1335
INT_32 overlapLog2 = GetMetaOverlapLog2(dataType, resourceType, swizzleMode, elemLog2, numSamplesLog2);
1336
1337
// In 16Bpe 8xaa, we have an extra overlap bit
1338
if ((pipeRotateLog2 > 0) &&
1339
(elemLog2 == 4) &&
1340
(numSamplesLog2 == 3) &&
1341
(IsZOrderSwizzle(swizzleMode) || (GetEffectiveNumPipes() > 3)))
1342
{
1343
overlapLog2++;
1344
}
1345
1346
metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1347
metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1348
1349
if (m_settings.supportRbPlus &&
1350
IsRtOptSwizzle(swizzleMode) &&
1351
(numPipesLog2 == 6) &&
1352
(numSamplesLog2 == 3) &&
1353
(m_maxCompFragLog2 == 3) &&
1354
(metablkSizeLog2 < 15))
1355
{
1356
metablkSizeLog2 = 15;
1357
}
1358
}
1359
else
1360
{
1361
metablkSizeLog2 = Max(static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2, 12);
1362
}
1363
1364
if (dataType == Gfx10DataDepthStencil)
1365
{
1366
// For htile surfaces, pad meta block size to 2K * num_pipes
1367
metablkSizeLog2 = Max(metablkSizeLog2, 11 + numPipesLog2);
1368
}
1369
1370
const INT_32 compFragLog2 = Min(m_maxCompFragLog2, numSamplesLog2);
1371
1372
if (IsRtOptSwizzle(swizzleMode) && (compFragLog2 > 1) && (pipeRotateLog2 >= 1))
1373
{
1374
const INT_32 tmp = 8 + m_pipesLog2 + Max(pipeRotateLog2, compFragLog2 - 1);
1375
1376
metablkSizeLog2 = Max(metablkSizeLog2, tmp);
1377
}
1378
}
1379
1380
const INT_32 metablkBitsLog2 =
1381
metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1382
pBlock->w = 1 << ((metablkBitsLog2 >> 1) + (metablkBitsLog2 & 1));
1383
pBlock->h = 1 << (metablkBitsLog2 >> 1);
1384
pBlock->d = 1;
1385
}
1386
else
1387
{
1388
ADDR_ASSERT(IsThick(resourceType, swizzleMode));
1389
1390
if (pipeAlign)
1391
{
1392
if (m_settings.supportRbPlus &&
1393
(m_pipesLog2 == m_numSaLog2 + 1) &&
1394
(m_pipesLog2 > 1) &&
1395
IsRbAligned(resourceType, swizzleMode))
1396
{
1397
numPipesLog2++;
1398
}
1399
1400
const INT_32 overlapLog2 = Get3DMetaOverlapLog2(resourceType, swizzleMode, elemLog2);
1401
1402
metablkSizeLog2 = metaCacheSizeLog2 + overlapLog2 + numPipesLog2;
1403
metablkSizeLog2 = Max(metablkSizeLog2, static_cast<INT_32>(m_pipeInterleaveLog2) + numPipesLog2);
1404
metablkSizeLog2 = Max(metablkSizeLog2, 12);
1405
}
1406
else
1407
{
1408
metablkSizeLog2 = 12;
1409
}
1410
1411
const INT_32 metablkBitsLog2 =
1412
metablkSizeLog2 + compBlkSizeLog2 - elemLog2 - metaBlkSamplesLog2 - metaElemSizeLog2;
1413
pBlock->w = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 0) ? 1 : 0));
1414
pBlock->h = 1 << ((metablkBitsLog2 / 3) + (((metablkBitsLog2 % 3) > 1) ? 1 : 0));
1415
pBlock->d = 1 << (metablkBitsLog2 / 3);
1416
}
1417
}
1418
1419
return (1 << static_cast<UINT_32>(metablkSizeLog2));
1420
}
1421
1422
/**
1423
************************************************************************************************************************
1424
* Gfx10Lib::ConvertSwizzlePatternToEquation
1425
*
1426
* @brief
1427
* Convert swizzle pattern to equation.
1428
*
1429
* @return
1430
* N/A
1431
************************************************************************************************************************
1432
*/
1433
VOID Gfx10Lib::ConvertSwizzlePatternToEquation(
1434
UINT_32 elemLog2, ///< [in] element bytes log2
1435
AddrResourceType rsrcType, ///< [in] resource type
1436
AddrSwizzleMode swMode, ///< [in] swizzle mode
1437
const ADDR_SW_PATINFO* pPatInfo, ///< [in] swizzle pattern infor
1438
ADDR_EQUATION* pEquation) ///< [out] equation converted from swizzle pattern
1439
const
1440
{
1441
ADDR_BIT_SETTING fullSwizzlePattern[20];
1442
GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
1443
1444
const ADDR_BIT_SETTING* pSwizzle = fullSwizzlePattern;
1445
const UINT_32 blockSizeLog2 = GetBlockSizeLog2(swMode);
1446
1447
pEquation->numBits = blockSizeLog2;
1448
pEquation->stackedDepthSlices = FALSE;
1449
1450
for (UINT_32 i = 0; i < elemLog2; i++)
1451
{
1452
pEquation->addr[i].channel = 0;
1453
pEquation->addr[i].valid = 1;
1454
pEquation->addr[i].index = i;
1455
}
1456
1457
if (IsXor(swMode) == FALSE)
1458
{
1459
for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1460
{
1461
ADDR_ASSERT(IsPow2(pSwizzle[i].value));
1462
1463
if (pSwizzle[i].x != 0)
1464
{
1465
ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].x)));
1466
1467
pEquation->addr[i].channel = 0;
1468
pEquation->addr[i].valid = 1;
1469
pEquation->addr[i].index = Log2(pSwizzle[i].x) + elemLog2;
1470
}
1471
else if (pSwizzle[i].y != 0)
1472
{
1473
ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].y)));
1474
1475
pEquation->addr[i].channel = 1;
1476
pEquation->addr[i].valid = 1;
1477
pEquation->addr[i].index = Log2(pSwizzle[i].y);
1478
}
1479
else
1480
{
1481
ADDR_ASSERT(pSwizzle[i].z != 0);
1482
ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1483
1484
pEquation->addr[i].channel = 2;
1485
pEquation->addr[i].valid = 1;
1486
pEquation->addr[i].index = Log2(pSwizzle[i].z);
1487
}
1488
1489
pEquation->xor1[i].value = 0;
1490
pEquation->xor2[i].value = 0;
1491
}
1492
}
1493
else if (IsThin(rsrcType, swMode))
1494
{
1495
Dim3d dim;
1496
ComputeThinBlockDimension(&dim.w, &dim.h, &dim.d, 8u << elemLog2, 0, rsrcType, swMode);
1497
1498
const UINT_32 blkXLog2 = Log2(dim.w);
1499
const UINT_32 blkYLog2 = Log2(dim.h);
1500
const UINT_32 blkXMask = dim.w - 1;
1501
const UINT_32 blkYMask = dim.h - 1;
1502
1503
ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1504
UINT_32 xMask = 0;
1505
UINT_32 yMask = 0;
1506
UINT_32 bMask = (1 << elemLog2) - 1;
1507
1508
for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1509
{
1510
if (IsPow2(pSwizzle[i].value))
1511
{
1512
if (pSwizzle[i].x != 0)
1513
{
1514
ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1515
xMask |= pSwizzle[i].x;
1516
1517
const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1518
1519
ADDR_ASSERT(xLog2 < blkXLog2);
1520
1521
pEquation->addr[i].channel = 0;
1522
pEquation->addr[i].valid = 1;
1523
pEquation->addr[i].index = xLog2 + elemLog2;
1524
}
1525
else
1526
{
1527
ADDR_ASSERT(pSwizzle[i].y != 0);
1528
ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1529
yMask |= pSwizzle[i].y;
1530
1531
pEquation->addr[i].channel = 1;
1532
pEquation->addr[i].valid = 1;
1533
pEquation->addr[i].index = Log2(pSwizzle[i].y);
1534
1535
ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1536
}
1537
1538
swizzle[i].value = 0;
1539
bMask |= 1 << i;
1540
}
1541
else
1542
{
1543
if (pSwizzle[i].z != 0)
1544
{
1545
ADDR_ASSERT(IsPow2(static_cast<UINT_32>(pSwizzle[i].z)));
1546
1547
pEquation->xor2[i].channel = 2;
1548
pEquation->xor2[i].valid = 1;
1549
pEquation->xor2[i].index = Log2(pSwizzle[i].z);
1550
}
1551
1552
swizzle[i].x = pSwizzle[i].x;
1553
swizzle[i].y = pSwizzle[i].y;
1554
swizzle[i].z = swizzle[i].s = 0;
1555
1556
ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1557
1558
const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1559
1560
if (xHi != 0)
1561
{
1562
ADDR_ASSERT(IsPow2(xHi));
1563
ADDR_ASSERT(pEquation->xor1[i].value == 0);
1564
1565
pEquation->xor1[i].channel = 0;
1566
pEquation->xor1[i].valid = 1;
1567
pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1568
1569
swizzle[i].x &= blkXMask;
1570
}
1571
1572
const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1573
1574
if (yHi != 0)
1575
{
1576
ADDR_ASSERT(IsPow2(yHi));
1577
1578
if (xHi == 0)
1579
{
1580
ADDR_ASSERT(pEquation->xor1[i].value == 0);
1581
pEquation->xor1[i].channel = 1;
1582
pEquation->xor1[i].valid = 1;
1583
pEquation->xor1[i].index = Log2(yHi);
1584
}
1585
else
1586
{
1587
ADDR_ASSERT(pEquation->xor2[i].value == 0);
1588
pEquation->xor2[i].channel = 1;
1589
pEquation->xor2[i].valid = 1;
1590
pEquation->xor2[i].index = Log2(yHi);
1591
}
1592
1593
swizzle[i].y &= blkYMask;
1594
}
1595
1596
if (swizzle[i].value == 0)
1597
{
1598
bMask |= 1 << i;
1599
}
1600
}
1601
}
1602
1603
const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1604
const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1605
1606
ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1607
1608
while (bMask != blockMask)
1609
{
1610
for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1611
{
1612
if ((bMask & (1 << i)) == 0)
1613
{
1614
if (IsPow2(swizzle[i].value))
1615
{
1616
if (swizzle[i].x != 0)
1617
{
1618
ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1619
xMask |= swizzle[i].x;
1620
1621
const UINT_32 xLog2 = Log2(swizzle[i].x);
1622
1623
ADDR_ASSERT(xLog2 < blkXLog2);
1624
1625
pEquation->addr[i].channel = 0;
1626
pEquation->addr[i].valid = 1;
1627
pEquation->addr[i].index = xLog2 + elemLog2;
1628
}
1629
else
1630
{
1631
ADDR_ASSERT(swizzle[i].y != 0);
1632
ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1633
yMask |= swizzle[i].y;
1634
1635
pEquation->addr[i].channel = 1;
1636
pEquation->addr[i].valid = 1;
1637
pEquation->addr[i].index = Log2(swizzle[i].y);
1638
1639
ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1640
}
1641
1642
swizzle[i].value = 0;
1643
bMask |= 1 << i;
1644
}
1645
else
1646
{
1647
const UINT_32 x = swizzle[i].x & xMask;
1648
const UINT_32 y = swizzle[i].y & yMask;
1649
1650
if (x != 0)
1651
{
1652
ADDR_ASSERT(IsPow2(x));
1653
1654
if (pEquation->xor1[i].value == 0)
1655
{
1656
pEquation->xor1[i].channel = 0;
1657
pEquation->xor1[i].valid = 1;
1658
pEquation->xor1[i].index = Log2(x) + elemLog2;
1659
}
1660
else
1661
{
1662
ADDR_ASSERT(pEquation->xor2[i].value == 0);
1663
pEquation->xor2[i].channel = 0;
1664
pEquation->xor2[i].valid = 1;
1665
pEquation->xor2[i].index = Log2(x) + elemLog2;
1666
}
1667
}
1668
1669
if (y != 0)
1670
{
1671
ADDR_ASSERT(IsPow2(y));
1672
1673
if (pEquation->xor1[i].value == 0)
1674
{
1675
pEquation->xor1[i].channel = 1;
1676
pEquation->xor1[i].valid = 1;
1677
pEquation->xor1[i].index = Log2(y);
1678
}
1679
else
1680
{
1681
ADDR_ASSERT(pEquation->xor2[i].value == 0);
1682
pEquation->xor2[i].channel = 1;
1683
pEquation->xor2[i].valid = 1;
1684
pEquation->xor2[i].index = Log2(y);
1685
}
1686
}
1687
1688
swizzle[i].x &= ~x;
1689
swizzle[i].y &= ~y;
1690
}
1691
}
1692
}
1693
}
1694
1695
ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask));
1696
}
1697
else
1698
{
1699
const UINT_32 blkXLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].w : Block64K_Log2_3d[elemLog2].w;
1700
const UINT_32 blkYLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].h : Block64K_Log2_3d[elemLog2].h;
1701
const UINT_32 blkZLog2 = (blockSizeLog2 == 12) ? Block4K_Log2_3d[elemLog2].d : Block64K_Log2_3d[elemLog2].d;
1702
const UINT_32 blkXMask = (1 << blkXLog2) - 1;
1703
const UINT_32 blkYMask = (1 << blkYLog2) - 1;
1704
const UINT_32 blkZMask = (1 << blkZLog2) - 1;
1705
1706
ADDR_BIT_SETTING swizzle[ADDR_MAX_EQUATION_BIT] = {};
1707
UINT_32 xMask = 0;
1708
UINT_32 yMask = 0;
1709
UINT_32 zMask = 0;
1710
UINT_32 bMask = (1 << elemLog2) - 1;
1711
1712
for (UINT_32 i = elemLog2; i < blockSizeLog2; i++)
1713
{
1714
if (IsPow2(pSwizzle[i].value))
1715
{
1716
if (pSwizzle[i].x != 0)
1717
{
1718
ADDR_ASSERT((xMask & pSwizzle[i].x) == 0);
1719
xMask |= pSwizzle[i].x;
1720
1721
const UINT_32 xLog2 = Log2(pSwizzle[i].x);
1722
1723
ADDR_ASSERT(xLog2 < blkXLog2);
1724
1725
pEquation->addr[i].channel = 0;
1726
pEquation->addr[i].valid = 1;
1727
pEquation->addr[i].index = xLog2 + elemLog2;
1728
}
1729
else if (pSwizzle[i].y != 0)
1730
{
1731
ADDR_ASSERT((yMask & pSwizzle[i].y) == 0);
1732
yMask |= pSwizzle[i].y;
1733
1734
pEquation->addr[i].channel = 1;
1735
pEquation->addr[i].valid = 1;
1736
pEquation->addr[i].index = Log2(pSwizzle[i].y);
1737
1738
ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1739
}
1740
else
1741
{
1742
ADDR_ASSERT(pSwizzle[i].z != 0);
1743
ADDR_ASSERT((zMask & pSwizzle[i].z) == 0);
1744
zMask |= pSwizzle[i].z;
1745
1746
pEquation->addr[i].channel = 2;
1747
pEquation->addr[i].valid = 1;
1748
pEquation->addr[i].index = Log2(pSwizzle[i].z);
1749
1750
ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1751
}
1752
1753
swizzle[i].value = 0;
1754
bMask |= 1 << i;
1755
}
1756
else
1757
{
1758
swizzle[i].x = pSwizzle[i].x;
1759
swizzle[i].y = pSwizzle[i].y;
1760
swizzle[i].z = pSwizzle[i].z;
1761
swizzle[i].s = 0;
1762
1763
ADDR_ASSERT(IsPow2(swizzle[i].value) == FALSE);
1764
1765
const UINT_32 xHi = swizzle[i].x & (~blkXMask);
1766
const UINT_32 yHi = swizzle[i].y & (~blkYMask);
1767
const UINT_32 zHi = swizzle[i].z & (~blkZMask);
1768
1769
ADDR_ASSERT((xHi == 0) || (yHi== 0) || (zHi == 0));
1770
1771
if (xHi != 0)
1772
{
1773
ADDR_ASSERT(IsPow2(xHi));
1774
ADDR_ASSERT(pEquation->xor1[i].value == 0);
1775
1776
pEquation->xor1[i].channel = 0;
1777
pEquation->xor1[i].valid = 1;
1778
pEquation->xor1[i].index = Log2(xHi) + elemLog2;
1779
1780
swizzle[i].x &= blkXMask;
1781
}
1782
1783
if (yHi != 0)
1784
{
1785
ADDR_ASSERT(IsPow2(yHi));
1786
1787
if (pEquation->xor1[i].value == 0)
1788
{
1789
pEquation->xor1[i].channel = 1;
1790
pEquation->xor1[i].valid = 1;
1791
pEquation->xor1[i].index = Log2(yHi);
1792
}
1793
else
1794
{
1795
ADDR_ASSERT(pEquation->xor2[i].value == 0);
1796
pEquation->xor2[i].channel = 1;
1797
pEquation->xor2[i].valid = 1;
1798
pEquation->xor2[i].index = Log2(yHi);
1799
}
1800
1801
swizzle[i].y &= blkYMask;
1802
}
1803
1804
if (zHi != 0)
1805
{
1806
ADDR_ASSERT(IsPow2(zHi));
1807
1808
if (pEquation->xor1[i].value == 0)
1809
{
1810
pEquation->xor1[i].channel = 2;
1811
pEquation->xor1[i].valid = 1;
1812
pEquation->xor1[i].index = Log2(zHi);
1813
}
1814
else
1815
{
1816
ADDR_ASSERT(pEquation->xor2[i].value == 0);
1817
pEquation->xor2[i].channel = 2;
1818
pEquation->xor2[i].valid = 1;
1819
pEquation->xor2[i].index = Log2(zHi);
1820
}
1821
1822
swizzle[i].z &= blkZMask;
1823
}
1824
1825
if (swizzle[i].value == 0)
1826
{
1827
bMask |= 1 << i;
1828
}
1829
}
1830
}
1831
1832
const UINT_32 pipeIntMask = (1 << m_pipeInterleaveLog2) - 1;
1833
const UINT_32 blockMask = (1 << blockSizeLog2) - 1;
1834
1835
ADDR_ASSERT((bMask & pipeIntMask) == pipeIntMask);
1836
1837
while (bMask != blockMask)
1838
{
1839
for (UINT_32 i = m_pipeInterleaveLog2; i < blockSizeLog2; i++)
1840
{
1841
if ((bMask & (1 << i)) == 0)
1842
{
1843
if (IsPow2(swizzle[i].value))
1844
{
1845
if (swizzle[i].x != 0)
1846
{
1847
ADDR_ASSERT((xMask & swizzle[i].x) == 0);
1848
xMask |= swizzle[i].x;
1849
1850
const UINT_32 xLog2 = Log2(swizzle[i].x);
1851
1852
ADDR_ASSERT(xLog2 < blkXLog2);
1853
1854
pEquation->addr[i].channel = 0;
1855
pEquation->addr[i].valid = 1;
1856
pEquation->addr[i].index = xLog2 + elemLog2;
1857
}
1858
else if (swizzle[i].y != 0)
1859
{
1860
ADDR_ASSERT((yMask & swizzle[i].y) == 0);
1861
yMask |= swizzle[i].y;
1862
1863
pEquation->addr[i].channel = 1;
1864
pEquation->addr[i].valid = 1;
1865
pEquation->addr[i].index = Log2(swizzle[i].y);
1866
1867
ADDR_ASSERT(pEquation->addr[i].index < blkYLog2);
1868
}
1869
else
1870
{
1871
ADDR_ASSERT(swizzle[i].z != 0);
1872
ADDR_ASSERT((zMask & swizzle[i].z) == 0);
1873
zMask |= swizzle[i].z;
1874
1875
pEquation->addr[i].channel = 2;
1876
pEquation->addr[i].valid = 1;
1877
pEquation->addr[i].index = Log2(swizzle[i].z);
1878
1879
ADDR_ASSERT(pEquation->addr[i].index < blkZLog2);
1880
}
1881
1882
swizzle[i].value = 0;
1883
bMask |= 1 << i;
1884
}
1885
else
1886
{
1887
const UINT_32 x = swizzle[i].x & xMask;
1888
const UINT_32 y = swizzle[i].y & yMask;
1889
const UINT_32 z = swizzle[i].z & zMask;
1890
1891
if (x != 0)
1892
{
1893
ADDR_ASSERT(IsPow2(x));
1894
1895
if (pEquation->xor1[i].value == 0)
1896
{
1897
pEquation->xor1[i].channel = 0;
1898
pEquation->xor1[i].valid = 1;
1899
pEquation->xor1[i].index = Log2(x) + elemLog2;
1900
}
1901
else
1902
{
1903
ADDR_ASSERT(pEquation->xor2[i].value == 0);
1904
pEquation->xor2[i].channel = 0;
1905
pEquation->xor2[i].valid = 1;
1906
pEquation->xor2[i].index = Log2(x) + elemLog2;
1907
}
1908
}
1909
1910
if (y != 0)
1911
{
1912
ADDR_ASSERT(IsPow2(y));
1913
1914
if (pEquation->xor1[i].value == 0)
1915
{
1916
pEquation->xor1[i].channel = 1;
1917
pEquation->xor1[i].valid = 1;
1918
pEquation->xor1[i].index = Log2(y);
1919
}
1920
else
1921
{
1922
ADDR_ASSERT(pEquation->xor2[i].value == 0);
1923
pEquation->xor2[i].channel = 1;
1924
pEquation->xor2[i].valid = 1;
1925
pEquation->xor2[i].index = Log2(y);
1926
}
1927
}
1928
1929
if (z != 0)
1930
{
1931
ADDR_ASSERT(IsPow2(z));
1932
1933
if (pEquation->xor1[i].value == 0)
1934
{
1935
pEquation->xor1[i].channel = 2;
1936
pEquation->xor1[i].valid = 1;
1937
pEquation->xor1[i].index = Log2(z);
1938
}
1939
else
1940
{
1941
ADDR_ASSERT(pEquation->xor2[i].value == 0);
1942
pEquation->xor2[i].channel = 2;
1943
pEquation->xor2[i].valid = 1;
1944
pEquation->xor2[i].index = Log2(z);
1945
}
1946
}
1947
1948
swizzle[i].x &= ~x;
1949
swizzle[i].y &= ~y;
1950
swizzle[i].z &= ~z;
1951
}
1952
}
1953
}
1954
}
1955
1956
ADDR_ASSERT((xMask == blkXMask) && (yMask == blkYMask) && (zMask == blkZMask));
1957
}
1958
}
1959
1960
/**
1961
************************************************************************************************************************
1962
* Gfx10Lib::InitEquationTable
1963
*
1964
* @brief
1965
* Initialize Equation table.
1966
*
1967
* @return
1968
* N/A
1969
************************************************************************************************************************
1970
*/
1971
VOID Gfx10Lib::InitEquationTable()
1972
{
1973
memset(m_equationTable, 0, sizeof(m_equationTable));
1974
1975
for (UINT_32 rsrcTypeIdx = 0; rsrcTypeIdx < MaxRsrcType; rsrcTypeIdx++)
1976
{
1977
const AddrResourceType rsrcType = static_cast<AddrResourceType>(rsrcTypeIdx + ADDR_RSRC_TEX_2D);
1978
1979
for (UINT_32 swModeIdx = 0; swModeIdx < MaxSwModeType; swModeIdx++)
1980
{
1981
const AddrSwizzleMode swMode = static_cast<AddrSwizzleMode>(swModeIdx);
1982
1983
for (UINT_32 elemLog2 = 0; elemLog2 < MaxElementBytesLog2; elemLog2++)
1984
{
1985
UINT_32 equationIndex = ADDR_INVALID_EQUATION_INDEX;
1986
const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(swMode, rsrcType, elemLog2, 1);
1987
1988
if (pPatInfo != NULL)
1989
{
1990
ADDR_ASSERT(IsValidSwMode(swMode));
1991
1992
if (pPatInfo->maxItemCount <= 3)
1993
{
1994
ADDR_EQUATION equation = {};
1995
1996
ConvertSwizzlePatternToEquation(elemLog2, rsrcType, swMode, pPatInfo, &equation);
1997
1998
equationIndex = m_numEquations;
1999
ADDR_ASSERT(equationIndex < EquationTableSize);
2000
2001
m_equationTable[equationIndex] = equation;
2002
2003
m_numEquations++;
2004
}
2005
else
2006
{
2007
// We only see "ill" equation from 64/128 BPE + 3D resource + SW_64KB_D_X under RB+ case
2008
ADDR_ASSERT((elemLog2 == 3) || (elemLog2 == 4));
2009
ADDR_ASSERT(rsrcTypeIdx == 1);
2010
ADDR_ASSERT(swMode == ADDR_SW_64KB_D_X);
2011
ADDR_ASSERT(m_settings.supportRbPlus == 1);
2012
}
2013
}
2014
2015
m_equationLookupTable[rsrcTypeIdx][swModeIdx][elemLog2] = equationIndex;
2016
}
2017
}
2018
}
2019
}
2020
2021
/**
2022
************************************************************************************************************************
2023
* Gfx10Lib::HwlGetEquationIndex
2024
*
2025
* @brief
2026
* Interface function stub of GetEquationIndex
2027
*
2028
* @return
2029
* ADDR_E_RETURNCODE
2030
************************************************************************************************************************
2031
*/
2032
UINT_32 Gfx10Lib::HwlGetEquationIndex(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    // Only 2D and 3D resources are looked up; any other resource type reports the invalid index.
    const BOOL_32 hasLookupEntry = (pIn->resourceType == ADDR_RSRC_TEX_2D) ||
                                   (pIn->resourceType == ADDR_RSRC_TEX_3D);

    UINT_32 index = ADDR_INVALID_EQUATION_INDEX;

    if (hasLookupEntry)
    {
        // Lookup table axes: [resource type - 1][swizzle mode][log2(bytes per element)]
        const UINT_32 typeSlot  = static_cast<UINT_32>(pIn->resourceType) - 1;
        const UINT_32 modeSlot  = static_cast<UINT_32>(pIn->swizzleMode);
        const UINT_32 bytesLog2 = Log2(pIn->bpp >> 3);

        index = m_equationLookupTable[typeSlot][modeSlot][bytesLog2];
    }

    // When the caller supplied per-mip storage, every mip level reports the same index.
    if (pOut->pMipInfo != NULL)
    {
        for (UINT_32 mip = 0; mip < pIn->numMipLevels; mip++)
        {
            pOut->pMipInfo[mip].equationIndex = index;
        }
    }

    return index;
}
2059
2060
/**
2061
************************************************************************************************************************
2062
* Gfx10Lib::GetValidDisplaySwizzleModes
2063
*
2064
* @brief
2065
* Get valid swizzle modes mask for displayable surface
2066
*
2067
* @return
2068
* Valid swizzle modes mask for displayable surface
2069
************************************************************************************************************************
2070
*/
2071
UINT_32 Gfx10Lib::GetValidDisplaySwizzleModes(
    UINT_32 bpp
    ) const
{
    // Anything wider than 64 bits per pixel has no displayable swizzle mode
    if (bpp > 64)
    {
        return 0;
    }

    const BOOL_32 is64Bpp = (bpp == 64);

    if (m_settings.isDcn20)
    {
        return is64Bpp ? Dcn20Bpp64SwModeMask : Dcn20NonBpp64SwModeMask;
    }

    // Every other configuration uses the Dcn21 masks
    return is64Bpp ? Dcn21Bpp64SwModeMask : Dcn21NonBpp64SwModeMask;
}
2091
2092
/**
2093
************************************************************************************************************************
2094
* Gfx10Lib::IsValidDisplaySwizzleMode
2095
*
2096
* @brief
2097
* Check if a swizzle mode is supported by display engine
2098
*
2099
* @return
2100
* TRUE if swizzle mode is supported by display engine
2101
************************************************************************************************************************
2102
*/
2103
BOOL_32 Gfx10Lib::IsValidDisplaySwizzleMode(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
    ) const
{
    // Tests the bit for pIn->swizzleMode in the display swizzle mode mask for pIn->bpp.
    ADDR_ASSERT(pIn->resourceType == ADDR_RSRC_TEX_2D);

    BOOL_32 supported = FALSE;

    const UINT_32 swModeIdx = static_cast<UINT_32>(pIn->swizzleMode);

    // A shift count of the mask width or more is undefined behavior, and such a mode can not be
    // represented in the mask anyway, so it is reported as unsupported.
    if (swModeIdx < (sizeof(UINT_32) * 8))
    {
        // Unsigned literal: shifting a signed 1 into the sign bit is not portable
        const UINT_32 swModeBit = 1u << swModeIdx;

        supported = (GetValidDisplaySwizzleModes(pIn->bpp) & swModeBit) ? TRUE : FALSE;
    }

    return supported;
}
2111
2112
/**
2113
************************************************************************************************************************
2114
* Gfx10Lib::GetMaxNumMipsInTail
2115
*
2116
* @brief
2117
* Return max number of mips in tails
2118
*
2119
* @return
2120
* Max number of mips in tails
2121
************************************************************************************************************************
2122
*/
2123
UINT_32 Gfx10Lib::GetMaxNumMipsInTail(
    UINT_32 blockSizeLog2,     ///< block size log2
    BOOL_32 isThin             ///< is thin or thick
    ) const
{
    // Thick blocks reduce the block size log2 by (blockSizeLog2 - 8) / 3 before the mapping below
    const UINT_32 thickAdjust = (isThin == FALSE) ? ((blockSizeLog2 - 8) / 3) : 0;
    const UINT_32 sizeLog2    = blockSizeLog2 - thickAdjust;

    UINT_32 maxMips;

    if (sizeLog2 <= 11)
    {
        maxMips = 1 + (1 << (sizeLog2 - 9));
    }
    else
    {
        maxMips = sizeLog2 - 4;
    }

    return maxMips;
}
2137
2138
/**
2139
************************************************************************************************************************
2140
* Gfx10Lib::HwlComputePipeBankXor
2141
*
2142
* @brief
2143
* Generate a PipeBankXor value to be ORed into bits above pipeInterleaveBits of address
2144
*
2145
* @return
2146
* ADDR_E_RETURNCODE (the PipeBankXor value is returned in pOut->pipeBankXor)
2147
************************************************************************************************************************
2148
*/
2149
ADDR_E_RETURNCODE Gfx10Lib::HwlComputePipeBankXor(
    const ADDR2_COMPUTE_PIPEBANKXOR_INPUT* pIn,     ///< [in] input structure
    ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT*      pOut     ///< [out] output structure
    ) const
{
    // Swizzle modes that are not non-PRT xor modes always report a zero PipeBankXor
    if (IsNonPrtXor(pIn->swizzleMode) == FALSE)
    {
        pOut->pipeBankXor = 0;
        return ADDR_OK;
    }

    // One rotation pattern per supported bank bit count (1 to 4), selected by surface index
    const UINT_32 RotationLen = 8;
    static const UINT_32 BankRot1Bit[RotationLen]  = {0, 1, 0, 1, 0, 1, 0, 1};
    static const UINT_32 BankRot2Bits[RotationLen] = {0, 2, 1, 3, 2, 0, 3, 1};
    static const UINT_32 BankRot3Bits[RotationLen] = {0, 4, 2, 6, 1, 5, 3, 7};
    static const UINT_32 BankRot4Bits[RotationLen] = {0, 8, 4, 12, 2, 10, 6, 14};
    static const UINT_32* BankRotByBits[] = {BankRot1Bit, BankRot2Bits, BankRot3Bits, BankRot4Bits};

    const UINT_32 numBankBits = GetBankXorBits(GetBlockSizeLog2(pIn->swizzleMode));

    UINT_32 bankPart = 0;

    if ((numBankBits >= 1) && (numBankBits <= 4))
    {
        const UINT_32* pRotation = BankRotByBits[numBankBits - 1];

        bankPart = pRotation[pIn->surfIndex % RotationLen] << (m_pipesLog2 + ColumnBits);
    }
    else if (numBankBits != 0)
    {
        // valid bank bits should be 0~4
        ADDR_ASSERT_ALWAYS();
    }

    // No pipe xor is generated here, so the result consists of the bank part only
    pOut->pipeBankXor = bankPart;

    return ADDR_OK;
}
2193
2194
/**
2195
************************************************************************************************************************
2196
* Gfx10Lib::HwlComputeSlicePipeBankXor
2197
*
2198
* @brief
2199
* Generate slice PipeBankXor value based on base PipeBankXor value and slice id
2200
*
2201
* @return
2202
* ADDR_E_RETURNCODE (the slice PipeBankXor value is returned in pOut->pipeBankXor)
2203
************************************************************************************************************************
2204
*/
2205
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSlicePipeBankXor(
    const ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT* pIn,   ///< [in] input structure
    ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT*      pOut   ///< [out] output structure
    ) const
{
    // Swizzle modes that are not non-PRT xor modes always report a zero PipeBankXor
    if (IsNonPrtXor(pIn->swizzleMode) == FALSE)
    {
        pOut->pipeBankXor = 0;
        return ADDR_OK;
    }

    const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
    const UINT_32 numPipeBits = GetPipeXorBits(blkSizeLog2);
    const UINT_32 reversedXor = ReverseBitVector(pIn->slice, numPipeBits);

    // Default result: bit-reversed slice id xor-ed into the base value. It is replaced below
    // when a swizzle pattern is available for the given element size.
    pOut->pipeBankXor = pIn->basePipeBankXor ^ reversedXor;

    const ADDR_SW_PATINFO* pPattern = NULL;

    if (pIn->bpe != 0)
    {
        pPattern = GetSwizzlePatternInfo(pIn->swizzleMode,
                                         pIn->resourceType,
                                         Log2(pIn->bpe >> 3),
                                         1);
    }

    if (pPattern != NULL)
    {
        ADDR_BIT_SETTING swizzleBits[20];
        GetSwizzlePatternFromPatternInfo(pPattern, swizzleBits);

        // Offset produced by the pattern for coordinate (0, 0), the requested slice and sample 0
        const UINT_32 patternOffset =
            ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(swizzleBits),
                                            blkSizeLog2,
                                            0,
                                            0,
                                            pIn->slice,
                                            0);

        const UINT_32 patternXor = patternOffset >> m_pipeInterleaveLog2;

        // Should have no bit set under pipe interleave
        ADDR_ASSERT((patternXor << m_pipeInterleaveLog2) == patternOffset);

        // This assertion firing means old approach doesn't calculate a correct sliceXor value...
        ADDR_ASSERT(patternXor == reversedXor);

        pOut->pipeBankXor = pIn->basePipeBankXor ^ patternXor;
    }

    return ADDR_OK;
}
2257
2258
/**
2259
************************************************************************************************************************
2260
* Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern
2261
*
2262
* @brief
2263
* Compute sub resource offset to support swizzle pattern
2264
*
2265
* @return
2266
* ADDR_E_RETURNCODE (the offset is returned in pOut->offset)
2267
************************************************************************************************************************
2268
*/
2269
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSubResourceOffsetForSwizzlePattern(
    const ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    // Only expected to be used with thin resource type / swizzle mode combinations
    ADDR_ASSERT(IsThin(pIn->resourceType, pIn->swizzleMode));

    // Macro block offset of the sub resource plus the size of all slices in front of it
    pOut->offset = pIn->macroBlockOffset + pIn->sliceSize * pIn->slice;

    return ADDR_OK;
}
2280
2281
/**
2282
************************************************************************************************************************
2283
* Gfx10Lib::HwlComputeNonBlockCompressedView
2284
*
2285
* @brief
2286
* Compute non-block-compressed view for a given mipmap level/slice.
2287
*
2288
* @return
2289
* ADDR_E_RETURNCODE
2290
************************************************************************************************************************
2291
*/
2292
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeNonBlockCompressedView(
    const ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_NONBLOCKCOMPRESSEDVIEW_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    // Computes, for one mip level / slice of a block compressed 2D surface, the offset, pipeBankXor,
    // mip id, mip count and unaligned mip0 dimensions that describe it as a non-block-compressed view.
    ADDR_E_RETURNCODE returnCode = ADDR_OK;

    if (pIn->resourceType != ADDR_RSRC_TEX_2D)
    {
        // Only 2D resource can have a NonBC view...
        returnCode = ADDR_INVALIDPARAMS;
    }
    else if ((pIn->format != ADDR_FMT_ASTC_8x8) &&
             ((pIn->format < ADDR_FMT_BC1) || (pIn->format > ADDR_FMT_BC7)))
    {
        // Only support BC1~BC7 or ASTC_8x8 for now...
        returnCode = ADDR_NOTSUPPORTED;
    }
    else if ((pIn->numMipLevels > MaxMipLevels) ||
             ((pIn->mipId != 0) && (pIn->mipId >= pIn->numMipLevels)))
    {
        // The per-mip info below lives in a local array of MaxMipLevels entries that is filled for
        // numMipLevels levels and then indexed by mipId; reject values that would run past it or that
        // refer to a mip level outside the chain. (mipId 0 stays accepted even if numMipLevels is 0.)
        returnCode = ADDR_INVALIDPARAMS;
    }
    else
    {
        UINT_32 bcWidth, bcHeight;
        UINT_32 bpp = GetElemLib()->GetBitsPerPixel(pIn->format, NULL, &bcWidth, &bcHeight);

        // Describe the surface in units of compressed blocks (elements) instead of texels
        ADDR2_COMPUTE_SURFACE_INFO_INPUT infoIn = {};
        infoIn.flags        = pIn->flags;
        infoIn.swizzleMode  = pIn->swizzleMode;
        infoIn.resourceType = pIn->resourceType;
        infoIn.bpp          = bpp;
        infoIn.width        = PowTwoAlign(pIn->width, bcWidth) / bcWidth;
        infoIn.height       = PowTwoAlign(pIn->height, bcHeight) / bcHeight;
        infoIn.numSlices    = pIn->numSlices;
        infoIn.numMipLevels = pIn->numMipLevels;
        infoIn.numSamples   = 1;
        infoIn.numFrags     = 1;

        ADDR2_MIP_INFO mipInfo[MaxMipLevels] = {};

        ADDR2_COMPUTE_SURFACE_INFO_OUTPUT infoOut = {};
        infoOut.pMipInfo = mipInfo;

        const BOOL_32 tiled = (pIn->swizzleMode != ADDR_SW_LINEAR) ? TRUE : FALSE;

        if (tiled)
        {
            returnCode = HwlComputeSurfaceInfoTiled(&infoIn, &infoOut);
        }
        else
        {
            returnCode = HwlComputeSurfaceInfoLinear(&infoIn, &infoOut);
        }

        if (returnCode == ADDR_OK)
        {
            ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_INPUT subOffIn = {};
            subOffIn.swizzleMode      = infoIn.swizzleMode;
            subOffIn.resourceType     = infoIn.resourceType;
            subOffIn.slice            = pIn->slice;
            subOffIn.sliceSize        = infoOut.sliceSize;
            subOffIn.macroBlockOffset = mipInfo[pIn->mipId].macroBlockOffset;
            subOffIn.mipTailOffset    = mipInfo[pIn->mipId].mipTailOffset;

            ADDR2_COMPUTE_SUBRESOURCE_OFFSET_FORSWIZZLEPATTERN_OUTPUT subOffOut = {};

            // For any mipmap level, move nonBc view base address by offset
            HwlComputeSubResourceOffsetForSwizzlePattern(&subOffIn, &subOffOut);
            pOut->offset = subOffOut.offset;

            ADDR2_COMPUTE_SLICE_PIPEBANKXOR_INPUT slicePbXorIn = {};
            slicePbXorIn.bpe             = infoIn.bpp;
            slicePbXorIn.swizzleMode     = infoIn.swizzleMode;
            slicePbXorIn.resourceType    = infoIn.resourceType;
            slicePbXorIn.basePipeBankXor = pIn->pipeBankXor;
            slicePbXorIn.slice           = pIn->slice;

            ADDR2_COMPUTE_SLICE_PIPEBANKXOR_OUTPUT slicePbXorOut = {};

            // For any mipmap level, nonBc view should use computed pbXor
            HwlComputeSlicePipeBankXor(&slicePbXorIn, &slicePbXorOut);
            pOut->pipeBankXor = slicePbXorOut.pipeBankXor;

            const BOOL_32 inTail           = tiled && (pIn->mipId >= infoOut.firstMipIdInTail) ? TRUE : FALSE;
            const UINT_32 requestMipWidth  = PowTwoAlign(Max(pIn->width >> pIn->mipId, 1u), bcWidth) / bcWidth;
            const UINT_32 requestMipHeight = PowTwoAlign(Max(pIn->height >> pIn->mipId, 1u), bcHeight) / bcHeight;

            if (inTail)
            {
                // For mipmap level that is in mip tail block, hack a lot of things...
                // Basically all mipmap levels in tail block will be viewed as a small mipmap chain that all levels
                // are fit in tail block:

                // - mipId = relative mip id (which is counted from first mip ID in tail in original mip chain)
                pOut->mipId = pIn->mipId - infoOut.firstMipIdInTail;

                // - at least 2 mipmap levels (since only 1 mipmap level will not be viewed as mipmap!)
                pOut->numMipLevels = Max(infoIn.numMipLevels - infoOut.firstMipIdInTail, 2u);

                // - (mip0) width = requestMipWidth << mipId, the value can't exceed mip tail dimension threshold
                pOut->unalignedWidth = Min(requestMipWidth << pOut->mipId, infoOut.blockWidth / 2);

                // - (mip0) height = requestMipHeight << mipId, the value can't exceed mip tail dimension threshold
                pOut->unalignedHeight = Min(requestMipHeight << pOut->mipId, infoOut.blockHeight);
            }
            // This check should cover at least mipId == 0
            else if (requestMipWidth << pIn->mipId == infoIn.width)
            {
                // For mipmap level [N] that is not in mip tail block and downgraded without losing element:
                // - only one mipmap level and mipId = 0
                pOut->mipId        = 0;
                pOut->numMipLevels = 1;

                // (mip0) width = requestMipWidth
                pOut->unalignedWidth = requestMipWidth;

                // (mip0) height = requestMipHeight
                pOut->unalignedHeight = requestMipHeight;
            }
            else
            {
                // For mipmap level [N] that is not in mip tail block and downgraded with element losing,
                // We have to make it a multiple mipmap view (2 levels view here), add one extra element if needed,
                // because single mip view may have different pitch value than original (multiple) mip view...
                // A simple case would be:
                // - 64KB block swizzle mode, 8 Bytes-Per-Element. Block dim = [0x80, 0x40]
                // - 2 mipmap levels with API mip0 width = 0x401/mip1 width = 0x200 and non-BC view
                //   mip0 width = 0x101/mip1 width = 0x80
                // By multiple mip view, the pitch for mip level 1 would be 0x100 bytes, due to rounding up logic in
                // GetMipSize(), and by single mip level view the pitch will only be 0x80 bytes.

                // - 2 levels and mipId = 1
                pOut->mipId        = 1;
                pOut->numMipLevels = 2;

                // mipId is at least 1 here: mipId == 0 always takes the branch above
                const UINT_32 upperMipWidth  =
                    PowTwoAlign(Max(pIn->width >> (pIn->mipId - 1), 1u), bcWidth) / bcWidth;
                const UINT_32 upperMipHeight =
                    PowTwoAlign(Max(pIn->height >> (pIn->mipId - 1), 1u), bcHeight) / bcHeight;

                const BOOL_32 needToAvoidInTail =
                    tiled && (requestMipWidth <= infoOut.blockWidth / 2) && (requestMipHeight <= infoOut.blockHeight) ?
                    TRUE : FALSE;

                const UINT_32 hwMipWidth  = PowTwoAlign(ShiftCeil(infoIn.width, pIn->mipId), infoOut.blockWidth);
                const UINT_32 hwMipHeight = PowTwoAlign(ShiftCeil(infoIn.height, pIn->mipId), infoOut.blockHeight);

                const BOOL_32 needExtraWidth =
                    ((upperMipWidth < requestMipWidth * 2) ||
                     ((upperMipWidth == requestMipWidth * 2) &&
                      ((needToAvoidInTail == TRUE) ||
                       (hwMipWidth > PowTwoAlign(requestMipWidth, infoOut.blockWidth))))) ? TRUE : FALSE;

                const BOOL_32 needExtraHeight =
                    ((upperMipHeight < requestMipHeight * 2) ||
                     ((upperMipHeight == requestMipHeight * 2) &&
                      ((needToAvoidInTail == TRUE) ||
                       (hwMipHeight > PowTwoAlign(requestMipHeight, infoOut.blockHeight))))) ? TRUE : FALSE;

                // (mip0) width = requestLastMipLevelWidth
                pOut->unalignedWidth  = upperMipWidth + (needExtraWidth ? 1: 0);

                // (mip0) height = requestLastMipLevelHeight
                pOut->unalignedHeight = upperMipHeight + (needExtraHeight ? 1: 0);
            }

            // Assert the downgrading from this mip[0] width would still generate correct mip[N] width
            ADDR_ASSERT(ShiftRight(pOut->unalignedWidth, pOut->mipId) == requestMipWidth);
            // Assert the downgrading from this mip[0] height would still generate correct mip[N] height
            ADDR_ASSERT(ShiftRight(pOut->unalignedHeight, pOut->mipId) == requestMipHeight);
        }
    }

    return returnCode;
}
2464
2465
/**
2466
************************************************************************************************************************
2467
* Gfx10Lib::ValidateNonSwModeParams
2468
*
2469
* @brief
2470
* Validate compute surface info params except swizzle mode
2471
*
2472
* @return
2473
* TRUE if parameters are valid, FALSE otherwise
2474
************************************************************************************************************************
2475
*/
2476
BOOL_32 Gfx10Lib::ValidateNonSwModeParams(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
{
    BOOL_32 valid = TRUE;

    // Range checks on bits per pixel, width and fragment / sample counts
    const BOOL_32 badBasics = (pIn->bpp == 0)      ||
                              (pIn->bpp > 128)     ||
                              (pIn->width == 0)    ||
                              (pIn->numFrags > 8)  ||
                              (pIn->numSamples > 16);

    if (badBasics)
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    if (pIn->resourceType >= ADDR_RSRC_MAX_TYPE)
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    const AddrResourceType type      = pIn->resourceType;
    const BOOL_32          hasMips   = (pIn->numMipLevels > 1);
    const BOOL_32          isMsaa    = (pIn->numFrags > 1);
    const BOOL_32          isDisplay = pIn->flags.display;
    const BOOL_32          isStereo  = pIn->flags.qbStereo;

    // Resource type check
    BOOL_32 typeOk = FALSE;

    if (IsTex1d(type) || IsTex3d(type))
    {
        // 1D and 3D resources share the same rule: no MSAA, no display, no stereo
        typeOk = (isMsaa || isDisplay || isStereo) ? FALSE : TRUE;
    }
    else if (IsTex2d(type))
    {
        // 2D resources may use at most one of MSAA, mipmapping and stereo
        typeOk = ((isMsaa && hasMips) || (isStereo && isMsaa) || (isStereo && hasMips)) ? FALSE : TRUE;
    }
    // Any other resource type is rejected (typeOk stays FALSE)

    if (typeOk == FALSE)
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    return valid;
}
2537
2538
/**
2539
************************************************************************************************************************
2540
* Gfx10Lib::ValidateSwModeParams
2541
*
2542
* @brief
2543
* Validate compute surface info related to swizzle mode
2544
*
2545
* @return
2546
* TRUE if parameters are valid, FALSE otherwise
2547
************************************************************************************************************************
2548
*/
2549
BOOL_32 Gfx10Lib::ValidateSwModeParams(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn) const
{
    // Checks that pIn->swizzleMode is compatible with the resource type, surface flags, bpp and
    // sample count of pIn. Every failed check asserts and makes the function return FALSE.
    BOOL_32 valid = TRUE;

    if (pIn->swizzleMode >= ADDR_SW_MAX_TYPE)
    {
        // An out-of-range mode can never be valid. Stop right here so that it is neither passed to
        // the per-swizzle-mode queries nor used as a shift count below.
        ADDR_ASSERT_ALWAYS();
        return FALSE;
    }
    else if (IsValidSwMode(pIn->swizzleMode) == FALSE)
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    const ADDR2_SURFACE_FLAGS flags    = pIn->flags;
    const AddrResourceType    rsrcType = pIn->resourceType;
    const AddrSwizzleMode     swizzle  = pIn->swizzleMode;
    const BOOL_32             msaa     = (pIn->numFrags > 1);
    const BOOL_32             zbuffer  = flags.depth || flags.stencil;
    const BOOL_32             color    = flags.color;
    const BOOL_32             display  = flags.display;
    const BOOL_32             tex3d    = IsTex3d(rsrcType);
    const BOOL_32             tex2d    = IsTex2d(rsrcType);
    const BOOL_32             tex1d    = IsTex1d(rsrcType);
    const BOOL_32             thin3d   = flags.view3dAs2dArray;
    const BOOL_32             linear   = IsLinear(swizzle);
    const BOOL_32             blk256B  = IsBlock256b(swizzle);
    const BOOL_32             blkVar   = IsBlockVariable(swizzle);
    const BOOL_32             prt      = flags.prt;
    const BOOL_32             fmask    = flags.fmask;

    // Misc check
    if ((pIn->numFrags > 1) &&
        (GetBlockSize(swizzle) < (m_pipeInterleaveBytes * pIn->numFrags)))
    {
        // MSAA surface must have blk_bytes/pipe_interleave >= num_samples
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    if (display && (IsValidDisplaySwizzleMode(pIn) == FALSE))
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    // 96 bpp is only accepted with a linear swizzle mode
    if ((pIn->bpp == 96) && (linear == FALSE))
    {
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    // Unsigned literal so that the highest mode bit does not shift into a sign bit.
    // NOTE(review): relies on ADDR_SW_MAX_TYPE not exceeding the bit width of UINT_32 - confirm.
    const UINT_32 swizzleMask = 1u << swizzle;

    // Resource type check
    if (tex1d)
    {
        if ((swizzleMask & Gfx10Rsrc1dSwModeMask) == 0)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (tex2d)
    {
        if (((swizzleMask & Gfx10Rsrc2dSwModeMask) == 0) ||
            (prt && ((swizzleMask & Gfx10Rsrc2dPrtSwModeMask) == 0)) ||
            (fmask && ((swizzleMask & Gfx10ZSwModeMask) == 0)))
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (tex3d)
    {
        if (((swizzleMask & Gfx10Rsrc3dSwModeMask) == 0) ||
            (prt && ((swizzleMask & Gfx10Rsrc3dPrtSwModeMask) == 0)) ||
            (thin3d && ((swizzleMask & Gfx10Rsrc3dThinSwModeMask) == 0)))
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }

    // Swizzle type check
    if (linear)
    {
        if (zbuffer || msaa || (pIn->bpp == 0) || ((pIn->bpp % 8) != 0))
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (IsZOrderSwizzle(swizzle))
    {
        if ((pIn->bpp > 64)                         ||
            (msaa && (color || (pIn->bpp > 32)))    ||
            ElemLib::IsBlockCompressed(pIn->format) ||
            ElemLib::IsMacroPixelPacked(pIn->format))
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (IsStandardSwizzle(rsrcType, swizzle))
    {
        if (zbuffer || msaa)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (IsDisplaySwizzle(rsrcType, swizzle))
    {
        if (zbuffer || msaa)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (IsRtOptSwizzle(swizzle))
    {
        if (zbuffer)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else
    {
        // Swizzle mode belongs to none of the known swizzle types
        ADDR_ASSERT_ALWAYS();
        valid = FALSE;
    }

    // Block type check
    if (blk256B)
    {
        if (zbuffer || tex3d || msaa)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }
    else if (blkVar)
    {
        // Variable-size blocks need a non-zero variable block size
        if (m_blockVarSizeLog2 == 0)
        {
            ADDR_ASSERT_ALWAYS();
            valid = FALSE;
        }
    }

    return valid;
}
2717
2718
/**
2719
************************************************************************************************************************
2720
* Gfx10Lib::HwlComputeSurfaceInfoSanityCheck
2721
*
2722
* @brief
2723
* Compute surface info sanity check
2724
*
2725
* @return
2726
* ADDR_OK if all parameters are valid, ADDR_INVALIDPARAMS otherwise
2727
************************************************************************************************************************
2728
*/
2729
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoSanityCheck(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn     ///< [in] input structure
    ) const
{
    // The swizzle mode checks only run once the swizzle-mode-independent checks have passed
    if (ValidateNonSwModeParams(pIn) == FALSE)
    {
        return ADDR_INVALIDPARAMS;
    }

    if (ValidateSwModeParams(pIn) == FALSE)
    {
        return ADDR_INVALIDPARAMS;
    }

    return ADDR_OK;
}
2735
2736
/**
2737
************************************************************************************************************************
2738
* Gfx10Lib::HwlGetPreferredSurfaceSetting
2739
*
2740
* @brief
2741
* Internal function to get suggested surface information for client to use
2742
*
2743
* @return
2744
* ADDR_E_RETURNCODE
2745
************************************************************************************************************************
2746
*/
2747
ADDR_E_RETURNCODE Gfx10Lib::HwlGetPreferredSurfaceSetting(
2748
const ADDR2_GET_PREFERRED_SURF_SETTING_INPUT* pIn, ///< [in] input structure
2749
ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT* pOut ///< [out] output structure
2750
) const
2751
{
2752
ADDR_E_RETURNCODE returnCode = ADDR_OK;
2753
2754
if (pIn->flags.fmask)
2755
{
2756
const BOOL_32 forbid64KbBlockType = pIn->forbiddenBlock.macroThin64KB ? TRUE : FALSE;
2757
const BOOL_32 forbidVarBlockType = ((m_blockVarSizeLog2 == 0) || (pIn->forbiddenBlock.var != 0));
2758
2759
if (forbid64KbBlockType && forbidVarBlockType)
2760
{
2761
// Invalid combination...
2762
ADDR_ASSERT_ALWAYS();
2763
returnCode = ADDR_INVALIDPARAMS;
2764
}
2765
else
2766
{
2767
pOut->resourceType = ADDR_RSRC_TEX_2D;
2768
pOut->validBlockSet.value = 0;
2769
pOut->validBlockSet.macroThin64KB = forbid64KbBlockType ? 0 : 1;
2770
pOut->validBlockSet.var = forbidVarBlockType ? 0 : 1;
2771
pOut->validSwModeSet.value = 0;
2772
pOut->validSwModeSet.sw64KB_Z_X = forbid64KbBlockType ? 0 : 1;
2773
pOut->validSwModeSet.gfx10.swVar_Z_X = forbidVarBlockType ? 0 : 1;
2774
pOut->canXor = TRUE;
2775
pOut->validSwTypeSet.value = AddrSwSetZ;
2776
pOut->clientPreferredSwSet = pOut->validSwTypeSet;
2777
2778
BOOL_32 use64KbBlockType = (forbid64KbBlockType == FALSE);
2779
2780
if ((forbid64KbBlockType == FALSE) && (forbidVarBlockType == FALSE))
2781
{
2782
const UINT_8 maxFmaskSwizzleModeType = 2;
2783
const UINT_32 ratioLow = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 3 : 2);
2784
const UINT_32 ratioHi = pIn->flags.minimizeAlign ? 1 : (pIn->flags.opt4space ? 2 : 1);
2785
const UINT_32 fmaskBpp = GetFmaskBpp(pIn->numSamples, pIn->numFrags);
2786
const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2787
const UINT_32 width = Max(pIn->width, 1u);
2788
const UINT_32 height = Max(pIn->height, 1u);
2789
const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (fmaskBpp >> 3), 1u);
2790
2791
AddrSwizzleMode swMode[maxFmaskSwizzleModeType] = {ADDR_SW_64KB_Z_X, ADDR_SW_VAR_Z_X};
2792
Dim3d blkDim[maxFmaskSwizzleModeType] = {{}, {}};
2793
Dim3d padDim[maxFmaskSwizzleModeType] = {{}, {}};
2794
UINT_64 padSize[maxFmaskSwizzleModeType] = {};
2795
2796
for (UINT_8 i = 0; i < maxFmaskSwizzleModeType; i++)
2797
{
2798
ComputeBlockDimensionForSurf(&blkDim[i].w,
2799
&blkDim[i].h,
2800
&blkDim[i].d,
2801
fmaskBpp,
2802
1,
2803
pOut->resourceType,
2804
swMode[i]);
2805
2806
padSize[i] = ComputePadSize(&blkDim[i], width, height, numSlices, &padDim[i]);
2807
padSize[i] = PowTwoAlign(padSize[i], sizeAlignInElement);
2808
}
2809
2810
if (BlockTypeWithinMemoryBudget(padSize[0],
2811
padSize[1],
2812
ratioLow,
2813
ratioHi,
2814
pIn->memoryBudget,
2815
GetBlockSizeLog2(swMode[1]) >= GetBlockSizeLog2(swMode[0])))
2816
{
2817
use64KbBlockType = FALSE;
2818
}
2819
}
2820
else if (forbidVarBlockType)
2821
{
2822
use64KbBlockType = TRUE;
2823
}
2824
2825
if (use64KbBlockType)
2826
{
2827
pOut->swizzleMode = ADDR_SW_64KB_Z_X;
2828
}
2829
else
2830
{
2831
pOut->swizzleMode = ADDR_SW_VAR_Z_X;
2832
}
2833
}
2834
}
2835
else
2836
{
2837
UINT_32 bpp = pIn->bpp;
2838
UINT_32 width = Max(pIn->width, 1u);
2839
UINT_32 height = Max(pIn->height, 1u);
2840
2841
// Set format to INVALID will skip this conversion
2842
if (pIn->format != ADDR_FMT_INVALID)
2843
{
2844
ElemMode elemMode = ADDR_UNCOMPRESSED;
2845
UINT_32 expandX, expandY;
2846
2847
// Get compression/expansion factors and element mode which indicates compression/expansion
2848
bpp = GetElemLib()->GetBitsPerPixel(pIn->format,
2849
&elemMode,
2850
&expandX,
2851
&expandY);
2852
2853
UINT_32 basePitch = 0;
2854
GetElemLib()->AdjustSurfaceInfo(elemMode,
2855
expandX,
2856
expandY,
2857
&bpp,
2858
&basePitch,
2859
&width,
2860
&height);
2861
}
2862
2863
const UINT_32 numSlices = Max(pIn->numSlices, 1u);
2864
const UINT_32 numMipLevels = Max(pIn->numMipLevels, 1u);
2865
const UINT_32 numSamples = Max(pIn->numSamples, 1u);
2866
const UINT_32 numFrags = (pIn->numFrags == 0) ? numSamples : pIn->numFrags;
2867
const BOOL_32 msaa = (numFrags > 1) || (numSamples > 1);
2868
2869
// Pre sanity check on non swizzle mode parameters
2870
ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
2871
localIn.flags = pIn->flags;
2872
localIn.resourceType = pIn->resourceType;
2873
localIn.format = pIn->format;
2874
localIn.bpp = bpp;
2875
localIn.width = width;
2876
localIn.height = height;
2877
localIn.numSlices = numSlices;
2878
localIn.numMipLevels = numMipLevels;
2879
localIn.numSamples = numSamples;
2880
localIn.numFrags = numFrags;
2881
2882
if (ValidateNonSwModeParams(&localIn))
2883
{
2884
// Forbid swizzle mode(s) by client setting
2885
ADDR2_SWMODE_SET allowedSwModeSet = {};
2886
allowedSwModeSet.value |= pIn->forbiddenBlock.linear ? 0 : Gfx10LinearSwModeMask;
2887
allowedSwModeSet.value |= pIn->forbiddenBlock.micro ? 0 : Gfx10Blk256BSwModeMask;
2888
allowedSwModeSet.value |=
2889
pIn->forbiddenBlock.macroThin4KB ? 0 :
2890
((pIn->resourceType == ADDR_RSRC_TEX_3D) ? 0 : Gfx10Blk4KBSwModeMask);
2891
allowedSwModeSet.value |=
2892
pIn->forbiddenBlock.macroThick4KB ? 0 :
2893
((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick4KBSwModeMask : 0);
2894
allowedSwModeSet.value |=
2895
pIn->forbiddenBlock.macroThin64KB ? 0 :
2896
((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask);
2897
allowedSwModeSet.value |=
2898
pIn->forbiddenBlock.macroThick64KB ? 0 :
2899
((pIn->resourceType == ADDR_RSRC_TEX_3D) ? Gfx10Rsrc3dThick64KBSwModeMask : 0);
2900
allowedSwModeSet.value |=
2901
pIn->forbiddenBlock.var ? 0 : (m_blockVarSizeLog2 ? Gfx10BlkVarSwModeMask : 0);
2902
2903
if (pIn->preferredSwSet.value != 0)
2904
{
2905
allowedSwModeSet.value &= pIn->preferredSwSet.sw_Z ? ~0 : ~Gfx10ZSwModeMask;
2906
allowedSwModeSet.value &= pIn->preferredSwSet.sw_S ? ~0 : ~Gfx10StandardSwModeMask;
2907
allowedSwModeSet.value &= pIn->preferredSwSet.sw_D ? ~0 : ~Gfx10DisplaySwModeMask;
2908
allowedSwModeSet.value &= pIn->preferredSwSet.sw_R ? ~0 : ~Gfx10RenderSwModeMask;
2909
}
2910
2911
if (pIn->noXor)
2912
{
2913
allowedSwModeSet.value &= ~Gfx10XorSwModeMask;
2914
}
2915
2916
if (pIn->maxAlign > 0)
2917
{
2918
if (pIn->maxAlign < (1u << m_blockVarSizeLog2))
2919
{
2920
allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
2921
}
2922
2923
if (pIn->maxAlign < Size64K)
2924
{
2925
allowedSwModeSet.value &= ~Gfx10Blk64KBSwModeMask;
2926
}
2927
2928
if (pIn->maxAlign < Size4K)
2929
{
2930
allowedSwModeSet.value &= ~Gfx10Blk4KBSwModeMask;
2931
}
2932
2933
if (pIn->maxAlign < Size256)
2934
{
2935
allowedSwModeSet.value &= ~Gfx10Blk256BSwModeMask;
2936
}
2937
}
2938
2939
// Filter out invalid swizzle mode(s) by image attributes and HW restrictions
2940
switch (pIn->resourceType)
2941
{
2942
case ADDR_RSRC_TEX_1D:
2943
allowedSwModeSet.value &= Gfx10Rsrc1dSwModeMask;
2944
break;
2945
2946
case ADDR_RSRC_TEX_2D:
2947
allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc2dPrtSwModeMask : Gfx10Rsrc2dSwModeMask;
2948
2949
break;
2950
2951
case ADDR_RSRC_TEX_3D:
2952
allowedSwModeSet.value &= pIn->flags.prt ? Gfx10Rsrc3dPrtSwModeMask : Gfx10Rsrc3dSwModeMask;
2953
2954
if (pIn->flags.view3dAs2dArray)
2955
{
2956
allowedSwModeSet.value &= Gfx10Rsrc3dThinSwModeMask;
2957
}
2958
break;
2959
2960
default:
2961
ADDR_ASSERT_ALWAYS();
2962
allowedSwModeSet.value = 0;
2963
break;
2964
}
2965
2966
if (ElemLib::IsBlockCompressed(pIn->format) ||
2967
ElemLib::IsMacroPixelPacked(pIn->format) ||
2968
(bpp > 64) ||
2969
(msaa && ((bpp > 32) || pIn->flags.color || pIn->flags.unordered)))
2970
{
2971
allowedSwModeSet.value &= ~Gfx10ZSwModeMask;
2972
}
2973
2974
if (pIn->format == ADDR_FMT_32_32_32)
2975
{
2976
allowedSwModeSet.value &= Gfx10LinearSwModeMask;
2977
}
2978
2979
if (msaa)
2980
{
2981
allowedSwModeSet.value &= Gfx10MsaaSwModeMask;
2982
}
2983
2984
if (pIn->flags.depth || pIn->flags.stencil)
2985
{
2986
allowedSwModeSet.value &= Gfx10ZSwModeMask;
2987
}
2988
2989
if (pIn->flags.display)
2990
{
2991
allowedSwModeSet.value &= GetValidDisplaySwizzleModes(bpp);
2992
}
2993
2994
if (allowedSwModeSet.value != 0)
2995
{
2996
#if DEBUG
2997
// Post sanity check, at least AddrLib should accept the output generated by its own
2998
UINT_32 validateSwModeSet = allowedSwModeSet.value;
2999
3000
for (UINT_32 i = 0; validateSwModeSet != 0; i++)
3001
{
3002
if (validateSwModeSet & 1)
3003
{
3004
localIn.swizzleMode = static_cast<AddrSwizzleMode>(i);
3005
ADDR_ASSERT(ValidateSwModeParams(&localIn));
3006
}
3007
3008
validateSwModeSet >>= 1;
3009
}
3010
#endif
3011
3012
pOut->resourceType = pIn->resourceType;
3013
pOut->validSwModeSet = allowedSwModeSet;
3014
pOut->canXor = (allowedSwModeSet.value & Gfx10XorSwModeMask) ? TRUE : FALSE;
3015
pOut->validBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3016
pOut->validSwTypeSet = GetAllowedSwSet(allowedSwModeSet);
3017
3018
pOut->clientPreferredSwSet = pIn->preferredSwSet;
3019
3020
if (pOut->clientPreferredSwSet.value == 0)
3021
{
3022
pOut->clientPreferredSwSet.value = AddrSwSetAll;
3023
}
3024
3025
// Apply optional restrictions
3026
if ((pIn->flags.depth || pIn->flags.stencil) && msaa && m_configFlags.nonPower2MemConfig)
3027
{
3028
if ((allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask) != 0)
3029
{
3030
// MSAA depth in non power of 2 memory configs would suffer from non-local channel accesses from
3031
// the GL2 in VAR mode, so it should be avoided.
3032
allowedSwModeSet.value &= ~Gfx10BlkVarSwModeMask;
3033
}
3034
else
3035
{
3036
// We should still be able to use VAR for non power of 2 memory configs with MSAA z/stencil.
3037
// But we have to suffer from low performance because there is no other choice...
3038
ADDR_ASSERT_ALWAYS();
3039
}
3040
}
3041
3042
if (pIn->flags.needEquation)
3043
{
3044
FilterInvalidEqSwizzleMode(allowedSwModeSet, pIn->resourceType, Log2(bpp >> 3));
3045
}
3046
3047
if (allowedSwModeSet.value == Gfx10LinearSwModeMask)
3048
{
3049
pOut->swizzleMode = ADDR_SW_LINEAR;
3050
}
3051
else
3052
{
3053
const BOOL_32 computeMinSize = (pIn->flags.minimizeAlign == 1) || (pIn->memoryBudget >= 1.0);
3054
3055
if ((height > 1) && (computeMinSize == FALSE))
3056
{
3057
// Always ignore linear swizzle mode if:
3058
// 1. This is a (2D/3D) resource with height > 1
3059
// 2. Client doesn't require computing minimize size
3060
allowedSwModeSet.swLinear = 0;
3061
}
3062
3063
ADDR2_BLOCK_SET allowedBlockSet = GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType);
3064
3065
// Determine block size if there are 2 or more block type candidates
3066
if (IsPow2(allowedBlockSet.value) == FALSE)
3067
{
3068
AddrSwizzleMode swMode[AddrBlockMaxTiledType] = {};
3069
3070
swMode[AddrBlockLinear] = ADDR_SW_LINEAR;
3071
3072
if (m_blockVarSizeLog2 != 0)
3073
{
3074
swMode[AddrBlockThinVar] = ADDR_SW_VAR_R_X;
3075
}
3076
3077
if (pOut->resourceType == ADDR_RSRC_TEX_3D)
3078
{
3079
swMode[AddrBlockThick4KB] = ADDR_SW_4KB_S;
3080
swMode[AddrBlockThin64KB] = ADDR_SW_64KB_R_X;
3081
swMode[AddrBlockThick64KB] = ADDR_SW_64KB_S;
3082
}
3083
else
3084
{
3085
swMode[AddrBlockMicro] = ADDR_SW_256B_S;
3086
swMode[AddrBlockThin4KB] = ADDR_SW_4KB_S;
3087
swMode[AddrBlockThin64KB] = ADDR_SW_64KB_S;
3088
}
3089
3090
UINT_64 padSize[AddrBlockMaxTiledType] = {};
3091
3092
const UINT_32 ratioLow = computeMinSize ? 1 : (pIn->flags.opt4space ? 3 : 2);
3093
const UINT_32 ratioHi = computeMinSize ? 1 : (pIn->flags.opt4space ? 2 : 1);
3094
const UINT_64 sizeAlignInElement = Max(NextPow2(pIn->minSizeAlign) / (bpp >> 3), 1u);
3095
UINT_32 minSizeBlk = AddrBlockMicro;
3096
UINT_64 minSize = 0;
3097
3098
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
3099
3100
for (UINT_32 i = AddrBlockLinear; i < AddrBlockMaxTiledType; i++)
3101
{
3102
if (IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3103
{
3104
localIn.swizzleMode = swMode[i];
3105
3106
if (localIn.swizzleMode == ADDR_SW_LINEAR)
3107
{
3108
returnCode = HwlComputeSurfaceInfoLinear(&localIn, &localOut);
3109
}
3110
else
3111
{
3112
returnCode = HwlComputeSurfaceInfoTiled(&localIn, &localOut);
3113
}
3114
3115
if (returnCode == ADDR_OK)
3116
{
3117
padSize[i] = localOut.surfSize;
3118
3119
if (minSize == 0)
3120
{
3121
minSize = padSize[i];
3122
minSizeBlk = i;
3123
}
3124
else
3125
{
3126
if (BlockTypeWithinMemoryBudget(
3127
minSize,
3128
padSize[i],
3129
ratioLow,
3130
ratioHi,
3131
0.0,
3132
GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])))
3133
{
3134
minSize = padSize[i];
3135
minSizeBlk = i;
3136
}
3137
}
3138
}
3139
else
3140
{
3141
ADDR_ASSERT_ALWAYS();
3142
break;
3143
}
3144
}
3145
}
3146
3147
if (pIn->memoryBudget > 1.0)
3148
{
3149
// If minimum size is given by swizzle mode with bigger-block type, then don't ever check
3150
// smaller-block type again in coming loop
3151
switch (minSizeBlk)
3152
{
3153
case AddrBlockThick64KB:
3154
allowedBlockSet.macroThin64KB = 0;
3155
case AddrBlockThinVar:
3156
case AddrBlockThin64KB:
3157
allowedBlockSet.macroThick4KB = 0;
3158
case AddrBlockThick4KB:
3159
allowedBlockSet.macroThin4KB = 0;
3160
case AddrBlockThin4KB:
3161
allowedBlockSet.micro = 0;
3162
case AddrBlockMicro:
3163
allowedBlockSet.linear = 0;
3164
case AddrBlockLinear:
3165
break;
3166
3167
default:
3168
ADDR_ASSERT_ALWAYS();
3169
break;
3170
}
3171
3172
for (UINT_32 i = AddrBlockMicro; i < AddrBlockMaxTiledType; i++)
3173
{
3174
if ((i != minSizeBlk) &&
3175
IsBlockTypeAvaiable(allowedBlockSet, static_cast<AddrBlockType>(i)))
3176
{
3177
if (BlockTypeWithinMemoryBudget(
3178
minSize,
3179
padSize[i],
3180
0,
3181
0,
3182
pIn->memoryBudget,
3183
GetBlockSizeLog2(swMode[i]) >= GetBlockSizeLog2(swMode[minSizeBlk])) == FALSE)
3184
{
3185
// Clear the block type if the memory waste is unacceptable
3186
allowedBlockSet.value &= ~(1u << (i - 1));
3187
}
3188
}
3189
}
3190
3191
// Remove VAR block type if bigger block type is allowed
3192
if (GetBlockSizeLog2(swMode[AddrBlockThinVar]) < GetBlockSizeLog2(ADDR_SW_64KB_R_X))
3193
{
3194
if (allowedBlockSet.macroThick64KB || allowedBlockSet.macroThin64KB)
3195
{
3196
allowedBlockSet.var = 0;
3197
}
3198
}
3199
3200
// Remove linear block type if 2 or more block types are allowed
3201
if (IsPow2(allowedBlockSet.value) == FALSE)
3202
{
3203
allowedBlockSet.linear = 0;
3204
}
3205
3206
// Select the biggest allowed block type
3207
minSizeBlk = Log2NonPow2(allowedBlockSet.value) + 1;
3208
3209
if (minSizeBlk == static_cast<UINT_32>(AddrBlockMaxTiledType))
3210
{
3211
minSizeBlk = AddrBlockLinear;
3212
}
3213
}
3214
3215
switch (minSizeBlk)
3216
{
3217
case AddrBlockLinear:
3218
allowedSwModeSet.value &= Gfx10LinearSwModeMask;
3219
break;
3220
3221
case AddrBlockMicro:
3222
ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3223
allowedSwModeSet.value &= Gfx10Blk256BSwModeMask;
3224
break;
3225
3226
case AddrBlockThin4KB:
3227
ADDR_ASSERT(pOut->resourceType != ADDR_RSRC_TEX_3D);
3228
allowedSwModeSet.value &= Gfx10Blk4KBSwModeMask;
3229
break;
3230
3231
case AddrBlockThick4KB:
3232
ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3233
allowedSwModeSet.value &= Gfx10Rsrc3dThick4KBSwModeMask;
3234
break;
3235
3236
case AddrBlockThin64KB:
3237
allowedSwModeSet.value &= (pOut->resourceType == ADDR_RSRC_TEX_3D) ?
3238
Gfx10Rsrc3dThin64KBSwModeMask : Gfx10Blk64KBSwModeMask;
3239
break;
3240
3241
case AddrBlockThick64KB:
3242
ADDR_ASSERT(pOut->resourceType == ADDR_RSRC_TEX_3D);
3243
allowedSwModeSet.value &= Gfx10Rsrc3dThick64KBSwModeMask;
3244
break;
3245
3246
case AddrBlockThinVar:
3247
allowedSwModeSet.value &= Gfx10BlkVarSwModeMask;
3248
break;
3249
3250
default:
3251
ADDR_ASSERT_ALWAYS();
3252
allowedSwModeSet.value = 0;
3253
break;
3254
}
3255
}
3256
3257
// Block type should be determined.
3258
ADDR_ASSERT(IsPow2(GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).value));
3259
3260
ADDR2_SWTYPE_SET allowedSwSet = GetAllowedSwSet(allowedSwModeSet);
3261
3262
// Determine swizzle type if there are 2 or more swizzle type candidates
3263
if ((allowedSwSet.value != 0) && (IsPow2(allowedSwSet.value) == FALSE))
3264
{
3265
if (ElemLib::IsBlockCompressed(pIn->format))
3266
{
3267
if (allowedSwSet.sw_D)
3268
{
3269
allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3270
}
3271
else if (allowedSwSet.sw_S)
3272
{
3273
allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3274
}
3275
else
3276
{
3277
ADDR_ASSERT(allowedSwSet.sw_R);
3278
allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3279
}
3280
}
3281
else if (ElemLib::IsMacroPixelPacked(pIn->format))
3282
{
3283
if (allowedSwSet.sw_S)
3284
{
3285
allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3286
}
3287
else if (allowedSwSet.sw_D)
3288
{
3289
allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3290
}
3291
else
3292
{
3293
ADDR_ASSERT(allowedSwSet.sw_R);
3294
allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3295
}
3296
}
3297
else if (pIn->resourceType == ADDR_RSRC_TEX_3D)
3298
{
3299
if (pIn->flags.color &&
3300
GetAllowedBlockSet(allowedSwModeSet, pOut->resourceType).macroThick64KB &&
3301
allowedSwSet.sw_D)
3302
{
3303
allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3304
}
3305
else if (allowedSwSet.sw_S)
3306
{
3307
allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3308
}
3309
else if (allowedSwSet.sw_R)
3310
{
3311
allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3312
}
3313
else
3314
{
3315
ADDR_ASSERT(allowedSwSet.sw_Z);
3316
allowedSwModeSet.value &= Gfx10ZSwModeMask;
3317
}
3318
}
3319
else
3320
{
3321
if (allowedSwSet.sw_R)
3322
{
3323
allowedSwModeSet.value &= Gfx10RenderSwModeMask;
3324
}
3325
else if (allowedSwSet.sw_D)
3326
{
3327
allowedSwModeSet.value &= Gfx10DisplaySwModeMask;
3328
}
3329
else if (allowedSwSet.sw_S)
3330
{
3331
allowedSwModeSet.value &= Gfx10StandardSwModeMask;
3332
}
3333
else
3334
{
3335
ADDR_ASSERT(allowedSwSet.sw_Z);
3336
allowedSwModeSet.value &= Gfx10ZSwModeMask;
3337
}
3338
}
3339
3340
// Swizzle type should be determined.
3341
ADDR_ASSERT(IsPow2(GetAllowedSwSet(allowedSwModeSet).value));
3342
}
3343
3344
// Determine swizzle mode now. Always select the "largest" swizzle mode for a given block type +
3345
// swizzle type combination. E.g, for AddrBlockThin64KB + ADDR_SW_S, select SW_64KB_S_X(25) if it's
3346
// available, or otherwise select SW_64KB_S_T(17) if it's available, or otherwise select SW_64KB_S(9).
3347
pOut->swizzleMode = static_cast<AddrSwizzleMode>(Log2NonPow2(allowedSwModeSet.value));
3348
}
3349
}
3350
else
3351
{
3352
// Invalid combination...
3353
ADDR_ASSERT_ALWAYS();
3354
returnCode = ADDR_INVALIDPARAMS;
3355
}
3356
}
3357
else
3358
{
3359
// Invalid combination...
3360
ADDR_ASSERT_ALWAYS();
3361
returnCode = ADDR_INVALIDPARAMS;
3362
}
3363
}
3364
3365
return returnCode;
3366
}
3367
3368
/**
3369
************************************************************************************************************************
3370
* Gfx10Lib::ComputeStereoInfo
3371
*
3372
* @brief
3373
* Compute height alignment and right eye pipeBankXor for stereo surface
3374
*
3375
* @return
3376
* Error code
3377
*
3378
************************************************************************************************************************
3379
*/
3380
ADDR_E_RETURNCODE Gfx10Lib::ComputeStereoInfo(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,        ///< Compute surface info
    UINT_32*                                pAlignY,    ///< Stereo requested additional alignment in Y
    UINT_32*                                pRightXor   ///< Right eye xor
    ) const
{
    // The right eye xor defaults to 0; it is only overwritten further down.
    *pRightXor = 0;

    // Nothing more to do for swizzle modes IsNonPrtXor() rejects.
    if (IsNonPrtXor(pIn->swizzleMode) == FALSE)
    {
        return ADDR_OK;
    }

    const UINT_32 endBit  = GetBlockSizeLog2(pIn->swizzleMode);
    const UINT_32 bppLog2 = Log2(pIn->bpp >> 3);
    const UINT_32 rsrcIdx = static_cast<UINT_32>(pIn->resourceType) - 1;
    const UINT_32 modeIdx = static_cast<UINT_32>(pIn->swizzleMode);
    const UINT_32 eqIdx   = m_equationLookupTable[rsrcIdx][modeIdx][bppLog2];

    // Without an equation the stereo info cannot be derived.
    if (eqIdx == ADDR_INVALID_EQUATION_INDEX)
    {
        return ADDR_INVALIDPARAMS;
    }

    // Pass 1: find the highest Y (channel 1) bit index referenced by any address bit in
    // [m_pipeInterleaveLog2, endBit), looking at the addr, xor1 and xor2 terms.
    UINT_32 topYBit = 0;

    for (UINT_32 bit = m_pipeInterleaveLog2; bit < endBit; bit++)
    {
        ADDR_ASSERT(m_equationTable[eqIdx].addr[bit].valid == 1);

        if ((m_equationTable[eqIdx].addr[bit].channel == 1) &&
            (m_equationTable[eqIdx].addr[bit].index > topYBit))
        {
            topYBit = m_equationTable[eqIdx].addr[bit].index;
        }

        if ((m_equationTable[eqIdx].xor1[bit].valid == 1) &&
            (m_equationTable[eqIdx].xor1[bit].channel == 1) &&
            (m_equationTable[eqIdx].xor1[bit].index > topYBit))
        {
            topYBit = m_equationTable[eqIdx].xor1[bit].index;
        }

        if ((m_equationTable[eqIdx].xor2[bit].valid == 1) &&
            (m_equationTable[eqIdx].xor2[bit].channel == 1) &&
            (m_equationTable[eqIdx].xor2[bit].index > topYBit))
        {
            topYBit = m_equationTable[eqIdx].xor2[bit].index;
        }
    }

    // Pass 2: collect every address bit position whose equation uses that highest Y bit.
    UINT_32 topYPosMask = 0;

    for (UINT_32 bit = m_pipeInterleaveLog2; bit < endBit; bit++)
    {
        const bool usesTopY =
            ((m_equationTable[eqIdx].addr[bit].channel == 1) &&
             (m_equationTable[eqIdx].addr[bit].index == topYBit)) ||
            ((m_equationTable[eqIdx].xor1[bit].valid == 1) &&
             (m_equationTable[eqIdx].xor1[bit].channel == 1) &&
             (m_equationTable[eqIdx].xor1[bit].index == topYBit)) ||
            ((m_equationTable[eqIdx].xor2[bit].valid == 1) &&
             (m_equationTable[eqIdx].xor2[bit].channel == 1) &&
             (m_equationTable[eqIdx].xor2[bit].index == topYBit));

        if (usesTopY)
        {
            topYPosMask |= 1u << bit;
        }
    }

    const UINT_32 yAlign = 1 << topYBit;

    // Only raise the caller's Y alignment, never lower it.
    if (yAlign >= *pAlignY)
    {
        *pAlignY = yAlign;

        const UINT_32 paddedHeight = PowTwoAlign(pIn->height, yAlign);

        // If the aligned height has the top Y bit set, report the position mask
        // (relative to the pipe interleave) as the right eye xor.
        if ((paddedHeight >> topYBit) & 1)
        {
            *pRightXor = topYPosMask >> m_pipeInterleaveLog2;
        }
    }

    return ADDR_OK;
}
3473
3474
/**
3475
************************************************************************************************************************
3476
* Gfx10Lib::HwlComputeSurfaceInfoTiled
3477
*
3478
* @brief
3479
* Internal function to calculate alignment for tiled surface
3480
*
3481
* @return
3482
* ADDR_E_RETURNCODE
3483
************************************************************************************************************************
3484
*/
3485
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoTiled(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    // Mip chain dimension and epitch have no meaning in GFX10, so report default values.
    pOut->epitchIsHeight = FALSE;
    pOut->mipChainSlice  = 0;
    pOut->mipChainHeight = 0;
    pOut->mipChainPitch  = 0;

    // Defaults meaning "no mip tail"; ComputeSurfaceInfoMacroTiled() overwrites them if necessary.
    pOut->firstMipIdInTail = pIn->numMipLevels;
    pOut->mipChainInTail   = FALSE;

    // 256B blocks take the micro tiled path, every other block size the macro tiled path.
    return IsBlock256b(pIn->swizzleMode) ? ComputeSurfaceInfoMicroTiled(pIn, pOut)
                                         : ComputeSurfaceInfoMacroTiled(pIn, pOut);
}
3513
3514
3515
/**
3516
************************************************************************************************************************
3517
* Gfx10Lib::ComputeSurfaceInfoMicroTiled
3518
*
3519
* @brief
3520
* Internal function to calculate alignment for micro tiled surface
3521
*
3522
* @return
3523
* ADDR_E_RETURNCODE
3524
************************************************************************************************************************
3525
*/
3526
ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMicroTiled(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    // Block dimensions are written straight into pOut and drive all alignment below.
    ADDR_E_RETURNCODE ret = ComputeBlockDimensionForSurf(&pOut->blockWidth,
                                                         &pOut->blockHeight,
                                                         &pOut->blockSlices,
                                                         pIn->bpp,
                                                         pIn->numFrags,
                                                         pIn->resourceType,
                                                         pIn->swizzleMode);

    if (ret == ADDR_OK)
    {
        const UINT_32 blockSize = GetBlockSize(pIn->swizzleMode);

        pOut->pitch     = PowTwoAlign(pIn->width,  pOut->blockWidth);
        pOut->height    = PowTwoAlign(pIn->height, pOut->blockHeight);
        pOut->numSlices = pIn->numSlices;
        pOut->baseAlign = blockSize;

        if (pIn->numMipLevels > 1)
        {
            const UINT_32 mip0Width    = pIn->width;
            const UINT_32 mip0Height   = pIn->height;
            UINT_64       mipSliceSize = 0;

            // Walk from the last mip level down to mip 0: each level's offset is the
            // accumulated size of all higher-numbered levels.
            for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
            {
                UINT_32 mipWidth, mipHeight;

                GetMipSize(mip0Width, mip0Height, 1, i, &mipWidth, &mipHeight);

                const UINT_32 mipActualWidth  = PowTwoAlign(mipWidth,  pOut->blockWidth);
                const UINT_32 mipActualHeight = PowTwoAlign(mipHeight, pOut->blockHeight);

                // Per-mip info is optional output.
                if (pOut->pMipInfo != NULL)
                {
                    pOut->pMipInfo[i].pitch            = mipActualWidth;
                    pOut->pMipInfo[i].height           = mipActualHeight;
                    pOut->pMipInfo[i].depth            = 1;
                    pOut->pMipInfo[i].offset           = mipSliceSize;
                    pOut->pMipInfo[i].mipTailOffset    = 0;
                    pOut->pMipInfo[i].macroBlockOffset = mipSliceSize;
                }

                // Widen before multiplying: a 32-bit product would wrap for large mips
                // (e.g. 16384 x 16384 at 16 bytes per element is exactly 2^32). This matches
                // the 64-bit computation used for the single-mip case below.
                mipSliceSize += static_cast<UINT_64>(mipActualWidth) * mipActualHeight * (pIn->bpp >> 3);
            }

            pOut->sliceSize = mipSliceSize;
            pOut->surfSize  = mipSliceSize * pOut->numSlices;
        }
        else
        {
            pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3);
            pOut->surfSize  = pOut->sliceSize * pOut->numSlices;

            if (pOut->pMipInfo != NULL)
            {
                pOut->pMipInfo[0].pitch            = pOut->pitch;
                pOut->pMipInfo[0].height           = pOut->height;
                pOut->pMipInfo[0].depth            = 1;
                pOut->pMipInfo[0].offset           = 0;
                pOut->pMipInfo[0].mipTailOffset    = 0;
                pOut->pMipInfo[0].macroBlockOffset = 0;
            }
        }

    }

    return ret;
}
3599
3600
/**
3601
************************************************************************************************************************
3602
* Gfx10Lib::ComputeSurfaceInfoMacroTiled
3603
*
3604
* @brief
3605
* Internal function to calculate alignment for macro tiled surface
3606
*
3607
* @return
3608
* ADDR_E_RETURNCODE
3609
************************************************************************************************************************
3610
*/
3611
ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceInfoMacroTiled(
    const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_SURFACE_INFO_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    // Block dimensions are written straight into pOut and drive all alignment below.
    ADDR_E_RETURNCODE returnCode = ComputeBlockDimensionForSurf(&pOut->blockWidth,
                                                                &pOut->blockHeight,
                                                                &pOut->blockSlices,
                                                                pIn->bpp,
                                                                pIn->numFrags,
                                                                pIn->resourceType,
                                                                pIn->swizzleMode);

    if (returnCode == ADDR_OK)
    {
        UINT_32 heightAlign = pOut->blockHeight;

        if (pIn->flags.qbStereo)
        {
            UINT_32 rightXor = 0;

            // May raise heightAlign beyond the block height.
            returnCode = ComputeStereoInfo(pIn, &heightAlign, &rightXor);

            // pStereoInfo is treated as optional output, like pMipInfo: callers such as
            // HwlGetPreferredSurfaceSetting() pass a zero-initialized output structure, so
            // only store the right eye swizzle when a destination was supplied.
            if ((returnCode == ADDR_OK) && (pOut->pStereoInfo != NULL))
            {
                pOut->pStereoInfo->rightSwizzle = rightXor;
            }
        }

        if (returnCode == ADDR_OK)
        {
            const UINT_32 blockSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
            const UINT_32 blockSize     = 1 << blockSizeLog2;

            pOut->pitch     = PowTwoAlign(pIn->width,     pOut->blockWidth);
            pOut->height    = PowTwoAlign(pIn->height,    heightAlign);
            pOut->numSlices = PowTwoAlign(pIn->numSlices, pOut->blockSlices);
            pOut->baseAlign = blockSize;

            if (pIn->numMipLevels > 1)
            {
                const Dim3d   tailMaxDim         = GetMipTailDim(pIn->resourceType,
                                                                 pIn->swizzleMode,
                                                                 pOut->blockWidth,
                                                                 pOut->blockHeight,
                                                                 pOut->blockSlices);
                const UINT_32 mip0Width          = pIn->width;
                const UINT_32 mip0Height         = pIn->height;
                const BOOL_32 isThin             = IsThin(pIn->resourceType, pIn->swizzleMode);
                const UINT_32 mip0Depth          = isThin ? 1 : pIn->numSlices;
                const UINT_32 maxMipsInTail      = GetMaxNumMipsInTail(blockSizeLog2, isThin);
                const UINT_32 index              = Log2(pIn->bpp >> 3);
                UINT_32       firstMipInTail     = pIn->numMipLevels;
                UINT_64       mipChainSliceSize  = 0;
                UINT_64       mipSize[MaxMipLevels];
                UINT_64       mipSliceSize[MaxMipLevels];

                Dim3d fixedTailMaxDim = tailMaxDim;

                // With the dsMipmapHtileFix setting, Z-order swizzles with 8/16 bpp elements use a
                // tail threshold scaled down by the ratio of their 256B block to the 32 bpp one.
                if (m_settings.dsMipmapHtileFix && IsZOrderSwizzle(pIn->swizzleMode) && (index <= 1))
                {
                    fixedTailMaxDim.w /= Block256_2d[index].w / Block256_2d[2].w;
                    fixedTailMaxDim.h /= Block256_2d[index].h / Block256_2d[2].h;
                }

                // Size every mip level up to (not including) the first one that fits in the mip
                // tail; the whole tail then contributes a single block to the chain.
                for (UINT_32 i = 0; i < pIn->numMipLevels; i++)
                {
                    UINT_32 mipWidth, mipHeight, mipDepth;

                    GetMipSize(mip0Width, mip0Height, mip0Depth, i, &mipWidth, &mipHeight, &mipDepth);

                    if (IsInMipTail(fixedTailMaxDim, maxMipsInTail, mipWidth, mipHeight, pIn->numMipLevels - i))
                    {
                        firstMipInTail     = i;
                        mipChainSliceSize += blockSize / pOut->blockSlices;
                        break;
                    }
                    else
                    {
                        const UINT_32 pitch     = PowTwoAlign(mipWidth,  pOut->blockWidth);
                        const UINT_32 height    = PowTwoAlign(mipHeight, pOut->blockHeight);
                        const UINT_32 depth     = PowTwoAlign(mipDepth,  pOut->blockSlices);
                        const UINT_64 sliceSize = static_cast<UINT_64>(pitch) * height * (pIn->bpp >> 3);

                        mipSize[i]         = sliceSize * depth;
                        mipSliceSize[i]    = sliceSize * pOut->blockSlices;
                        mipChainSliceSize += sliceSize;

                        if (pOut->pMipInfo != NULL)
                        {
                            pOut->pMipInfo[i].pitch  = pitch;
                            pOut->pMipInfo[i].height = height;
                            pOut->pMipInfo[i].depth  = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
                        }
                    }
                }

                pOut->sliceSize        = mipChainSliceSize;
                pOut->surfSize         = mipChainSliceSize * pOut->numSlices;
                pOut->mipChainInTail   = (firstMipInTail == 0) ? TRUE : FALSE;
                pOut->firstMipIdInTail = firstMipInTail;

                // Per-mip info is optional output.
                if (pOut->pMipInfo != NULL)
                {
                    UINT_64 offset         = 0;
                    UINT_64 macroBlkOffset = 0;
                    UINT_32 tailMaxDepth   = 0;

                    // If a mip tail exists, the non-tail mips start after the space it occupies.
                    if (firstMipInTail != pIn->numMipLevels)
                    {
                        UINT_32 mipWidth, mipHeight;

                        GetMipSize(mip0Width, mip0Height, mip0Depth, firstMipInTail,
                                   &mipWidth, &mipHeight, &tailMaxDepth);

                        offset         = blockSize * PowTwoAlign(tailMaxDepth, pOut->blockSlices) / pOut->blockSlices;
                        macroBlkOffset = blockSize;
                    }

                    // Non-tail mips are placed from the last one down to mip 0, so offsets
                    // accumulate in reverse mip order.
                    for (INT_32 i = firstMipInTail - 1; i >= 0; i--)
                    {
                        pOut->pMipInfo[i].offset           = offset;
                        pOut->pMipInfo[i].macroBlockOffset = macroBlkOffset;
                        pOut->pMipInfo[i].mipTailOffset    = 0;

                        offset         += mipSize[i];
                        macroBlkOffset += mipSliceSize[i];
                    }

                    UINT_32 pitch  = tailMaxDim.w;
                    UINT_32 height = tailMaxDim.h;
                    UINT_32 depth  = isThin ? 1 : PowTwoAlign(tailMaxDepth, Block256_3d[index].d);

                    // From here on tailMaxDepth counts 256B-block layers (1 for thin surfaces).
                    tailMaxDepth   = isThin ? 1 : (depth / Block256_3d[index].d);

                    for (UINT_32 i = firstMipInTail; i < pIn->numMipLevels; i++)
                    {
                        const UINT_32 m         = maxMipsInTail - 1 - (i - firstMipInTail);
                        const UINT_32 mipOffset = (m > 6) ? (16 << m) : (m << 8);

                        pOut->pMipInfo[i].offset           = mipOffset * tailMaxDepth;
                        pOut->pMipInfo[i].mipTailOffset    = mipOffset;
                        pOut->pMipInfo[i].macroBlockOffset = 0;

                        pOut->pMipInfo[i].pitch  = pitch;
                        pOut->pMipInfo[i].height = height;
                        pOut->pMipInfo[i].depth  = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;

                        // De-interleave the tail offset: odd bits 9..19 form mipX and
                        // even bits 8..18 form mipY.
                        UINT_32 mipX = ((mipOffset >> 9)  & 1)  |
                                       ((mipOffset >> 10) & 2)  |
                                       ((mipOffset >> 11) & 4)  |
                                       ((mipOffset >> 12) & 8)  |
                                       ((mipOffset >> 13) & 16) |
                                       ((mipOffset >> 14) & 32);
                        UINT_32 mipY = ((mipOffset >> 8)  & 1)  |
                                       ((mipOffset >> 9)  & 2)  |
                                       ((mipOffset >> 10) & 4)  |
                                       ((mipOffset >> 11) & 8)  |
                                       ((mipOffset >> 12) & 16) |
                                       ((mipOffset >> 13) & 32);

                        // For odd block-size exponents the two coordinates trade roles, with an
                        // extra one-bit rotation when the element-size exponent is odd as well.
                        if (blockSizeLog2 & 1)
                        {
                            const UINT_32 temp = mipX;
                            mipX = mipY;
                            mipY = temp;

                            if (index & 1)
                            {
                                mipY = (mipY << 1) | (mipX & 1);
                                mipX = mipX >> 1;
                            }
                        }

                        // Scale to element coordinates with the 256B block dimensions and halve
                        // the reported pitch/height for the next level, clamped to one 256B block.
                        if (isThin)
                        {
                            pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_2d[index].w;
                            pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_2d[index].h;
                            pOut->pMipInfo[i].mipTailCoordZ = 0;

                            pitch  = Max(pitch  >> 1, Block256_2d[index].w);
                            height = Max(height >> 1, Block256_2d[index].h);
                        }
                        else
                        {
                            pOut->pMipInfo[i].mipTailCoordX = mipX * Block256_3d[index].w;
                            pOut->pMipInfo[i].mipTailCoordY = mipY * Block256_3d[index].h;
                            pOut->pMipInfo[i].mipTailCoordZ = 0;

                            pitch  = Max(pitch  >> 1, Block256_3d[index].w);
                            height = Max(height >> 1, Block256_3d[index].h);
                        }
                    }
                }
            }
            else
            {
                // Single mip level: the slice size also accounts for the fragment count.
                pOut->sliceSize = static_cast<UINT_64>(pOut->pitch) * pOut->height * (pIn->bpp >> 3) * pIn->numFrags;
                pOut->surfSize  = pOut->sliceSize * pOut->numSlices;

                if (pOut->pMipInfo != NULL)
                {
                    pOut->pMipInfo[0].pitch            = pOut->pitch;
                    pOut->pMipInfo[0].height           = pOut->height;
                    pOut->pMipInfo[0].depth            = IsTex3d(pIn->resourceType) ? pOut->numSlices : 1;
                    pOut->pMipInfo[0].offset           = 0;
                    pOut->pMipInfo[0].mipTailOffset    = 0;
                    pOut->pMipInfo[0].macroBlockOffset = 0;
                    pOut->pMipInfo[0].mipTailCoordX    = 0;
                    pOut->pMipInfo[0].mipTailCoordY    = 0;
                    pOut->pMipInfo[0].mipTailCoordZ    = 0;
                }
            }
        }
    }

    return returnCode;
}
3829
3830
/**
3831
************************************************************************************************************************
3832
* Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled
3833
*
3834
* @brief
3835
* Internal function to calculate address from coord for tiled swizzle surface
3836
*
3837
* @return
3838
* ADDR_E_RETURNCODE
3839
************************************************************************************************************************
3840
*/
3841
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceAddrFromCoordTiled(
    const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn,    ///< [in] input structure
    ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT*      pOut    ///< [out] output structure
    ) const
{
    // 256B blocks take the micro tiled path, every other block size the macro tiled path.
    return IsBlock256b(pIn->swizzleMode) ? ComputeSurfaceAddrFromCoordMicroTiled(pIn, pOut)
                                         : ComputeSurfaceAddrFromCoordMacroTiled(pIn, pOut);
}
3859
3860
/**
3861
************************************************************************************************************************
3862
* Gfx10Lib::ComputeOffsetFromEquation
3863
*
3864
* @brief
3865
* Compute offset from equation
3866
*
3867
* @return
3868
* Offset
3869
************************************************************************************************************************
3870
*/
3871
UINT_32 Gfx10Lib::ComputeOffsetFromEquation(
    const ADDR_EQUATION* pEq,   ///< Equation
    UINT_32              x,     ///< x coord in bytes
    UINT_32              y,     ///< y coord in pixel
    UINT_32              z      ///< z coord in slice
    ) const
{
    UINT_32 result = 0;

    // Each output bit is the XOR of up to three coordinate bits (addr, xor1, xor2 terms).
    // A term's channel picks the coordinate (0 = x, 1 = y, otherwise z) and its index the bit.
    for (UINT_32 bit = 0; bit < pEq->numBits; bit++)
    {
        UINT_32 bitValue = 0;

        if (pEq->addr[bit].valid)
        {
            UINT_32 coord;

            switch (pEq->addr[bit].channel)
            {
                case 0:
                    coord = x;
                    break;
                case 1:
                    coord = y;
                    break;
                default:
                    ADDR_ASSERT(pEq->addr[bit].channel == 2);
                    coord = z;
                    break;
            }

            bitValue ^= (coord >> pEq->addr[bit].index) & 1;
        }

        if (pEq->xor1[bit].valid)
        {
            UINT_32 coord;

            switch (pEq->xor1[bit].channel)
            {
                case 0:
                    coord = x;
                    break;
                case 1:
                    coord = y;
                    break;
                default:
                    ADDR_ASSERT(pEq->xor1[bit].channel == 2);
                    coord = z;
                    break;
            }

            bitValue ^= (coord >> pEq->xor1[bit].index) & 1;
        }

        if (pEq->xor2[bit].valid)
        {
            UINT_32 coord;

            switch (pEq->xor2[bit].channel)
            {
                case 0:
                    coord = x;
                    break;
                case 1:
                    coord = y;
                    break;
                default:
                    ADDR_ASSERT(pEq->xor2[bit].channel == 2);
                    coord = z;
                    break;
            }

            bitValue ^= (coord >> pEq->xor2[bit].index) & 1;
        }

        result |= (bitValue << bit);
    }

    return result;
}
3940
3941
/**
3942
************************************************************************************************************************
3943
* Gfx10Lib::ComputeOffsetFromSwizzlePattern
3944
*
3945
* @brief
3946
* Compute offset from swizzle pattern
3947
*
3948
* @return
3949
* Offset
3950
************************************************************************************************************************
3951
*/
3952
UINT_32 Gfx10Lib::ComputeOffsetFromSwizzlePattern(
    const UINT_64* pPattern,    ///< Swizzle pattern
    UINT_32        numBits,     ///< Number of bits in pattern
    UINT_32        x,           ///< x coord in pixel
    UINT_32        y,           ///< y coord in pixel
    UINT_32        z,           ///< z coord in slice
    UINT_32        s            ///< sample id
    ) const
{
    // Each pattern entry is read as an ADDR_BIT_SETTING carrying one bit mask per coordinate.
    const ADDR_BIT_SETTING* const pBitSettings = reinterpret_cast<const ADDR_BIT_SETTING*>(pPattern);
    UINT_32                       result       = 0;

    for (UINT_32 bit = 0; bit < numBits; bit++)
    {
        // Output bit = XOR of every coordinate bit selected by the x/y/z/s masks of this entry.
        UINT_32 parity = 0;
        UINT_16 mask   = pBitSettings[bit].x;

        // A zero mask selects nothing, so the loop simply does not run for it.
        for (UINT_32 coord = x; mask != 0; mask >>= 1, coord >>= 1)
        {
            parity ^= (mask & coord & 1);
        }

        mask = pBitSettings[bit].y;

        for (UINT_32 coord = y; mask != 0; mask >>= 1, coord >>= 1)
        {
            parity ^= (mask & coord & 1);
        }

        mask = pBitSettings[bit].z;

        for (UINT_32 coord = z; mask != 0; mask >>= 1, coord >>= 1)
        {
            parity ^= (mask & coord & 1);
        }

        mask = pBitSettings[bit].s;

        for (UINT_32 coord = s; mask != 0; mask >>= 1, coord >>= 1)
        {
            parity ^= (mask & coord & 1);
        }

        result |= (parity << bit);
    }

    return result;
}
4041
4042
/**
4043
************************************************************************************************************************
4044
* Gfx10Lib::GetSwizzlePatternInfo
4045
*
4046
* @brief
4047
* Get swizzle pattern
4048
*
4049
* @return
4050
* Swizzle pattern information
4051
************************************************************************************************************************
4052
*/
4053
const ADDR_SW_PATINFO* Gfx10Lib::GetSwizzlePatternInfo(
4054
AddrSwizzleMode swizzleMode, ///< Swizzle mode
4055
AddrResourceType resourceType, ///< Resource type
4056
UINT_32 elemLog2, ///< Element size in bytes log2
4057
UINT_32 numFrag ///< Number of fragment
4058
) const
4059
{
4060
const UINT_32 index = IsXor(swizzleMode) ? (m_colorBaseIndex + elemLog2) : elemLog2;
4061
const ADDR_SW_PATINFO* patInfo = NULL;
4062
const UINT_32 swizzleMask = 1 << swizzleMode;
4063
4064
if (IsBlockVariable(swizzleMode))
4065
{
4066
if (m_blockVarSizeLog2 != 0)
4067
{
4068
ADDR_ASSERT(m_settings.supportRbPlus);
4069
4070
if (IsRtOptSwizzle(swizzleMode))
4071
{
4072
if (numFrag == 1)
4073
{
4074
patInfo = GFX10_SW_VAR_R_X_1xaa_RBPLUS_PATINFO;
4075
}
4076
else if (numFrag == 2)
4077
{
4078
patInfo = GFX10_SW_VAR_R_X_2xaa_RBPLUS_PATINFO;
4079
}
4080
else if (numFrag == 4)
4081
{
4082
patInfo = GFX10_SW_VAR_R_X_4xaa_RBPLUS_PATINFO;
4083
}
4084
else
4085
{
4086
ADDR_ASSERT(numFrag == 8);
4087
patInfo = GFX10_SW_VAR_R_X_8xaa_RBPLUS_PATINFO;
4088
}
4089
}
4090
else if (IsZOrderSwizzle(swizzleMode))
4091
{
4092
if (numFrag == 1)
4093
{
4094
patInfo = GFX10_SW_VAR_Z_X_1xaa_RBPLUS_PATINFO;
4095
}
4096
else if (numFrag == 2)
4097
{
4098
patInfo = GFX10_SW_VAR_Z_X_2xaa_RBPLUS_PATINFO;
4099
}
4100
else if (numFrag == 4)
4101
{
4102
patInfo = GFX10_SW_VAR_Z_X_4xaa_RBPLUS_PATINFO;
4103
}
4104
else
4105
{
4106
ADDR_ASSERT(numFrag == 8);
4107
patInfo = GFX10_SW_VAR_Z_X_8xaa_RBPLUS_PATINFO;
4108
}
4109
}
4110
}
4111
}
4112
else if (IsLinear(swizzleMode) == FALSE)
4113
{
4114
if (resourceType == ADDR_RSRC_TEX_3D)
4115
{
4116
ADDR_ASSERT(numFrag == 1);
4117
4118
if ((swizzleMask & Gfx10Rsrc3dSwModeMask) != 0)
4119
{
4120
if (IsRtOptSwizzle(swizzleMode))
4121
{
4122
patInfo = m_settings.supportRbPlus ?
4123
GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4124
}
4125
else if (IsZOrderSwizzle(swizzleMode))
4126
{
4127
patInfo = m_settings.supportRbPlus ?
4128
GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4129
}
4130
else if (IsDisplaySwizzle(resourceType, swizzleMode))
4131
{
4132
ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_X);
4133
patInfo = m_settings.supportRbPlus ?
4134
GFX10_SW_64K_D3_X_RBPLUS_PATINFO : GFX10_SW_64K_D3_X_PATINFO;
4135
}
4136
else
4137
{
4138
ADDR_ASSERT(IsStandardSwizzle(resourceType, swizzleMode));
4139
4140
if (IsBlock4kb(swizzleMode))
4141
{
4142
if (swizzleMode == ADDR_SW_4KB_S)
4143
{
4144
patInfo = m_settings.supportRbPlus ?
4145
GFX10_SW_4K_S3_RBPLUS_PATINFO : GFX10_SW_4K_S3_PATINFO;
4146
}
4147
else
4148
{
4149
ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4150
patInfo = m_settings.supportRbPlus ?
4151
GFX10_SW_4K_S3_X_RBPLUS_PATINFO : GFX10_SW_4K_S3_X_PATINFO;
4152
}
4153
}
4154
else
4155
{
4156
if (swizzleMode == ADDR_SW_64KB_S)
4157
{
4158
patInfo = m_settings.supportRbPlus ?
4159
GFX10_SW_64K_S3_RBPLUS_PATINFO : GFX10_SW_64K_S3_PATINFO;
4160
}
4161
else if (swizzleMode == ADDR_SW_64KB_S_X)
4162
{
4163
patInfo = m_settings.supportRbPlus ?
4164
GFX10_SW_64K_S3_X_RBPLUS_PATINFO : GFX10_SW_64K_S3_X_PATINFO;
4165
}
4166
else
4167
{
4168
ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4169
patInfo = m_settings.supportRbPlus ?
4170
GFX10_SW_64K_S3_T_RBPLUS_PATINFO : GFX10_SW_64K_S3_T_PATINFO;
4171
}
4172
}
4173
}
4174
}
4175
}
4176
else
4177
{
4178
if ((swizzleMask & Gfx10Rsrc2dSwModeMask) != 0)
4179
{
4180
if (IsBlock256b(swizzleMode))
4181
{
4182
if (swizzleMode == ADDR_SW_256B_S)
4183
{
4184
patInfo = m_settings.supportRbPlus ?
4185
GFX10_SW_256_S_RBPLUS_PATINFO : GFX10_SW_256_S_PATINFO;
4186
}
4187
else
4188
{
4189
ADDR_ASSERT(swizzleMode == ADDR_SW_256B_D);
4190
patInfo = m_settings.supportRbPlus ?
4191
GFX10_SW_256_D_RBPLUS_PATINFO : GFX10_SW_256_D_PATINFO;
4192
}
4193
}
4194
else if (IsBlock4kb(swizzleMode))
4195
{
4196
if (IsStandardSwizzle(resourceType, swizzleMode))
4197
{
4198
if (swizzleMode == ADDR_SW_4KB_S)
4199
{
4200
patInfo = m_settings.supportRbPlus ?
4201
GFX10_SW_4K_S_RBPLUS_PATINFO : GFX10_SW_4K_S_PATINFO;
4202
}
4203
else
4204
{
4205
ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_S_X);
4206
patInfo = m_settings.supportRbPlus ?
4207
GFX10_SW_4K_S_X_RBPLUS_PATINFO : GFX10_SW_4K_S_X_PATINFO;
4208
}
4209
}
4210
else
4211
{
4212
if (swizzleMode == ADDR_SW_4KB_D)
4213
{
4214
patInfo = m_settings.supportRbPlus ?
4215
GFX10_SW_4K_D_RBPLUS_PATINFO : GFX10_SW_4K_D_PATINFO;
4216
}
4217
else
4218
{
4219
ADDR_ASSERT(swizzleMode == ADDR_SW_4KB_D_X);
4220
patInfo = m_settings.supportRbPlus ?
4221
GFX10_SW_4K_D_X_RBPLUS_PATINFO : GFX10_SW_4K_D_X_PATINFO;
4222
}
4223
}
4224
}
4225
else
4226
{
4227
if (IsRtOptSwizzle(swizzleMode))
4228
{
4229
if (numFrag == 1)
4230
{
4231
patInfo = m_settings.supportRbPlus ?
4232
GFX10_SW_64K_R_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_1xaa_PATINFO;
4233
}
4234
else if (numFrag == 2)
4235
{
4236
patInfo = m_settings.supportRbPlus ?
4237
GFX10_SW_64K_R_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_2xaa_PATINFO;
4238
}
4239
else if (numFrag == 4)
4240
{
4241
patInfo = m_settings.supportRbPlus ?
4242
GFX10_SW_64K_R_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_4xaa_PATINFO;
4243
}
4244
else
4245
{
4246
ADDR_ASSERT(numFrag == 8);
4247
patInfo = m_settings.supportRbPlus ?
4248
GFX10_SW_64K_R_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_R_X_8xaa_PATINFO;
4249
}
4250
}
4251
else if (IsZOrderSwizzle(swizzleMode))
4252
{
4253
if (numFrag == 1)
4254
{
4255
patInfo = m_settings.supportRbPlus ?
4256
GFX10_SW_64K_Z_X_1xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_1xaa_PATINFO;
4257
}
4258
else if (numFrag == 2)
4259
{
4260
patInfo = m_settings.supportRbPlus ?
4261
GFX10_SW_64K_Z_X_2xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_2xaa_PATINFO;
4262
}
4263
else if (numFrag == 4)
4264
{
4265
patInfo = m_settings.supportRbPlus ?
4266
GFX10_SW_64K_Z_X_4xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_4xaa_PATINFO;
4267
}
4268
else
4269
{
4270
ADDR_ASSERT(numFrag == 8);
4271
patInfo = m_settings.supportRbPlus ?
4272
GFX10_SW_64K_Z_X_8xaa_RBPLUS_PATINFO : GFX10_SW_64K_Z_X_8xaa_PATINFO;
4273
}
4274
}
4275
else if (IsDisplaySwizzle(resourceType, swizzleMode))
4276
{
4277
if (swizzleMode == ADDR_SW_64KB_D)
4278
{
4279
patInfo = m_settings.supportRbPlus ?
4280
GFX10_SW_64K_D_RBPLUS_PATINFO : GFX10_SW_64K_D_PATINFO;
4281
}
4282
else if (swizzleMode == ADDR_SW_64KB_D_X)
4283
{
4284
patInfo = m_settings.supportRbPlus ?
4285
GFX10_SW_64K_D_X_RBPLUS_PATINFO : GFX10_SW_64K_D_X_PATINFO;
4286
}
4287
else
4288
{
4289
ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_D_T);
4290
patInfo = m_settings.supportRbPlus ?
4291
GFX10_SW_64K_D_T_RBPLUS_PATINFO : GFX10_SW_64K_D_T_PATINFO;
4292
}
4293
}
4294
else
4295
{
4296
if (swizzleMode == ADDR_SW_64KB_S)
4297
{
4298
patInfo = m_settings.supportRbPlus ?
4299
GFX10_SW_64K_S_RBPLUS_PATINFO : GFX10_SW_64K_S_PATINFO;
4300
}
4301
else if (swizzleMode == ADDR_SW_64KB_S_X)
4302
{
4303
patInfo = m_settings.supportRbPlus ?
4304
GFX10_SW_64K_S_X_RBPLUS_PATINFO : GFX10_SW_64K_S_X_PATINFO;
4305
}
4306
else
4307
{
4308
ADDR_ASSERT(swizzleMode == ADDR_SW_64KB_S_T);
4309
patInfo = m_settings.supportRbPlus ?
4310
GFX10_SW_64K_S_T_RBPLUS_PATINFO : GFX10_SW_64K_S_T_PATINFO;
4311
}
4312
}
4313
}
4314
}
4315
}
4316
}
4317
4318
return (patInfo != NULL) ? &patInfo[index] : NULL;
4319
}
4320
4321
4322
/**
4323
************************************************************************************************************************
4324
* Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled
4325
*
4326
* @brief
4327
* Internal function to calculate address from coord for micro tiled swizzle surface
4328
*
4329
* @return
4330
* ADDR_E_RETURNCODE
4331
************************************************************************************************************************
4332
*/
4333
ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMicroTiled(
4334
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4335
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4336
) const
4337
{
4338
ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
4339
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4340
ADDR2_MIP_INFO mipInfo[MaxMipLevels];
4341
4342
localIn.swizzleMode = pIn->swizzleMode;
4343
localIn.flags = pIn->flags;
4344
localIn.resourceType = pIn->resourceType;
4345
localIn.bpp = pIn->bpp;
4346
localIn.width = Max(pIn->unalignedWidth, 1u);
4347
localIn.height = Max(pIn->unalignedHeight, 1u);
4348
localIn.numSlices = Max(pIn->numSlices, 1u);
4349
localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4350
localIn.numSamples = Max(pIn->numSamples, 1u);
4351
localIn.numFrags = Max(pIn->numFrags, 1u);
4352
localOut.pMipInfo = mipInfo;
4353
4354
ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMicroTiled(&localIn, &localOut);
4355
4356
if (ret == ADDR_OK)
4357
{
4358
const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4359
const UINT_32 rsrcType = static_cast<UINT_32>(pIn->resourceType) - 1;
4360
const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
4361
const UINT_32 eqIndex = m_equationLookupTable[rsrcType][swMode][elemLog2];
4362
4363
if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4364
{
4365
const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4366
const UINT_32 yb = pIn->y / localOut.blockHeight;
4367
const UINT_32 xb = pIn->x / localOut.blockWidth;
4368
const UINT_32 blockIndex = yb * pb + xb;
4369
const UINT_32 blockSize = 256;
4370
const UINT_32 blk256Offset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4371
pIn->x << elemLog2,
4372
pIn->y,
4373
0);
4374
pOut->addr = localOut.sliceSize * pIn->slice +
4375
mipInfo[pIn->mipId].macroBlockOffset +
4376
(blockIndex * blockSize) +
4377
blk256Offset;
4378
}
4379
else
4380
{
4381
ret = ADDR_INVALIDPARAMS;
4382
}
4383
}
4384
4385
return ret;
4386
}
4387
4388
/**
4389
************************************************************************************************************************
4390
* Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled
4391
*
4392
* @brief
4393
* Internal function to calculate address from coord for macro tiled swizzle surface
4394
*
4395
* @return
4396
* ADDR_E_RETURNCODE
4397
************************************************************************************************************************
4398
*/
4399
ADDR_E_RETURNCODE Gfx10Lib::ComputeSurfaceAddrFromCoordMacroTiled(
4400
const ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_INPUT* pIn, ///< [in] input structure
4401
ADDR2_COMPUTE_SURFACE_ADDRFROMCOORD_OUTPUT* pOut ///< [out] output structure
4402
) const
4403
{
4404
ADDR2_COMPUTE_SURFACE_INFO_INPUT localIn = {};
4405
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT localOut = {};
4406
ADDR2_MIP_INFO mipInfo[MaxMipLevels];
4407
4408
localIn.swizzleMode = pIn->swizzleMode;
4409
localIn.flags = pIn->flags;
4410
localIn.resourceType = pIn->resourceType;
4411
localIn.bpp = pIn->bpp;
4412
localIn.width = Max(pIn->unalignedWidth, 1u);
4413
localIn.height = Max(pIn->unalignedHeight, 1u);
4414
localIn.numSlices = Max(pIn->numSlices, 1u);
4415
localIn.numMipLevels = Max(pIn->numMipLevels, 1u);
4416
localIn.numSamples = Max(pIn->numSamples, 1u);
4417
localIn.numFrags = Max(pIn->numFrags, 1u);
4418
localOut.pMipInfo = mipInfo;
4419
4420
ADDR_E_RETURNCODE ret = ComputeSurfaceInfoMacroTiled(&localIn, &localOut);
4421
4422
if (ret == ADDR_OK)
4423
{
4424
const UINT_32 elemLog2 = Log2(pIn->bpp >> 3);
4425
const UINT_32 blkSizeLog2 = GetBlockSizeLog2(pIn->swizzleMode);
4426
const UINT_32 blkMask = (1 << blkSizeLog2) - 1;
4427
const UINT_32 pipeMask = (1 << m_pipesLog2) - 1;
4428
const UINT_32 bankMask = ((1 << GetBankXorBits(blkSizeLog2)) - 1) << (m_pipesLog2 + ColumnBits);
4429
const UINT_32 pipeBankXor = IsXor(pIn->swizzleMode) ?
4430
(((pIn->pipeBankXor & (pipeMask | bankMask)) << m_pipeInterleaveLog2) & blkMask) : 0;
4431
4432
if (localIn.numFrags > 1)
4433
{
4434
const ADDR_SW_PATINFO* pPatInfo = GetSwizzlePatternInfo(pIn->swizzleMode,
4435
pIn->resourceType,
4436
elemLog2,
4437
localIn.numFrags);
4438
4439
if (pPatInfo != NULL)
4440
{
4441
const UINT_32 pb = localOut.pitch / localOut.blockWidth;
4442
const UINT_32 yb = pIn->y / localOut.blockHeight;
4443
const UINT_32 xb = pIn->x / localOut.blockWidth;
4444
const UINT_64 blkIdx = yb * pb + xb;
4445
4446
ADDR_BIT_SETTING fullSwizzlePattern[20];
4447
GetSwizzlePatternFromPatternInfo(pPatInfo, fullSwizzlePattern);
4448
4449
const UINT_32 blkOffset =
4450
ComputeOffsetFromSwizzlePattern(reinterpret_cast<const UINT_64*>(fullSwizzlePattern),
4451
blkSizeLog2,
4452
pIn->x,
4453
pIn->y,
4454
pIn->slice,
4455
pIn->sample);
4456
4457
pOut->addr = (localOut.sliceSize * pIn->slice) +
4458
(blkIdx << blkSizeLog2) +
4459
(blkOffset ^ pipeBankXor);
4460
}
4461
else
4462
{
4463
ret = ADDR_INVALIDPARAMS;
4464
}
4465
}
4466
else
4467
{
4468
const UINT_32 rsrcIdx = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? 1 : 0;
4469
const UINT_32 swMode = static_cast<UINT_32>(pIn->swizzleMode);
4470
const UINT_32 eqIndex = m_equationLookupTable[rsrcIdx][swMode][elemLog2];
4471
4472
if (eqIndex != ADDR_INVALID_EQUATION_INDEX)
4473
{
4474
const BOOL_32 inTail = (mipInfo[pIn->mipId].mipTailOffset != 0) ? TRUE : FALSE;
4475
const BOOL_32 isThin = IsThin(pIn->resourceType, pIn->swizzleMode);
4476
const UINT_64 sliceSize = isThin ? localOut.sliceSize : (localOut.sliceSize * localOut.blockSlices);
4477
const UINT_32 sliceId = isThin ? pIn->slice : (pIn->slice / localOut.blockSlices);
4478
const UINT_32 x = inTail ? (pIn->x + mipInfo[pIn->mipId].mipTailCoordX) : pIn->x;
4479
const UINT_32 y = inTail ? (pIn->y + mipInfo[pIn->mipId].mipTailCoordY) : pIn->y;
4480
const UINT_32 z = inTail ? (pIn->slice + mipInfo[pIn->mipId].mipTailCoordZ) : pIn->slice;
4481
const UINT_32 pb = mipInfo[pIn->mipId].pitch / localOut.blockWidth;
4482
const UINT_32 yb = pIn->y / localOut.blockHeight;
4483
const UINT_32 xb = pIn->x / localOut.blockWidth;
4484
const UINT_64 blkIdx = yb * pb + xb;
4485
const UINT_32 blkOffset = ComputeOffsetFromEquation(&m_equationTable[eqIndex],
4486
x << elemLog2,
4487
y,
4488
z);
4489
pOut->addr = sliceSize * sliceId +
4490
mipInfo[pIn->mipId].macroBlockOffset +
4491
(blkIdx << blkSizeLog2) +
4492
(blkOffset ^ pipeBankXor);
4493
}
4494
else
4495
{
4496
ret = ADDR_INVALIDPARAMS;
4497
}
4498
}
4499
}
4500
4501
return ret;
4502
}
4503
4504
/**
4505
************************************************************************************************************************
4506
* Gfx10Lib::HwlComputeMaxBaseAlignments
4507
*
4508
* @brief
4509
* Gets maximum alignments
4510
* @return
4511
* maximum alignments
4512
************************************************************************************************************************
4513
*/
4514
UINT_32 Gfx10Lib::HwlComputeMaxBaseAlignments() const
4515
{
4516
return m_blockVarSizeLog2 ? Max(Size64K, 1u << m_blockVarSizeLog2) : Size64K;
4517
}
4518
4519
/**
4520
************************************************************************************************************************
4521
* Gfx10Lib::HwlComputeMaxMetaBaseAlignments
4522
*
4523
* @brief
4524
* Gets maximum alignments for metadata
4525
* @return
4526
* maximum alignments for metadata
4527
************************************************************************************************************************
4528
*/
4529
UINT_32 Gfx10Lib::HwlComputeMaxMetaBaseAlignments() const
4530
{
4531
Dim3d metaBlk;
4532
4533
const AddrSwizzleMode ValidSwizzleModeForXmask[] =
4534
{
4535
ADDR_SW_64KB_Z_X,
4536
m_blockVarSizeLog2 ? ADDR_SW_VAR_Z_X : ADDR_SW_64KB_Z_X,
4537
};
4538
4539
UINT_32 maxBaseAlignHtile = 0;
4540
UINT_32 maxBaseAlignCmask = 0;
4541
4542
for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForXmask) / sizeof(ValidSwizzleModeForXmask[0]); swIdx++)
4543
{
4544
for (UINT_32 bppLog2 = 0; bppLog2 < 3; bppLog2++)
4545
{
4546
for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4547
{
4548
// Max base alignment for Htile
4549
const UINT_32 metaBlkSizeHtile = GetMetaBlkSize(Gfx10DataDepthStencil,
4550
ADDR_RSRC_TEX_2D,
4551
ValidSwizzleModeForXmask[swIdx],
4552
bppLog2,
4553
numFragLog2,
4554
TRUE,
4555
&metaBlk);
4556
4557
maxBaseAlignHtile = Max(maxBaseAlignHtile, metaBlkSizeHtile);
4558
}
4559
}
4560
4561
// Max base alignment for Cmask
4562
const UINT_32 metaBlkSizeCmask = GetMetaBlkSize(Gfx10DataFmask,
4563
ADDR_RSRC_TEX_2D,
4564
ValidSwizzleModeForXmask[swIdx],
4565
0,
4566
0,
4567
TRUE,
4568
&metaBlk);
4569
4570
maxBaseAlignCmask = Max(maxBaseAlignCmask, metaBlkSizeCmask);
4571
}
4572
4573
// Max base alignment for 2D Dcc
4574
const AddrSwizzleMode ValidSwizzleModeForDcc2D[] =
4575
{
4576
ADDR_SW_64KB_S_X,
4577
ADDR_SW_64KB_D_X,
4578
ADDR_SW_64KB_R_X,
4579
m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4580
};
4581
4582
UINT_32 maxBaseAlignDcc2D = 0;
4583
4584
for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc2D) / sizeof(ValidSwizzleModeForDcc2D[0]); swIdx++)
4585
{
4586
for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4587
{
4588
for (UINT_32 numFragLog2 = 0; numFragLog2 < 4; numFragLog2++)
4589
{
4590
const UINT_32 metaBlkSize2D = GetMetaBlkSize(Gfx10DataColor,
4591
ADDR_RSRC_TEX_2D,
4592
ValidSwizzleModeForDcc2D[swIdx],
4593
bppLog2,
4594
numFragLog2,
4595
TRUE,
4596
&metaBlk);
4597
4598
maxBaseAlignDcc2D = Max(maxBaseAlignDcc2D, metaBlkSize2D);
4599
}
4600
}
4601
}
4602
4603
// Max base alignment for 3D Dcc
4604
const AddrSwizzleMode ValidSwizzleModeForDcc3D[] =
4605
{
4606
ADDR_SW_64KB_Z_X,
4607
ADDR_SW_64KB_S_X,
4608
ADDR_SW_64KB_D_X,
4609
ADDR_SW_64KB_R_X,
4610
m_blockVarSizeLog2 ? ADDR_SW_VAR_R_X : ADDR_SW_64KB_R_X,
4611
};
4612
4613
UINT_32 maxBaseAlignDcc3D = 0;
4614
4615
for (UINT_32 swIdx = 0; swIdx < sizeof(ValidSwizzleModeForDcc3D) / sizeof(ValidSwizzleModeForDcc3D[0]); swIdx++)
4616
{
4617
for (UINT_32 bppLog2 = 0; bppLog2 < MaxNumOfBpp; bppLog2++)
4618
{
4619
const UINT_32 metaBlkSize3D = GetMetaBlkSize(Gfx10DataColor,
4620
ADDR_RSRC_TEX_3D,
4621
ValidSwizzleModeForDcc3D[swIdx],
4622
bppLog2,
4623
0,
4624
TRUE,
4625
&metaBlk);
4626
4627
maxBaseAlignDcc3D = Max(maxBaseAlignDcc3D, metaBlkSize3D);
4628
}
4629
}
4630
4631
return Max(Max(maxBaseAlignHtile, maxBaseAlignCmask), Max(maxBaseAlignDcc2D, maxBaseAlignDcc3D));
4632
}
4633
4634
/**
4635
************************************************************************************************************************
4636
* Gfx10Lib::GetMetaElementSizeLog2
4637
*
4638
* @brief
4639
* Gets meta data element size log2
4640
* @return
4641
* Meta data element size log2
4642
************************************************************************************************************************
4643
*/
4644
INT_32 Gfx10Lib::GetMetaElementSizeLog2(
4645
Gfx10DataType dataType) ///< Data surface type
4646
{
4647
INT_32 elemSizeLog2 = 0;
4648
4649
if (dataType == Gfx10DataColor)
4650
{
4651
elemSizeLog2 = 0;
4652
}
4653
else if (dataType == Gfx10DataDepthStencil)
4654
{
4655
elemSizeLog2 = 2;
4656
}
4657
else
4658
{
4659
ADDR_ASSERT(dataType == Gfx10DataFmask);
4660
elemSizeLog2 = -1;
4661
}
4662
4663
return elemSizeLog2;
4664
}
4665
4666
/**
4667
************************************************************************************************************************
4668
* Gfx10Lib::GetMetaCacheSizeLog2
4669
*
4670
* @brief
4671
* Gets meta data cache line size log2
4672
* @return
4673
* Meta data cache line size log2
4674
************************************************************************************************************************
4675
*/
4676
INT_32 Gfx10Lib::GetMetaCacheSizeLog2(
4677
Gfx10DataType dataType) ///< Data surface type
4678
{
4679
INT_32 cacheSizeLog2 = 0;
4680
4681
if (dataType == Gfx10DataColor)
4682
{
4683
cacheSizeLog2 = 6;
4684
}
4685
else if (dataType == Gfx10DataDepthStencil)
4686
{
4687
cacheSizeLog2 = 8;
4688
}
4689
else
4690
{
4691
ADDR_ASSERT(dataType == Gfx10DataFmask);
4692
cacheSizeLog2 = 8;
4693
}
4694
return cacheSizeLog2;
4695
}
4696
4697
/**
4698
************************************************************************************************************************
4699
* Gfx10Lib::HwlComputeSurfaceInfoLinear
4700
*
4701
* @brief
4702
* Internal function to calculate alignment for linear surface
4703
*
4704
* @return
4705
* ADDR_E_RETURNCODE
4706
************************************************************************************************************************
4707
*/
4708
ADDR_E_RETURNCODE Gfx10Lib::HwlComputeSurfaceInfoLinear(
4709
const ADDR2_COMPUTE_SURFACE_INFO_INPUT* pIn, ///< [in] input structure
4710
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT* pOut ///< [out] output structure
4711
) const
4712
{
4713
ADDR_E_RETURNCODE returnCode = ADDR_OK;
4714
4715
if (IsTex1d(pIn->resourceType) && (pIn->height > 1))
4716
{
4717
returnCode = ADDR_INVALIDPARAMS;
4718
}
4719
else
4720
{
4721
const UINT_32 elementBytes = pIn->bpp >> 3;
4722
const UINT_32 pitchAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? 1 : (256 / elementBytes);
4723
const UINT_32 mipDepth = (pIn->resourceType == ADDR_RSRC_TEX_3D) ? pIn->numSlices : 1;
4724
UINT_32 pitch = PowTwoAlign(pIn->width, pitchAlign);
4725
UINT_32 actualHeight = pIn->height;
4726
UINT_64 sliceSize = 0;
4727
4728
if (pIn->numMipLevels > 1)
4729
{
4730
for (INT_32 i = static_cast<INT_32>(pIn->numMipLevels) - 1; i >= 0; i--)
4731
{
4732
UINT_32 mipWidth, mipHeight;
4733
4734
GetMipSize(pIn->width, pIn->height, 1, i, &mipWidth, &mipHeight);
4735
4736
const UINT_32 mipActualWidth = PowTwoAlign(mipWidth, pitchAlign);
4737
4738
if (pOut->pMipInfo != NULL)
4739
{
4740
pOut->pMipInfo[i].pitch = mipActualWidth;
4741
pOut->pMipInfo[i].height = mipHeight;
4742
pOut->pMipInfo[i].depth = mipDepth;
4743
pOut->pMipInfo[i].offset = sliceSize;
4744
pOut->pMipInfo[i].mipTailOffset = 0;
4745
pOut->pMipInfo[i].macroBlockOffset = sliceSize;
4746
}
4747
4748
sliceSize += static_cast<UINT_64>(mipActualWidth) * mipHeight * elementBytes;
4749
}
4750
}
4751
else
4752
{
4753
returnCode = ApplyCustomizedPitchHeight(pIn, elementBytes, pitchAlign, &pitch, &actualHeight);
4754
4755
if (returnCode == ADDR_OK)
4756
{
4757
sliceSize = static_cast<UINT_64>(pitch) * actualHeight * elementBytes;
4758
4759
if (pOut->pMipInfo != NULL)
4760
{
4761
pOut->pMipInfo[0].pitch = pitch;
4762
pOut->pMipInfo[0].height = actualHeight;
4763
pOut->pMipInfo[0].depth = mipDepth;
4764
pOut->pMipInfo[0].offset = 0;
4765
pOut->pMipInfo[0].mipTailOffset = 0;
4766
pOut->pMipInfo[0].macroBlockOffset = 0;
4767
}
4768
}
4769
}
4770
4771
if (returnCode == ADDR_OK)
4772
{
4773
pOut->pitch = pitch;
4774
pOut->height = actualHeight;
4775
pOut->numSlices = pIn->numSlices;
4776
pOut->sliceSize = sliceSize;
4777
pOut->surfSize = sliceSize * pOut->numSlices;
4778
pOut->baseAlign = (pIn->swizzleMode == ADDR_SW_LINEAR_GENERAL) ? elementBytes : 256;
4779
pOut->blockWidth = pitchAlign;
4780
pOut->blockHeight = 1;
4781
pOut->blockSlices = 1;
4782
4783
// Following members are useless on GFX10
4784
pOut->mipChainPitch = 0;
4785
pOut->mipChainHeight = 0;
4786
pOut->mipChainSlice = 0;
4787
pOut->epitchIsHeight = FALSE;
4788
4789
// Post calculation validate
4790
ADDR_ASSERT(pOut->sliceSize > 0);
4791
}
4792
}
4793
4794
return returnCode;
4795
}
4796
4797
} // V2
4798
} // Addr
4799
4800