Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/amd/common/ac_surface.c
7132 views
1
/*
2
* Copyright © 2011 Red Hat All Rights Reserved.
3
* Copyright © 2017 Advanced Micro Devices, Inc.
4
* All Rights Reserved.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining
7
* a copy of this software and associated documentation files (the
8
* "Software"), to deal in the Software without restriction, including
9
* without limitation the rights to use, copy, modify, merge, publish,
10
* distribute, sub license, and/or sell copies of the Software, and to
11
* permit persons to whom the Software is furnished to do so, subject to
12
* the following conditions:
13
*
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
16
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
18
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21
* USE OR OTHER DEALINGS IN THE SOFTWARE.
22
*
23
* The above copyright notice and this permission notice (including the
24
* next paragraph) shall be included in all copies or substantial portions
25
* of the Software.
26
*/
27
28
#define AC_SURFACE_INCLUDE_NIR
29
#include "ac_surface.h"
30
31
#include "ac_drm_fourcc.h"
32
#include "ac_gpu_info.h"
33
#include "addrlib/inc/addrinterface.h"
34
#include "addrlib/src/amdgpu_asic_addr.h"
35
#include "amd_family.h"
36
#include "sid.h"
37
#include "util/hash_table.h"
38
#include "util/macros.h"
39
#include "util/simple_mtx.h"
40
#include "util/u_atomic.h"
41
#include "util/format/u_format.h"
42
#include "util/u_math.h"
43
#include "util/u_memory.h"
44
45
#include <errno.h>
46
#include <stdio.h>
47
#include <stdlib.h>
48
49
#ifdef _WIN32
50
#define AMDGPU_TILING_ARRAY_MODE_SHIFT 0
51
#define AMDGPU_TILING_ARRAY_MODE_MASK 0xf
52
#define AMDGPU_TILING_PIPE_CONFIG_SHIFT 4
53
#define AMDGPU_TILING_PIPE_CONFIG_MASK 0x1f
54
#define AMDGPU_TILING_TILE_SPLIT_SHIFT 9
55
#define AMDGPU_TILING_TILE_SPLIT_MASK 0x7
56
#define AMDGPU_TILING_MICRO_TILE_MODE_SHIFT 12
57
#define AMDGPU_TILING_MICRO_TILE_MODE_MASK 0x7
58
#define AMDGPU_TILING_BANK_WIDTH_SHIFT 15
59
#define AMDGPU_TILING_BANK_WIDTH_MASK 0x3
60
#define AMDGPU_TILING_BANK_HEIGHT_SHIFT 17
61
#define AMDGPU_TILING_BANK_HEIGHT_MASK 0x3
62
#define AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT 19
63
#define AMDGPU_TILING_MACRO_TILE_ASPECT_MASK 0x3
64
#define AMDGPU_TILING_NUM_BANKS_SHIFT 21
65
#define AMDGPU_TILING_NUM_BANKS_MASK 0x3
66
#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0
67
#define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f
68
#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT 5
69
#define AMDGPU_TILING_DCC_OFFSET_256B_MASK 0xFFFFFF
70
#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT 29
71
#define AMDGPU_TILING_DCC_PITCH_MAX_MASK 0x3FFF
72
#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT 43
73
#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK 0x1
74
#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 44
75
#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1
76
#define AMDGPU_TILING_SCANOUT_SHIFT 63
77
#define AMDGPU_TILING_SCANOUT_MASK 0x1
78
#define AMDGPU_TILING_SET(field, value) \
79
(((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)
80
#define AMDGPU_TILING_GET(value, field) \
81
(((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK)
82
#else
83
#include "drm-uapi/amdgpu_drm.h"
84
#endif
85
86
#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
87
#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
88
#endif
89
90
#ifndef CIASICIDGFXENGINE_ARCTICISLAND
91
#define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D
92
#endif
93
94
/* Opaque wrapper around one AMD addrlib instance.
 * Created by ac_addrlib_create() and released by ac_addrlib_destroy(). */
struct ac_addrlib {
   ADDR_HANDLE handle; /* handle returned by AddrCreate() */
};
97
98
bool ac_modifier_has_dcc(uint64_t modifier)
99
{
100
return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier);
101
}
102
103
bool ac_modifier_has_dcc_retile(uint64_t modifier)
104
{
105
return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC_RETILE, modifier);
106
}
107
108
static
109
AddrSwizzleMode ac_modifier_gfx9_swizzle_mode(uint64_t modifier)
110
{
111
if (modifier == DRM_FORMAT_MOD_LINEAR)
112
return ADDR_SW_LINEAR;
113
114
return AMD_FMT_MOD_GET(TILE, modifier);
115
}
116
static void
117
ac_modifier_fill_dcc_params(uint64_t modifier, struct radeon_surf *surf,
118
ADDR2_COMPUTE_SURFACE_INFO_INPUT *surf_info)
119
{
120
assert(ac_modifier_has_dcc(modifier));
121
122
if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) {
123
surf_info->flags.metaPipeUnaligned = 0;
124
} else {
125
surf_info->flags.metaPipeUnaligned = !AMD_FMT_MOD_GET(DCC_PIPE_ALIGN, modifier);
126
}
127
128
/* The metaPipeUnaligned is not strictly necessary, but ensure we don't set metaRbUnaligned on
129
* non-displayable DCC surfaces just because num_render_backends = 1 */
130
surf_info->flags.metaRbUnaligned = AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 &&
131
AMD_FMT_MOD_GET(RB, modifier) == 0 &&
132
surf_info->flags.metaPipeUnaligned;
133
134
surf->u.gfx9.color.dcc.independent_64B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier);
135
surf->u.gfx9.color.dcc.independent_128B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier);
136
surf->u.gfx9.color.dcc.max_compressed_block_size = AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier);
137
}
138
139
/* Check whether a single DRM format modifier is usable for the given format
 * on this GPU, honoring the driver-supplied opt-outs in options.
 *
 * Returns true if the (format, modifier) pair can be allocated/imported.
 */
bool ac_is_modifier_supported(const struct radeon_info *info,
                              const struct ac_modifier_options *options,
                              enum pipe_format format,
                              uint64_t modifier)
{

   /* Modifiers are only used for plain color formats that fit in 64 bits. */
   if (util_format_is_compressed(format) ||
       util_format_is_depth_or_stencil(format) ||
       util_format_get_blocksizebits(format) > 64)
      return false;

   /* Pre-GFX9 GPUs don't expose modifier support at all. */
   if (info->chip_class < GFX9)
      return false;

   /* Linear is universally supported once we get here. */
   if(modifier == DRM_FORMAT_MOD_LINEAR)
      return true;

   /* GFX8 may need a different modifier for each plane */
   /* NOTE(review): this check is unreachable — chip_class < GFX9 already
    * returned false above. Kept as-is to match upstream; confirm before
    * removing. */
   if (info->chip_class < GFX9 && util_format_get_num_planes(format) > 1)
      return false;

   /* Bitmask of swizzle modes (bit index == AddrSwizzleMode value) that are
    * acceptable per generation; DCC restricts the set further. */
   uint32_t allowed_swizzles = 0xFFFFFFFF;
   switch(info->chip_class) {
   case GFX9:
      allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x06000000 : 0x06660660;
      break;
   case GFX10:
   case GFX10_3:
      allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x08000000 : 0x0E660660;
      break;
   default:
      return false;
   }

   if (!((1u << ac_modifier_gfx9_swizzle_mode(modifier)) & allowed_swizzles))
      return false;

   if (ac_modifier_has_dcc(modifier)) {
      /* TODO: support multi-planar formats with DCC */
      if (util_format_get_num_planes(format) > 1)
         return false;

      /* DCC requires the graphics engine (compute-only parts can't use it). */
      if (!info->has_graphics)
         return false;

      if (!options->dcc)
         return false;

      if (ac_modifier_has_dcc_retile(modifier) && !options->dcc_retile)
         return false;
   }

   return true;
}
193
194
/* Enumerate every modifier this GPU supports for the given format.
 *
 * Two-call convention: with mods == NULL, *mod_count receives the total
 * number of supported modifiers and true is returned. With mods != NULL, up
 * to *mod_count entries are written, *mod_count is updated to the number
 * written, and the return value says whether the list was complete.
 */
bool ac_get_supported_modifiers(const struct radeon_info *info,
                                const struct ac_modifier_options *options,
                                enum pipe_format format,
                                unsigned *mod_count,
                                uint64_t *mods)
{
   unsigned current_mod = 0;

/* Append a candidate modifier if ac_is_modifier_supported() accepts it;
 * counts all supported entries even past the caller's buffer size. */
#define ADD_MOD(name) \
   if (ac_is_modifier_supported(info, options, format, (name))) { \
      if (mods && current_mod < *mod_count) \
         mods[current_mod] = (name); \
      ++current_mod; \
   }

   /* The modifiers have to be added in descending order of estimated
    * performance. The drivers will prefer modifiers that come earlier
    * in the list. */
   switch (info->chip_class) {
   case GFX9: {
      /* XOR-bit counts are derived from GB_ADDR_CONFIG as addrlib does. */
      unsigned pipe_xor_bits = MIN2(G_0098F8_NUM_PIPES(info->gb_addr_config) +
                                    G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config), 8);
      unsigned bank_xor_bits = MIN2(G_0098F8_NUM_BANKS(info->gb_addr_config), 8 - pipe_xor_bits);
      unsigned pipes = G_0098F8_NUM_PIPES(info->gb_addr_config);
      unsigned rb = G_0098F8_NUM_RB_PER_SE(info->gb_addr_config) +
                    G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config);

      /* Fields shared by every GFX9 DCC modifier below. */
      uint64_t common_dcc = AMD_FMT_MOD_SET(DCC, 1) |
                            AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
                            AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |
                            AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, info->has_dcc_constant_encode) |
                            AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
                            AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits);

      /* Pipe-aligned DCC, display (D) then standard (S) swizzle. */
      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
              AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |
              common_dcc |
              AMD_FMT_MOD_SET(PIPE, pipes) |
              AMD_FMT_MOD_SET(RB, rb))

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
              AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |
              common_dcc |
              AMD_FMT_MOD_SET(PIPE, pipes) |
              AMD_FMT_MOD_SET(RB, rb))

      /* Displayable DCC variants only exist for 32bpp formats here. */
      if (util_format_get_blocksizebits(format) == 32) {
         if (info->max_render_backends == 1) {
            /* With one RB the DCC surface is unaligned-compatible as-is. */
            ADD_MOD(AMD_FMT_MOD |
                    AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
                    AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
                    common_dcc);
         }


         /* Retiled DCC: separate displayable DCC plane. */
         ADD_MOD(AMD_FMT_MOD |
                 AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
                 AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
                 AMD_FMT_MOD_SET(DCC_RETILE, 1) |
                 common_dcc |
                 AMD_FMT_MOD_SET(PIPE, pipes) |
                 AMD_FMT_MOD_SET(RB, rb))
      }


      /* Non-DCC tiled modifiers, then the basic 64K modes, then linear. */
      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
              AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
              AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));

      ADD_MOD(DRM_FORMAT_MOD_LINEAR)
      break;
   }
   case GFX10:
   case GFX10_3: {
      bool rbplus = info->chip_class >= GFX10_3;
      unsigned pipe_xor_bits = G_0098F8_NUM_PIPES(info->gb_addr_config);
      unsigned pkrs = rbplus ? G_0098F8_NUM_PKRS(info->gb_addr_config) : 0;

      unsigned version = rbplus ? AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS : AMD_FMT_MOD_TILE_VER_GFX10;
      /* Fields shared by every GFX10.x DCC modifier below. */
      uint64_t common_dcc = AMD_FMT_MOD_SET(TILE_VERSION, version) |
                            AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
                            AMD_FMT_MOD_SET(DCC, 1) |
                            AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
                            AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
                            AMD_FMT_MOD_SET(PACKERS, pkrs);

      /* Pipe-aligned 128B-block DCC: the fastest non-displayable layout. */
      ADD_MOD(AMD_FMT_MOD | common_dcc |
              AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |
              AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
              AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B))

      /* Displayable DCC: only Navi12/14 and GFX10.3+ support it. */
      if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14 || info->chip_class >= GFX10_3) {
         bool independent_128b = info->chip_class >= GFX10_3;

         if (info->max_render_backends == 1) {
            ADD_MOD(AMD_FMT_MOD | common_dcc |
                    AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
                    AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, independent_128b) |
                    AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B))
         }

         ADD_MOD(AMD_FMT_MOD | common_dcc |
                 AMD_FMT_MOD_SET(DCC_RETILE, 1) |
                 AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
                 AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, independent_128b) |
                 AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B))
      }

      /* Non-DCC tiled modifiers, GFX9-compatible fallbacks, then linear. */
      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE_VERSION, version) |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
              AMD_FMT_MOD_SET(PACKERS, pkrs))

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits))

      if (util_format_get_blocksizebits(format) != 32) {
         ADD_MOD(AMD_FMT_MOD |
                 AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
                 AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
      }

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));

      ADD_MOD(DRM_FORMAT_MOD_LINEAR)
      break;
   }
   default:
      break;
   }

#undef ADD_MOD

   /* Size query: report the count only. */
   if (!mods) {
      *mod_count = current_mod;
      return true;
   }

   /* Fill call: clamp to what was written and report completeness. */
   bool complete = current_mod <= *mod_count;
   *mod_count = MIN2(*mod_count, current_mod);
   return complete;
}
361
362
/* addrlib system-memory allocation callback: forwards to malloc. */
static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT *pInput)
{
   void *mem = malloc(pInput->sizeInBytes);

   return mem;
}
366
367
/* addrlib system-memory release callback: forwards to free. */
static ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT *pInput)
{
   void *mem = pInput->pVirtAddr;

   free(mem);
   return ADDR_OK;
}
372
373
/* Create an addrlib instance configured for the given GPU.
 *
 * On success returns a heap-allocated wrapper (free with
 * ac_addrlib_destroy()); if max_alignment is non-NULL it receives the
 * maximum base alignment addrlib may require. Returns NULL on failure.
 */
struct ac_addrlib *ac_addrlib_create(const struct radeon_info *info,
                                     uint64_t *max_alignment)
{
   ADDR_CREATE_INPUT addrCreateInput = {0};
   ADDR_CREATE_OUTPUT addrCreateOutput = {0};
   ADDR_REGISTER_VALUE regValue = {0};
   ADDR_CREATE_FLAGS createFlags = {{0}};
   ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0};
   ADDR_E_RETURNCODE addrRet;

   addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);
   addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);

   regValue.gbAddrConfig = info->gb_addr_config;
   createFlags.value = 0;

   addrCreateInput.chipFamily = info->family_id;
   addrCreateInput.chipRevision = info->chip_external_rev;

   if (addrCreateInput.chipFamily == FAMILY_UNKNOWN)
      return NULL;

   if (addrCreateInput.chipFamily >= FAMILY_AI) {
      /* GFX9+ ("Arctic Islands" engine): addrlib only needs GB_ADDR_CONFIG. */
      addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND;
   } else {
      /* GFX6-GFX8: addrlib needs the raw tiling register state. */
      regValue.noOfBanks = info->mc_arb_ramcfg & 0x3;
      regValue.noOfRanks = (info->mc_arb_ramcfg & 0x4) >> 2;

      regValue.backendDisables = info->enabled_rb_mask;
      regValue.pTileConfig = info->si_tile_mode_array;
      regValue.noOfEntries = ARRAY_SIZE(info->si_tile_mode_array);
      if (addrCreateInput.chipFamily == FAMILY_SI) {
         /* SI has no macrotile mode array. */
         regValue.pMacroTileConfig = NULL;
         regValue.noOfMacroEntries = 0;
      } else {
         regValue.pMacroTileConfig = info->cik_macrotile_mode_array;
         regValue.noOfMacroEntries = ARRAY_SIZE(info->cik_macrotile_mode_array);
      }

      createFlags.useTileIndex = 1;
      createFlags.useHtileSliceAlign = 1;

      addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
   }

   /* Memory callbacks so addrlib uses our allocator (plain malloc/free). */
   addrCreateInput.callbacks.allocSysMem = allocSysMem;
   addrCreateInput.callbacks.freeSysMem = freeSysMem;
   addrCreateInput.callbacks.debugPrint = 0;
   addrCreateInput.createFlags = createFlags;
   addrCreateInput.regValue = regValue;

   addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput);
   if (addrRet != ADDR_OK)
      return NULL;

   /* The alignment query is best-effort: on failure *max_alignment is
    * simply left untouched. */
   if (max_alignment) {
      addrRet = AddrGetMaxAlignments(addrCreateOutput.hLib, &addrGetMaxAlignmentsOutput);
      if (addrRet == ADDR_OK) {
         *max_alignment = addrGetMaxAlignmentsOutput.baseAlign;
      }
   }

   struct ac_addrlib *addrlib = calloc(1, sizeof(struct ac_addrlib));
   if (!addrlib) {
      /* Don't leak the addrlib handle if the wrapper allocation fails. */
      AddrDestroy(addrCreateOutput.hLib);
      return NULL;
   }

   addrlib->handle = addrCreateOutput.hLib;
   return addrlib;
}
444
445
void ac_addrlib_destroy(struct ac_addrlib *addrlib)
446
{
447
AddrDestroy(addrlib->handle);
448
free(addrlib);
449
}
450
451
void *ac_addrlib_get_handle(struct ac_addrlib *addrlib)
452
{
453
return addrlib->handle;
454
}
455
456
static int surf_config_sanity(const struct ac_surf_config *config, unsigned flags)
457
{
458
/* FMASK is allocated together with the color surface and can't be
459
* allocated separately.
460
*/
461
assert(!(flags & RADEON_SURF_FMASK));
462
if (flags & RADEON_SURF_FMASK)
463
return -EINVAL;
464
465
/* all dimension must be at least 1 ! */
466
if (!config->info.width || !config->info.height || !config->info.depth ||
467
!config->info.array_size || !config->info.levels)
468
return -EINVAL;
469
470
switch (config->info.samples) {
471
case 0:
472
case 1:
473
case 2:
474
case 4:
475
case 8:
476
break;
477
case 16:
478
if (flags & RADEON_SURF_Z_OR_SBUFFER)
479
return -EINVAL;
480
break;
481
default:
482
return -EINVAL;
483
}
484
485
if (!(flags & RADEON_SURF_Z_OR_SBUFFER)) {
486
switch (config->info.storage_samples) {
487
case 0:
488
case 1:
489
case 2:
490
case 4:
491
case 8:
492
break;
493
default:
494
return -EINVAL;
495
}
496
}
497
498
if (config->is_3d && config->info.array_size > 1)
499
return -EINVAL;
500
if (config->is_cube && config->info.depth > 1)
501
return -EINVAL;
502
503
return 0;
504
}
505
506
/* Compute the layout of one mip level of a GFX6-GFX8 surface and fold it
 * into surf: level offset/pitch, running surface size, and (for level-
 * appropriate cases) DCC and HTILE metadata sizes.
 *
 * Must be called for levels in increasing order: it relies on state the
 * previous call left behind (base-level pitch, surf->surf_size,
 * AddrDccOut->subLvlCompressible, surf->meta_size).
 *
 * Returns 0 on success, or the non-zero ADDR_E_RETURNCODE from addrlib on
 * failure.
 */
static int gfx6_compute_level(ADDR_HANDLE addrlib, const struct ac_surf_config *config,
                              struct radeon_surf *surf, bool is_stencil, unsigned level,
                              bool compressed, ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
                              ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
                              ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
                              ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
                              ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
                              ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
{
   struct legacy_surf_level *surf_level;
   struct legacy_surf_dcc_level *dcc_level;
   ADDR_E_RETURNCODE ret;

   AddrSurfInfoIn->mipLevel = level;
   AddrSurfInfoIn->width = u_minify(config->info.width, level);
   AddrSurfInfoIn->height = u_minify(config->info.height, level);

   /* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics,
    * because GFX9 needs linear alignment of 256 bytes.
    */
   if (config->info.levels == 1 && AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED &&
       AddrSurfInfoIn->bpp && util_is_power_of_two_or_zero(AddrSurfInfoIn->bpp)) {
      unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8);

      AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment);
   }

   /* addrlib assumes the bytes/pixel is a divisor of 64, which is not
    * true for r32g32b32 formats. */
   if (AddrSurfInfoIn->bpp == 96) {
      assert(config->info.levels == 1);
      assert(AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED);

      /* The least common multiple of 64 bytes and 12 bytes/pixel is
       * 192 bytes, or 16 pixels. */
      AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, 16);
   }

   /* The third dimension: depth slices, cube faces, or array layers. */
   if (config->is_3d)
      AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level);
   else if (config->is_cube)
      AddrSurfInfoIn->numSlices = 6;
   else
      AddrSurfInfoIn->numSlices = config->info.array_size;

   if (level > 0) {
      /* Set the base level pitch. This is needed for calculation
       * of non-zero levels. */
      if (is_stencil)
         AddrSurfInfoIn->basePitch = surf->u.legacy.zs.stencil_level[0].nblk_x;
      else
         AddrSurfInfoIn->basePitch = surf->u.legacy.level[0].nblk_x;

      /* Convert blocks to pixels for compressed formats. */
      if (compressed)
         AddrSurfInfoIn->basePitch *= surf->blk_w;
   }

   ret = AddrComputeSurfaceInfo(addrlib, AddrSurfInfoIn, AddrSurfInfoOut);
   if (ret != ADDR_OK) {
      return ret;
   }

   /* Record this level's placement; offsets are stored in 256B units. */
   surf_level = is_stencil ? &surf->u.legacy.zs.stencil_level[level] : &surf->u.legacy.level[level];
   dcc_level = &surf->u.legacy.color.dcc_level[level];
   surf_level->offset_256B = align64(surf->surf_size, AddrSurfInfoOut->baseAlign) / 256;
   surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4;
   surf_level->nblk_x = AddrSurfInfoOut->pitch;
   surf_level->nblk_y = AddrSurfInfoOut->height;

   /* Translate the addrlib tile mode back to the radeon_surf mode enum. */
   switch (AddrSurfInfoOut->tileMode) {
   case ADDR_TM_LINEAR_ALIGNED:
      surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
      break;
   case ADDR_TM_1D_TILED_THIN1:
   case ADDR_TM_PRT_TILED_THIN1:
      surf_level->mode = RADEON_SURF_MODE_1D;
      break;
   case ADDR_TM_2D_TILED_THIN1:
   case ADDR_TM_PRT_2D_TILED_THIN1:
      surf_level->mode = RADEON_SURF_MODE_2D;
      break;
   default:
      assert(0);
   }

   if (is_stencil)
      surf->u.legacy.zs.stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex;
   else
      surf->u.legacy.tiling_index[level] = AddrSurfInfoOut->tileIndex;

   /* Sparse (PRT) surfaces: remember the tile dimensions and find the first
    * level small enough to fall into the mip tail. */
   if (AddrSurfInfoIn->flags.prt) {
      if (level == 0) {
         surf->prt_tile_width = AddrSurfInfoOut->pitchAlign;
         surf->prt_tile_height = AddrSurfInfoOut->heightAlign;
      }
      if (surf_level->nblk_x >= surf->prt_tile_width &&
          surf_level->nblk_y >= surf->prt_tile_height) {
         /* +1 because the current level is not in the miptail */
         surf->first_mip_tail_level = level + 1;
      }
   }

   /* Grow the running total surface size. */
   surf->surf_size = (uint64_t)surf_level->offset_256B * 256 + AddrSurfInfoOut->surfSize;

   /* Clear DCC fields at the beginning. */
   if (!AddrSurfInfoIn->flags.depth && !AddrSurfInfoIn->flags.stencil)
      dcc_level->dcc_offset = 0;

   /* The previous level's flag tells us if we can use DCC for this level. */
   if (AddrSurfInfoIn->flags.dccCompatible && (level == 0 || AddrDccOut->subLvlCompressible)) {
      bool prev_level_clearable = level == 0 || AddrDccOut->dccRamSizeAligned;

      AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
      AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
      AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
      AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
      AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;

      ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut);

      if (ret == ADDR_OK) {
         /* Append this level's DCC to the running metadata layout. */
         dcc_level->dcc_offset = surf->meta_size;
         surf->num_meta_levels = level + 1;
         surf->meta_size = dcc_level->dcc_offset + AddrDccOut->dccRamSize;
         surf->meta_alignment_log2 = MAX2(surf->meta_alignment_log2, util_logbase2(AddrDccOut->dccRamBaseAlign));

         /* If the DCC size of a subresource (1 mip level or 1 slice)
          * is not aligned, the DCC memory layout is not contiguous for
          * that subresource, which means we can't use fast clear.
          *
          * We only do fast clears for whole mipmap levels. If we did
          * per-slice fast clears, the same restriction would apply.
          * (i.e. only compute the slice size and see if it's aligned)
          *
          * The last level can be non-contiguous and still be clearable
          * if it's interleaved with the next level that doesn't exist.
          */
         if (AddrDccOut->dccRamSizeAligned ||
             (prev_level_clearable && level == config->info.levels - 1))
            dcc_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize;
         else
            dcc_level->dcc_fast_clear_size = 0;

         /* Compute the DCC slice size because addrlib doesn't
          * provide this info. As DCC memory is linear (each
          * slice is the same size) it's easy to compute.
          */
         surf->meta_slice_size = AddrDccOut->dccRamSize / config->info.array_size;

         /* For arrays, we have to compute the DCC info again
          * with one slice size to get a correct fast clear
          * size.
          */
         if (config->info.array_size > 1) {
            AddrDccIn->colorSurfSize = AddrSurfInfoOut->sliceSize;
            AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
            AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
            AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
            AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;

            ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut);
            if (ret == ADDR_OK) {
               /* If the DCC memory isn't properly
                * aligned, the data are interleaved
                * accross slices.
                */
               if (AddrDccOut->dccRamSizeAligned)
                  dcc_level->dcc_slice_fast_clear_size = AddrDccOut->dccFastClearSize;
               else
                  dcc_level->dcc_slice_fast_clear_size = 0;
            }

            /* Callers that need per-layer contiguous DCC can't use this
             * layout: drop DCC entirely for the surface. */
            if (surf->flags & RADEON_SURF_CONTIGUOUS_DCC_LAYERS &&
                surf->meta_slice_size != dcc_level->dcc_slice_fast_clear_size) {
               surf->meta_size = 0;
               surf->num_meta_levels = 0;
               AddrDccOut->subLvlCompressible = false;
            }
         } else {
            dcc_level->dcc_slice_fast_clear_size = dcc_level->dcc_fast_clear_size;
         }
      }
   }

   /* HTILE. */
   /* Only computed once, from the base level of a 2D-tiled depth surface. */
   if (!is_stencil && AddrSurfInfoIn->flags.depth && surf_level->mode == RADEON_SURF_MODE_2D &&
       level == 0 && !(surf->flags & RADEON_SURF_NO_HTILE)) {
      AddrHtileIn->flags.tcCompatible = AddrSurfInfoOut->tcCompatible;
      AddrHtileIn->pitch = AddrSurfInfoOut->pitch;
      AddrHtileIn->height = AddrSurfInfoOut->height;
      AddrHtileIn->numSlices = AddrSurfInfoOut->depth;
      AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8;
      AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8;
      AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo;
      AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex;
      AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;

      ret = AddrComputeHtileInfo(addrlib, AddrHtileIn, AddrHtileOut);

      if (ret == ADDR_OK) {
         surf->meta_size = AddrHtileOut->htileBytes;
         surf->meta_slice_size = AddrHtileOut->sliceSize;
         surf->meta_alignment_log2 = util_logbase2(AddrHtileOut->baseAlign);
         surf->meta_pitch = AddrHtileOut->pitch;
         surf->num_meta_levels = level + 1;
      }
   }

   return 0;
}
717
718
static void gfx6_set_micro_tile_mode(struct radeon_surf *surf, const struct radeon_info *info)
719
{
720
uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]];
721
722
if (info->chip_class >= GFX7)
723
surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode);
724
else
725
surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
726
}
727
728
static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
729
{
730
unsigned index, tileb;
731
732
tileb = 8 * 8 * surf->bpe;
733
tileb = MIN2(surf->u.legacy.tile_split, tileb);
734
735
for (index = 0; tileb > 64; index++)
736
tileb >>= 1;
737
738
assert(index < 16);
739
return index;
740
}
741
742
static bool get_display_flag(const struct ac_surf_config *config, const struct radeon_surf *surf)
743
{
744
unsigned num_channels = config->info.num_channels;
745
unsigned bpe = surf->bpe;
746
747
/* With modifiers the kernel is in charge of whether it is displayable.
748
* We need to ensure at least 32 pixels pitch alignment, but this is
749
* always the case when the blocksize >= 4K.
750
*/
751
if (surf->modifier != DRM_FORMAT_MOD_INVALID)
752
return false;
753
754
if (!config->is_3d && !config->is_cube && !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
755
surf->flags & RADEON_SURF_SCANOUT && config->info.samples <= 1 && surf->blk_w <= 2 &&
756
surf->blk_h == 1) {
757
/* subsampled */
758
if (surf->blk_w == 2 && surf->blk_h == 1)
759
return true;
760
761
if (/* RGBA8 or RGBA16F */
762
(bpe >= 4 && bpe <= 8 && num_channels == 4) ||
763
/* R5G6B5 or R5G5B5A1 */
764
(bpe == 2 && num_channels >= 3) ||
765
/* C8 palette */
766
(bpe == 1 && num_channels == 1))
767
return true;
768
}
769
return false;
770
}
771
772
/**
 * This must be called after the first level is computed.
 *
 * Copy surface-global settings like pipe/bank config from level 0 surface
 * computation, and compute tile swizzle.
 *
 * Returns 0 on success or the non-zero addrlib error code from the base
 * swizzle computation.
 */
static int gfx6_surface_settings(ADDR_HANDLE addrlib, const struct radeon_info *info,
                                 const struct ac_surf_config *config,
                                 ADDR_COMPUTE_SURFACE_INFO_OUTPUT *csio, struct radeon_surf *surf)
{
   surf->surf_alignment_log2 = util_logbase2(csio->baseAlign);
   /* addrlib pipe configs are 1-based; ours are 0-based. */
   surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1;
   gfx6_set_micro_tile_mode(surf, info);

   /* For 2D modes only. */
   if (csio->tileMode >= ADDR_TM_2D_TILED_THIN1) {
      surf->u.legacy.bankw = csio->pTileInfo->bankWidth;
      surf->u.legacy.bankh = csio->pTileInfo->bankHeight;
      surf->u.legacy.mtilea = csio->pTileInfo->macroAspectRatio;
      surf->u.legacy.tile_split = csio->pTileInfo->tileSplitBytes;
      surf->u.legacy.num_banks = csio->pTileInfo->banks;
      surf->u.legacy.macro_tile_index = csio->macroModeIndex;
   } else {
      surf->u.legacy.macro_tile_index = 0;
   }

   /* Compute tile swizzle. */
   /* TODO: fix tile swizzle with mipmapping for GFX6 */
   if ((info->chip_class >= GFX7 || config->info.levels == 1) && config->info.surf_index &&
       surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
       !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) &&
       !get_display_flag(config, surf)) {
      ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
      ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};

      AddrBaseSwizzleIn.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
      AddrBaseSwizzleOut.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);

      /* Each surface gets a distinct index so swizzles vary across
       * allocations (atomically incremented shared counter). */
      AddrBaseSwizzleIn.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
      AddrBaseSwizzleIn.tileIndex = csio->tileIndex;
      AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex;
      AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo;
      AddrBaseSwizzleIn.tileMode = csio->tileMode;

      int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, &AddrBaseSwizzleOut);
      if (r != ADDR_OK)
         return r;

      assert(AddrBaseSwizzleOut.tileSwizzle <=
             u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
      surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
   }
   return 0;
}
826
827
static void ac_compute_cmask(const struct radeon_info *info, const struct ac_surf_config *config,
828
struct radeon_surf *surf)
829
{
830
unsigned pipe_interleave_bytes = info->pipe_interleave_bytes;
831
unsigned num_pipes = info->num_tile_pipes;
832
unsigned cl_width, cl_height;
833
834
if (surf->flags & RADEON_SURF_Z_OR_SBUFFER || surf->is_linear ||
835
(config->info.samples >= 2 && !surf->fmask_size))
836
return;
837
838
assert(info->chip_class <= GFX8);
839
840
switch (num_pipes) {
841
case 2:
842
cl_width = 32;
843
cl_height = 16;
844
break;
845
case 4:
846
cl_width = 32;
847
cl_height = 32;
848
break;
849
case 8:
850
cl_width = 64;
851
cl_height = 32;
852
break;
853
case 16: /* Hawaii */
854
cl_width = 64;
855
cl_height = 64;
856
break;
857
default:
858
assert(0);
859
return;
860
}
861
862
unsigned base_align = num_pipes * pipe_interleave_bytes;
863
864
unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width * 8);
865
unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height * 8);
866
unsigned slice_elements = (width * height) / (8 * 8);
867
868
/* Each element of CMASK is a nibble. */
869
unsigned slice_bytes = slice_elements / 2;
870
871
surf->u.legacy.color.cmask_slice_tile_max = (width * height) / (128 * 128);
872
if (surf->u.legacy.color.cmask_slice_tile_max)
873
surf->u.legacy.color.cmask_slice_tile_max -= 1;
874
875
unsigned num_layers;
876
if (config->is_3d)
877
num_layers = config->info.depth;
878
else if (config->is_cube)
879
num_layers = 6;
880
else
881
num_layers = config->info.array_size;
882
883
surf->cmask_alignment_log2 = util_logbase2(MAX2(256, base_align));
884
surf->cmask_slice_size = align(slice_bytes, base_align);
885
surf->cmask_size = surf->cmask_slice_size * num_layers;
886
}
887
888
/**
 * Fill in the tiling information in \p surf based on the given surface config.
 *
 * The following fields of \p surf must be initialized by the caller:
 * blk_w, blk_h, bpe, flags.
 *
 * \param addrlib  addrlib handle used for all layout queries
 * \param info     chip info (family/chip_class select tile-index heuristics)
 * \param config   texture dimensions, sample counts, level count
 * \param mode     requested tiling mode; may be overridden below (MSAA, DB)
 * \param surf     output surface; also carries input flags and, for shared
 *                 resources, preset macrotile parameters
 * \return 0 on success, a non-zero addrlib error code otherwise.
 */
static int gfx6_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info,
                                const struct ac_surf_config *config, enum radeon_surf_mode mode,
                                struct radeon_surf *surf)
{
   unsigned level;
   bool compressed;
   ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
   ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
   ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
   ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
   ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
   ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
   ADDR_TILEINFO AddrTileInfoIn = {0};
   ADDR_TILEINFO AddrTileInfoOut = {0};
   int r;

   AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
   AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
   AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
   AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
   AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT);
   AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT);
   AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;

   /* 4x4 block dimensions are treated as block-compressed (BCn) here. */
   compressed = surf->blk_w == 4 && surf->blk_h == 4;

   /* MSAA requires 2D tiling. */
   if (config->info.samples > 1)
      mode = RADEON_SURF_MODE_2D;

   /* DB doesn't support linear layouts. */
   if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) && mode < RADEON_SURF_MODE_1D)
      mode = RADEON_SURF_MODE_1D;

   /* Set the requested tiling mode. */
   switch (mode) {
   case RADEON_SURF_MODE_LINEAR_ALIGNED:
      AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED;
      break;
   case RADEON_SURF_MODE_1D:
      if (surf->flags & RADEON_SURF_PRT)
         AddrSurfInfoIn.tileMode = ADDR_TM_PRT_TILED_THIN1;
      else
         AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1;
      break;
   case RADEON_SURF_MODE_2D:
      if (surf->flags & RADEON_SURF_PRT)
         AddrSurfInfoIn.tileMode = ADDR_TM_PRT_2D_TILED_THIN1;
      else
         AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1;
      break;
   default:
      assert(0);
   }

   /* The format must be set correctly for the allocation of compressed
    * textures to work. In other cases, setting the bpp is sufficient.
    */
   if (compressed) {
      switch (surf->bpe) {
      case 8:
         AddrSurfInfoIn.format = ADDR_FMT_BC1;
         break;
      case 16:
         AddrSurfInfoIn.format = ADDR_FMT_BC3;
         break;
      default:
         assert(0);
      }
   } else {
      AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8;
   }

   AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);
   AddrSurfInfoIn.tileIndex = -1;

   /* Color surfaces may have fewer storage samples than coverage samples. */
   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
      AddrDccIn.numSamples = AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples);
   }

   /* Set the micro tile type. */
   if (surf->flags & RADEON_SURF_SCANOUT)
      AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
   else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
      AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;
   else
      AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;

   AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
   AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
   AddrSurfInfoIn.flags.cube = config->is_cube;
   AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
   AddrSurfInfoIn.flags.pow2Pad = config->info.levels > 1;
   AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;
   AddrSurfInfoIn.flags.prt = (surf->flags & RADEON_SURF_PRT) != 0;

   /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
    * requested, because TC-compatible HTILE requires 2D tiling.
    */
   AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible &&
                                    !AddrSurfInfoIn.flags.fmask && config->info.samples <= 1 &&
                                    !(surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE);

   /* DCC notes:
    * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
    *   with samples >= 4.
    * - Mipmapped array textures have low performance (discovered by a closed
    *   driver team).
    */
   AddrSurfInfoIn.flags.dccCompatible =
      info->chip_class >= GFX8 && info->has_graphics && /* disable DCC on compute-only chips */
      !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
      !compressed &&
      ((config->info.array_size == 1 && config->info.depth == 1) || config->info.levels == 1);

   AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0;
   AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER);

   /* On GFX7-GFX8, the DB uses the same pitch and tile mode (except tilesplit)
    * for Z and stencil. This can cause a number of problems which we work
    * around here:
    *
    * - a depth part that is incompatible with mipmapped texturing
    * - at least on Stoney, entirely incompatible Z/S aspects (e.g.
    *   incorrect tiling applied to the stencil part, stencil buffer
    *   memory accesses that go out of bounds) even without mipmapping
    *
    * Some piglit tests that are prone to different types of related
    * failures:
    *  ./bin/ext_framebuffer_multisample-upsample 2 stencil
    *  ./bin/framebuffer-blit-levels {draw,read} stencil
    *  ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample}
    *  ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw}
    *  ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8
    */
   int stencil_tile_idx = -1;

   if (AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.noStencil &&
       (config->info.levels > 1 || info->family == CHIP_STONEY)) {
      /* Compute stencilTileIdx that is compatible with the (depth)
       * tileIdx. This degrades the depth surface if necessary to
       * ensure that a matching stencilTileIdx exists. */
      AddrSurfInfoIn.flags.matchStencilTileCfg = 1;

      /* Keep the depth mip-tail compatible with texturing. */
      AddrSurfInfoIn.flags.noStencil = 1;
   }

   /* Set preferred macrotile parameters. This is usually required
    * for shared resources. This is for 2D tiling only. */
   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
       AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 && surf->u.legacy.bankw &&
       surf->u.legacy.bankh && surf->u.legacy.mtilea && surf->u.legacy.tile_split) {
      /* If any of these parameters are incorrect, the calculation
       * will fail. */
      AddrTileInfoIn.banks = surf->u.legacy.num_banks;
      AddrTileInfoIn.bankWidth = surf->u.legacy.bankw;
      AddrTileInfoIn.bankHeight = surf->u.legacy.bankh;
      AddrTileInfoIn.macroAspectRatio = surf->u.legacy.mtilea;
      AddrTileInfoIn.tileSplitBytes = surf->u.legacy.tile_split;
      AddrTileInfoIn.pipeConfig = surf->u.legacy.pipe_config + 1; /* +1 compared to GB_TILE_MODE */
      AddrSurfInfoIn.flags.opt4Space = 0;
      AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;

      /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
       * the tile index, because we are expected to know it if
       * we know the other parameters.
       *
       * This is something that can easily be fixed in Addrlib.
       * For now, just figure it out here.
       * Note that only 2D_TILE_THIN1 is handled here.
       */
      assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
      assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1);

      if (info->chip_class == GFX6) {
         if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) {
            if (surf->bpe == 2)
               AddrSurfInfoIn.tileIndex = 11; /* 16bpp */
            else
               AddrSurfInfoIn.tileIndex = 12; /* 32bpp */
         } else {
            if (surf->bpe == 1)
               AddrSurfInfoIn.tileIndex = 14; /* 8bpp */
            else if (surf->bpe == 2)
               AddrSurfInfoIn.tileIndex = 15; /* 16bpp */
            else if (surf->bpe == 4)
               AddrSurfInfoIn.tileIndex = 16; /* 32bpp */
            else
               AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */
         }
      } else {
         /* GFX7 - GFX8 */
         if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE)
            AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
         else
            AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */

         /* Addrlib doesn't set this if tileIndex is forced like above. */
         AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf);
      }
   }

   /* Reset all metadata outputs; they are (re)computed below. */
   surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
   surf->num_meta_levels = 0;
   surf->surf_size = 0;
   surf->meta_size = 0;
   surf->meta_slice_size = 0;
   surf->meta_alignment_log2 = 0;

   const bool only_stencil =
      (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER);

   /* Calculate texture layout information. */
   if (!only_stencil) {
      for (level = 0; level < config->info.levels; level++) {
         r = gfx6_compute_level(addrlib, config, surf, false, level, compressed, &AddrSurfInfoIn,
                                &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, &AddrHtileIn,
                                &AddrHtileOut);
         if (r)
            return r;

         /* The remaining per-surface settings only depend on level 0. */
         if (level > 0)
            continue;

         if (!AddrSurfInfoOut.tcCompatible) {
            AddrSurfInfoIn.flags.tcCompatible = 0;
            surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
         }

         if (AddrSurfInfoIn.flags.matchStencilTileCfg) {
            AddrSurfInfoIn.flags.matchStencilTileCfg = 0;
            AddrSurfInfoIn.tileIndex = AddrSurfInfoOut.tileIndex;
            stencil_tile_idx = AddrSurfInfoOut.stencilTileIdx;

            assert(stencil_tile_idx >= 0);
         }

         r = gfx6_surface_settings(addrlib, info, config, &AddrSurfInfoOut, surf);
         if (r)
            return r;
      }
   }

   /* Calculate texture layout information for stencil. */
   if (surf->flags & RADEON_SURF_SBUFFER) {
      AddrSurfInfoIn.tileIndex = stencil_tile_idx;
      AddrSurfInfoIn.bpp = 8;
      AddrSurfInfoIn.flags.depth = 0;
      AddrSurfInfoIn.flags.stencil = 1;
      AddrSurfInfoIn.flags.tcCompatible = 0;
      /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
      AddrTileInfoIn.tileSplitBytes = surf->u.legacy.stencil_tile_split;

      for (level = 0; level < config->info.levels; level++) {
         r = gfx6_compute_level(addrlib, config, surf, true, level, compressed, &AddrSurfInfoIn,
                                &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, NULL, NULL);
         if (r)
            return r;

         /* DB uses the depth pitch for both stencil and depth. */
         if (!only_stencil) {
            if (surf->u.legacy.zs.stencil_level[level].nblk_x != surf->u.legacy.level[level].nblk_x)
               surf->u.legacy.stencil_adjusted = true;
         } else {
            surf->u.legacy.level[level].nblk_x = surf->u.legacy.zs.stencil_level[level].nblk_x;
         }

         if (level == 0) {
            if (only_stencil) {
               r = gfx6_surface_settings(addrlib, info, config, &AddrSurfInfoOut, surf);
               if (r)
                  return r;
            }

            /* For 2D modes only. */
            if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
               surf->u.legacy.stencil_tile_split = AddrSurfInfoOut.pTileInfo->tileSplitBytes;
            }
         }
      }
   }

   /* Compute FMASK. */
   if (config->info.samples >= 2 && AddrSurfInfoIn.flags.color && info->has_graphics &&
       !(surf->flags & RADEON_SURF_NO_FMASK)) {
      ADDR_COMPUTE_FMASK_INFO_INPUT fin = {0};
      ADDR_COMPUTE_FMASK_INFO_OUTPUT fout = {0};
      ADDR_TILEINFO fmask_tile_info = {0};

      fin.size = sizeof(fin);
      fout.size = sizeof(fout);

      fin.tileMode = AddrSurfInfoOut.tileMode;
      fin.pitch = AddrSurfInfoOut.pitch;
      fin.height = config->info.height;
      fin.numSlices = AddrSurfInfoIn.numSlices;
      fin.numSamples = AddrSurfInfoIn.numSamples;
      fin.numFrags = AddrSurfInfoIn.numFrags;
      fin.tileIndex = -1;
      fout.pTileInfo = &fmask_tile_info;

      r = AddrComputeFmaskInfo(addrlib, &fin, &fout);
      if (r)
         return r;

      surf->fmask_size = fout.fmaskBytes;
      surf->fmask_alignment_log2 = util_logbase2(fout.baseAlign);
      surf->fmask_slice_size = fout.sliceSize;
      surf->fmask_tile_swizzle = 0;

      surf->u.legacy.color.fmask.slice_tile_max = (fout.pitch * fout.height) / 64;
      if (surf->u.legacy.color.fmask.slice_tile_max)
         surf->u.legacy.color.fmask.slice_tile_max -= 1;

      surf->u.legacy.color.fmask.tiling_index = fout.tileIndex;
      surf->u.legacy.color.fmask.bankh = fout.pTileInfo->bankHeight;
      surf->u.legacy.color.fmask.pitch_in_pixels = fout.pitch;

      /* Compute tile swizzle for FMASK. */
      if (config->info.fmask_surf_index && !(surf->flags & RADEON_SURF_SHAREABLE)) {
         ADDR_COMPUTE_BASE_SWIZZLE_INPUT xin = {0};
         ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT xout = {0};

         xin.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
         xout.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);

         /* This counter starts from 1 instead of 0. */
         xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
         xin.tileIndex = fout.tileIndex;
         xin.macroModeIndex = fout.macroModeIndex;
         xin.pTileInfo = fout.pTileInfo;
         xin.tileMode = fin.tileMode;

         int r = AddrComputeBaseSwizzle(addrlib, &xin, &xout);
         if (r != ADDR_OK)
            return r;

         assert(xout.tileSwizzle <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
         surf->fmask_tile_swizzle = xout.tileSwizzle;
      }
   }

   /* Recalculate the whole DCC miptree size including disabled levels.
    * This is what addrlib does, but calling addrlib would be a lot more
    * complicated.
    */
   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_size && config->info.levels > 1) {
      /* The smallest miplevels that are never compressed by DCC
       * still read the DCC buffer via TC if the base level uses DCC,
       * and for some reason the DCC buffer needs to be larger if
       * the miptree uses non-zero tile_swizzle. Otherwise there are
       * VM faults.
       *
       * "dcc_alignment * 4" was determined by trial and error.
       */
      surf->meta_size = align64(surf->surf_size >> 8, (1 << surf->meta_alignment_log2) * 4);
   }

   /* Make sure HTILE covers the whole miptree, because the shader reads
    * TC-compatible HTILE even for levels where it's disabled by DB.
    */
   if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_TC_COMPATIBLE_HTILE) &&
       surf->meta_size && config->info.levels > 1) {
      /* MSAA can't occur with levels > 1, so ignore the sample count. */
      const unsigned total_pixels = surf->surf_size / surf->bpe;
      const unsigned htile_block_size = 8 * 8;
      const unsigned htile_element_size = 4;

      surf->meta_size = (total_pixels / htile_block_size) * htile_element_size;
      surf->meta_size = align(surf->meta_size, 1 << surf->meta_alignment_log2);
   } else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && !surf->meta_size) {
      /* Unset this if HTILE is not present. */
      surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
   }

   surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED;
   surf->is_displayable = surf->is_linear || surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY ||
                          surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER;

   /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
    * used at the same time. This case is not currently expected to occur
    * because we don't use rotated. Enforce this restriction on all chips
    * to facilitate testing.
    */
   if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER) {
      assert(!"rotate micro tile mode is unsupported");
      return ADDR_ERROR;
   }

   ac_compute_cmask(info, config, surf);
   return 0;
}
1287
1288
/* This is only called when expecting a tiled layout.
 *
 * Ask addrlib for the preferred swizzle mode for the surface described by
 * \p in, applying the constraints carried in \p surf (forced micro tile
 * mode, PRT) and the FMASK override.
 *
 * \return 0 on success (then *swizzle_mode holds the chosen mode), or the
 *         addrlib error code otherwise.
 */
static int gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, const struct radeon_info *info,
                                           struct radeon_surf *surf,
                                           ADDR2_COMPUTE_SURFACE_INFO_INPUT *in, bool is_fmask,
                                           AddrSwizzleMode *swizzle_mode)
{
   ADDR_E_RETURNCODE ret;
   ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin = {0};
   ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT sout = {0};

   sin.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT);
   sout.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT);

   /* Forward the surface description from the compute-surface-info input. */
   sin.flags = in->flags;
   sin.resourceType = in->resourceType;
   sin.format = in->format;
   sin.resourceLoction = ADDR_RSRC_LOC_INVIS; /* "Loction" is addrlib's field spelling */
   /* TODO: We could allow some of these: */
   sin.forbiddenBlock.micro = 1; /* don't allow the 256B swizzle modes */
   sin.forbiddenBlock.var = 1;   /* don't allow the variable-sized swizzle modes */
   sin.bpp = in->bpp;
   sin.width = in->width;
   sin.height = in->height;
   sin.numSlices = in->numSlices;
   sin.numMipLevels = in->numMipLevels;
   sin.numSamples = in->numSamples;
   sin.numFrags = in->numFrags;

   if (is_fmask) {
      /* FMASK replaces the color/display semantics with its own flag. */
      sin.flags.display = 0;
      sin.flags.color = 0;
      sin.flags.fmask = 1;
   }

   /* With PRT images we want to force 64 KiB block size so that the image
    * created is consistent with the format properties returned in Vulkan
    * independent of the image. */
   if (sin.flags.prt) {
      sin.forbiddenBlock.macroThin4KB = 1;
      sin.forbiddenBlock.macroThick4KB = 1;
      sin.forbiddenBlock.linear = 1;
   }

   if (surf->flags & RADEON_SURF_FORCE_MICRO_TILE_MODE) {
      sin.forbiddenBlock.linear = 1;

      /* Bias addrlib's choice toward the swizzle family matching the
       * forced micro tile mode. */
      if (surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)
         sin.preferredSwSet.sw_D = 1;
      else if (surf->micro_tile_mode == RADEON_MICRO_MODE_STANDARD)
         sin.preferredSwSet.sw_S = 1;
      else if (surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH)
         sin.preferredSwSet.sw_Z = 1;
      else if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER)
         sin.preferredSwSet.sw_R = 1;
   }

   if (info->chip_class >= GFX10 && in->resourceType == ADDR_RSRC_TEX_3D && in->numSlices > 1) {
      /* 3D textures should use S swizzle modes for the best performance.
       * The only exception is 3D render targets, which prefer 64KB_D_X.
       *
       * 3D texture sampler performance with a very large 3D texture:
       *   ADDR_SW_64KB_R_X = 19 FPS (DCC on), 26 FPS (DCC off)
       *   ADDR_SW_64KB_Z_X = 25 FPS
       *   ADDR_SW_64KB_D_X = 53 FPS
       *   ADDR_SW_4KB_S = 53 FPS
       *   ADDR_SW_64KB_S = 53 FPS
       *   ADDR_SW_64KB_S_T = 61 FPS
       *   ADDR_SW_4KB_S_X = 63 FPS
       *   ADDR_SW_64KB_S_X = 62 FPS
       */
      sin.preferredSwSet.sw_S = 1;
   }

   ret = Addr2GetPreferredSurfaceSetting(addrlib, &sin, &sout);
   if (ret != ADDR_OK)
      return ret;

   *swizzle_mode = sout.swizzleMode;
   return 0;
}
1368
1369
static bool is_dcc_supported_by_CB(const struct radeon_info *info, unsigned sw_mode)
1370
{
1371
if (info->chip_class >= GFX10)
1372
return sw_mode == ADDR_SW_64KB_Z_X || sw_mode == ADDR_SW_64KB_R_X;
1373
1374
return sw_mode != ADDR_SW_LINEAR;
1375
}
1376
1377
ASSERTED static bool is_dcc_supported_by_L2(const struct radeon_info *info,
1378
const struct radeon_surf *surf)
1379
{
1380
if (info->chip_class <= GFX9) {
1381
/* Only independent 64B blocks are supported. */
1382
return surf->u.gfx9.color.dcc.independent_64B_blocks && !surf->u.gfx9.color.dcc.independent_128B_blocks &&
1383
surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B;
1384
}
1385
1386
if (info->family == CHIP_NAVI10) {
1387
/* Only independent 128B blocks are supported. */
1388
return !surf->u.gfx9.color.dcc.independent_64B_blocks && surf->u.gfx9.color.dcc.independent_128B_blocks &&
1389
surf->u.gfx9.color.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B;
1390
}
1391
1392
if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14) {
1393
/* Either 64B or 128B can be used, but not both.
1394
* If 64B is used, DCC image stores are unsupported.
1395
*/
1396
return surf->u.gfx9.color.dcc.independent_64B_blocks != surf->u.gfx9.color.dcc.independent_128B_blocks &&
1397
(!surf->u.gfx9.color.dcc.independent_64B_blocks ||
1398
surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B) &&
1399
(!surf->u.gfx9.color.dcc.independent_128B_blocks ||
1400
surf->u.gfx9.color.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B);
1401
}
1402
1403
/* 128B is recommended, but 64B can be set too if needed for 4K by DCN.
1404
* Since there is no reason to ever disable 128B, require it.
1405
* DCC image stores are always supported.
1406
*/
1407
return surf->u.gfx9.color.dcc.independent_128B_blocks &&
1408
surf->u.gfx9.color.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B;
1409
}
1410
1411
static bool is_dcc_supported_by_DCN(const struct radeon_info *info,
1412
const struct ac_surf_config *config,
1413
const struct radeon_surf *surf, bool rb_aligned,
1414
bool pipe_aligned)
1415
{
1416
if (!info->use_display_dcc_unaligned && !info->use_display_dcc_with_retile_blit)
1417
return false;
1418
1419
/* 16bpp and 64bpp are more complicated, so they are disallowed for now. */
1420
if (surf->bpe != 4)
1421
return false;
1422
1423
/* Handle unaligned DCC. */
1424
if (info->use_display_dcc_unaligned && (rb_aligned || pipe_aligned))
1425
return false;
1426
1427
switch (info->chip_class) {
1428
case GFX9:
1429
/* Only support 64KB_S_X, so that we have only 1 variant of the retile shader. */
1430
if (info->use_display_dcc_with_retile_blit &&
1431
surf->u.gfx9.swizzle_mode != ADDR_SW_64KB_S_X)
1432
return false;
1433
1434
/* There are more constraints, but we always set
1435
* INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B,
1436
* which always works.
1437
*/
1438
assert(surf->u.gfx9.color.dcc.independent_64B_blocks &&
1439
surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);
1440
return true;
1441
case GFX10:
1442
case GFX10_3:
1443
/* Only support 64KB_R_X, so that we have only 1 variant of the retile shader. */
1444
if (info->use_display_dcc_with_retile_blit &&
1445
surf->u.gfx9.swizzle_mode != ADDR_SW_64KB_R_X)
1446
return false;
1447
1448
/* DCN requires INDEPENDENT_128B_BLOCKS = 0 only on Navi1x. */
1449
if (info->chip_class == GFX10 && surf->u.gfx9.color.dcc.independent_128B_blocks)
1450
return false;
1451
1452
/* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1. */
1453
return ((config->info.width <= 2560 && config->info.height <= 2560) ||
1454
(surf->u.gfx9.color.dcc.independent_64B_blocks &&
1455
surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B));
1456
default:
1457
unreachable("unhandled chip");
1458
return false;
1459
}
1460
}
1461
1462
static void ac_copy_dcc_equation(const struct radeon_info *info,
1463
ADDR2_COMPUTE_DCCINFO_OUTPUT *dcc,
1464
struct gfx9_meta_equation *equation)
1465
{
1466
equation->meta_block_width = dcc->metaBlkWidth;
1467
equation->meta_block_height = dcc->metaBlkHeight;
1468
equation->meta_block_depth = dcc->metaBlkDepth;
1469
1470
if (info->chip_class >= GFX10) {
1471
/* gfx9_meta_equation doesn't store the first 4 and the last 8 elements. They must be 0. */
1472
for (unsigned i = 0; i < 4; i++)
1473
assert(dcc->equation.gfx10_bits[i] == 0);
1474
1475
for (unsigned i = ARRAY_SIZE(equation->u.gfx10_bits) + 4; i < 68; i++)
1476
assert(dcc->equation.gfx10_bits[i] == 0);
1477
1478
memcpy(equation->u.gfx10_bits, dcc->equation.gfx10_bits + 4,
1479
sizeof(equation->u.gfx10_bits));
1480
} else {
1481
assert(dcc->equation.gfx9.num_bits <= ARRAY_SIZE(equation->u.gfx9.bit));
1482
1483
equation->u.gfx9.num_bits = dcc->equation.gfx9.num_bits;
1484
equation->u.gfx9.num_pipe_bits = dcc->equation.gfx9.numPipeBits;
1485
for (unsigned b = 0; b < ARRAY_SIZE(equation->u.gfx9.bit); b++) {
1486
for (unsigned c = 0; c < ARRAY_SIZE(equation->u.gfx9.bit[b].coord); c++) {
1487
equation->u.gfx9.bit[b].coord[c].dim = dcc->equation.gfx9.bit[b].coord[c].dim;
1488
equation->u.gfx9.bit[b].coord[c].ord = dcc->equation.gfx9.bit[b].coord[c].ord;
1489
}
1490
}
1491
}
1492
}
1493
1494
static void ac_copy_htile_equation(const struct radeon_info *info,
1495
ADDR2_COMPUTE_HTILE_INFO_OUTPUT *htile,
1496
struct gfx9_meta_equation *equation)
1497
{
1498
equation->meta_block_width = htile->metaBlkWidth;
1499
equation->meta_block_height = htile->metaBlkHeight;
1500
1501
/* gfx9_meta_equation doesn't store the first 8 and the last 4 elements. They must be 0. */
1502
for (unsigned i = 0; i < 8; i++)
1503
assert(htile->equation.gfx10_bits[i] == 0);
1504
1505
for (unsigned i = ARRAY_SIZE(equation->u.gfx10_bits) + 8; i < 72; i++)
1506
assert(htile->equation.gfx10_bits[i] == 0);
1507
1508
memcpy(equation->u.gfx10_bits, htile->equation.gfx10_bits + 8,
1509
sizeof(equation->u.gfx10_bits));
1510
}
1511
1512
static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_info *info,
1513
const struct ac_surf_config *config, struct radeon_surf *surf,
1514
bool compressed, ADDR2_COMPUTE_SURFACE_INFO_INPUT *in)
1515
{
1516
ADDR2_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {0};
1517
ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
1518
ADDR_E_RETURNCODE ret;
1519
1520
out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
1521
out.pMipInfo = mip_info;
1522
1523
ret = Addr2ComputeSurfaceInfo(addrlib->handle, in, &out);
1524
if (ret != ADDR_OK)
1525
return ret;
1526
1527
if (in->flags.prt) {
1528
surf->prt_tile_width = out.blockWidth;
1529
surf->prt_tile_height = out.blockHeight;
1530
1531
for (surf->first_mip_tail_level = 0; surf->first_mip_tail_level < in->numMipLevels;
1532
++surf->first_mip_tail_level) {
1533
if(mip_info[surf->first_mip_tail_level].pitch < out.blockWidth ||
1534
mip_info[surf->first_mip_tail_level].height < out.blockHeight)
1535
break;
1536
}
1537
1538
for (unsigned i = 0; i < in->numMipLevels; i++) {
1539
surf->u.gfx9.prt_level_offset[i] = mip_info[i].macroBlockOffset + mip_info[i].mipTailOffset;
1540
1541
if (info->chip_class >= GFX10)
1542
surf->u.gfx9.prt_level_pitch[i] = mip_info[i].pitch;
1543
else
1544
surf->u.gfx9.prt_level_pitch[i] = out.mipChainPitch;
1545
}
1546
}
1547
1548
if (in->flags.stencil) {
1549
surf->u.gfx9.zs.stencil_swizzle_mode = in->swizzleMode;
1550
surf->u.gfx9.zs.stencil_epitch =
1551
out.epitchIsHeight ? out.mipChainHeight - 1 : out.mipChainPitch - 1;
1552
surf->surf_alignment_log2 = MAX2(surf->surf_alignment_log2, util_logbase2(out.baseAlign));
1553
surf->u.gfx9.zs.stencil_offset = align(surf->surf_size, out.baseAlign);
1554
surf->surf_size = surf->u.gfx9.zs.stencil_offset + out.surfSize;
1555
return 0;
1556
}
1557
1558
surf->u.gfx9.swizzle_mode = in->swizzleMode;
1559
surf->u.gfx9.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 : out.mipChainPitch - 1;
1560
1561
/* CMASK fast clear uses these even if FMASK isn't allocated.
1562
* FMASK only supports the Z swizzle modes, whose numbers are multiples of 4.
1563
*/
1564
if (!in->flags.depth) {
1565
surf->u.gfx9.color.fmask_swizzle_mode = surf->u.gfx9.swizzle_mode & ~0x3;
1566
surf->u.gfx9.color.fmask_epitch = surf->u.gfx9.epitch;
1567
}
1568
1569
surf->u.gfx9.surf_slice_size = out.sliceSize;
1570
surf->u.gfx9.surf_pitch = out.pitch;
1571
surf->u.gfx9.surf_height = out.height;
1572
surf->surf_size = out.surfSize;
1573
surf->surf_alignment_log2 = util_logbase2(out.baseAlign);
1574
1575
if (!compressed && surf->blk_w > 1 && out.pitch == out.pixelPitch &&
1576
surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR) {
1577
/* Adjust surf_pitch to be in elements units not in pixels */
1578
surf->u.gfx9.surf_pitch = align(surf->u.gfx9.surf_pitch / surf->blk_w, 256 / surf->bpe);
1579
surf->u.gfx9.epitch =
1580
MAX2(surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch * surf->blk_w - 1);
1581
/* The surface is really a surf->bpe bytes per pixel surface even if we
1582
* use it as a surf->bpe bytes per element one.
1583
* Adjust surf_slice_size and surf_size to reflect the change
1584
* made to surf_pitch.
1585
*/
1586
surf->u.gfx9.surf_slice_size =
1587
MAX2(surf->u.gfx9.surf_slice_size,
1588
surf->u.gfx9.surf_pitch * out.height * surf->bpe * surf->blk_w);
1589
surf->surf_size = surf->u.gfx9.surf_slice_size * in->numSlices;
1590
}
1591
1592
if (in->swizzleMode == ADDR_SW_LINEAR) {
1593
for (unsigned i = 0; i < in->numMipLevels; i++) {
1594
surf->u.gfx9.offset[i] = mip_info[i].offset;
1595
surf->u.gfx9.pitch[i] = mip_info[i].pitch;
1596
}
1597
}
1598
1599
surf->u.gfx9.base_mip_width = mip_info[0].pitch;
1600
surf->u.gfx9.base_mip_height = mip_info[0].height;
1601
1602
if (in->flags.depth) {
1603
assert(in->swizzleMode != ADDR_SW_LINEAR);
1604
1605
if (surf->flags & RADEON_SURF_NO_HTILE)
1606
return 0;
1607
1608
/* HTILE */
1609
ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
1610
ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};
1611
ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
1612
1613
hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT);
1614
hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT);
1615
hout.pMipInfo = meta_mip_info;
1616
1617
assert(in->flags.metaPipeUnaligned == 0);
1618
assert(in->flags.metaRbUnaligned == 0);
1619
1620
hin.hTileFlags.pipeAligned = 1;
1621
hin.hTileFlags.rbAligned = 1;
1622
hin.depthFlags = in->flags;
1623
hin.swizzleMode = in->swizzleMode;
1624
hin.unalignedWidth = in->width;
1625
hin.unalignedHeight = in->height;
1626
hin.numSlices = in->numSlices;
1627
hin.numMipLevels = in->numMipLevels;
1628
hin.firstMipIdInTail = out.firstMipIdInTail;
1629
1630
ret = Addr2ComputeHtileInfo(addrlib->handle, &hin, &hout);
1631
if (ret != ADDR_OK)
1632
return ret;
1633
1634
surf->meta_size = hout.htileBytes;
1635
surf->meta_slice_size = hout.sliceSize;
1636
surf->meta_alignment_log2 = util_logbase2(hout.baseAlign);
1637
surf->meta_pitch = hout.pitch;
1638
surf->num_meta_levels = in->numMipLevels;
1639
1640
for (unsigned i = 0; i < in->numMipLevels; i++) {
1641
surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset;
1642
surf->u.gfx9.meta_levels[i].size = meta_mip_info[i].sliceSize;
1643
1644
if (meta_mip_info[i].inMiptail) {
1645
/* GFX10 can only compress the first level
1646
* in the mip tail.
1647
*/
1648
surf->num_meta_levels = i + 1;
1649
break;
1650
}
1651
}
1652
1653
if (!surf->num_meta_levels)
1654
surf->meta_size = 0;
1655
1656
if (info->chip_class >= GFX10)
1657
ac_copy_htile_equation(info, &hout, &surf->u.gfx9.zs.htile_equation);
1658
return 0;
1659
}
1660
1661
{
1662
/* Compute tile swizzle for the color surface.
1663
* All *_X and *_T modes can use the swizzle.
1664
*/
1665
if (config->info.surf_index && in->swizzleMode >= ADDR_SW_64KB_Z_T && !out.mipChainInTail &&
1666
!(surf->flags & RADEON_SURF_SHAREABLE) && !in->flags.display) {
1667
ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
1668
ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
1669
1670
xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
1671
xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);
1672
1673
xin.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
1674
xin.flags = in->flags;
1675
xin.swizzleMode = in->swizzleMode;
1676
xin.resourceType = in->resourceType;
1677
xin.format = in->format;
1678
xin.numSamples = in->numSamples;
1679
xin.numFrags = in->numFrags;
1680
1681
ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);
1682
if (ret != ADDR_OK)
1683
return ret;
1684
1685
assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
1686
surf->tile_swizzle = xout.pipeBankXor;
1687
}
1688
1689
/* DCC */
1690
if (info->has_graphics && !(surf->flags & RADEON_SURF_DISABLE_DCC) && !compressed &&
1691
is_dcc_supported_by_CB(info, in->swizzleMode) &&
1692
(!in->flags.display ||
1693
is_dcc_supported_by_DCN(info, config, surf, !in->flags.metaRbUnaligned,
1694
!in->flags.metaPipeUnaligned)) &&
1695
(surf->modifier == DRM_FORMAT_MOD_INVALID ||
1696
ac_modifier_has_dcc(surf->modifier))) {
1697
ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
1698
ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
1699
ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
1700
1701
din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
1702
dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
1703
dout.pMipInfo = meta_mip_info;
1704
1705
din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned;
1706
din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned;
1707
din.resourceType = in->resourceType;
1708
din.swizzleMode = in->swizzleMode;
1709
din.bpp = in->bpp;
1710
din.unalignedWidth = in->width;
1711
din.unalignedHeight = in->height;
1712
din.numSlices = in->numSlices;
1713
din.numFrags = in->numFrags;
1714
din.numMipLevels = in->numMipLevels;
1715
din.dataSurfaceSize = out.surfSize;
1716
din.firstMipIdInTail = out.firstMipIdInTail;
1717
1718
ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
1719
if (ret != ADDR_OK)
1720
return ret;
1721
1722
surf->u.gfx9.color.dcc.rb_aligned = din.dccKeyFlags.rbAligned;
1723
surf->u.gfx9.color.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned;
1724
surf->u.gfx9.color.dcc_block_width = dout.compressBlkWidth;
1725
surf->u.gfx9.color.dcc_block_height = dout.compressBlkHeight;
1726
surf->u.gfx9.color.dcc_block_depth = dout.compressBlkDepth;
1727
surf->u.gfx9.color.dcc_pitch_max = dout.pitch - 1;
1728
surf->u.gfx9.color.dcc_height = dout.height;
1729
surf->meta_size = dout.dccRamSize;
1730
surf->meta_slice_size = dout.dccRamSliceSize;
1731
surf->meta_alignment_log2 = util_logbase2(dout.dccRamBaseAlign);
1732
surf->num_meta_levels = in->numMipLevels;
1733
1734
/* Disable DCC for levels that are in the mip tail.
1735
*
1736
* There are two issues that this is intended to
1737
* address:
1738
*
1739
* 1. Multiple mip levels may share a cache line. This
1740
* can lead to corruption when switching between
1741
* rendering to different mip levels because the
1742
* RBs don't maintain coherency.
1743
*
1744
* 2. Texturing with metadata after rendering sometimes
1745
* fails with corruption, probably for a similar
1746
* reason.
1747
*
1748
* Working around these issues for all levels in the
1749
* mip tail may be overly conservative, but it's what
1750
* Vulkan does.
1751
*
1752
* Alternative solutions that also work but are worse:
1753
* - Disable DCC entirely.
1754
* - Flush TC L2 after rendering.
1755
*/
1756
for (unsigned i = 0; i < in->numMipLevels; i++) {
1757
surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset;
1758
surf->u.gfx9.meta_levels[i].size = meta_mip_info[i].sliceSize;
1759
1760
if (meta_mip_info[i].inMiptail) {
1761
/* GFX10 can only compress the first level
1762
* in the mip tail.
1763
*
1764
* TODO: Try to do the same thing for gfx9
1765
* if there are no regressions.
1766
*/
1767
if (info->chip_class >= GFX10)
1768
surf->num_meta_levels = i + 1;
1769
else
1770
surf->num_meta_levels = i;
1771
break;
1772
}
1773
}
1774
1775
if (!surf->num_meta_levels)
1776
surf->meta_size = 0;
1777
1778
surf->u.gfx9.color.display_dcc_size = surf->meta_size;
1779
surf->u.gfx9.color.display_dcc_alignment_log2 = surf->meta_alignment_log2;
1780
surf->u.gfx9.color.display_dcc_pitch_max = surf->u.gfx9.color.dcc_pitch_max;
1781
surf->u.gfx9.color.display_dcc_height = surf->u.gfx9.color.dcc_height;
1782
1783
if (in->resourceType == ADDR_RSRC_TEX_2D)
1784
ac_copy_dcc_equation(info, &dout, &surf->u.gfx9.color.dcc_equation);
1785
1786
/* Compute displayable DCC. */
1787
if (((in->flags.display && info->use_display_dcc_with_retile_blit) ||
1788
ac_modifier_has_dcc_retile(surf->modifier)) && surf->num_meta_levels) {
1789
/* Compute displayable DCC info. */
1790
din.dccKeyFlags.pipeAligned = 0;
1791
din.dccKeyFlags.rbAligned = 0;
1792
1793
assert(din.numSlices == 1);
1794
assert(din.numMipLevels == 1);
1795
assert(din.numFrags == 1);
1796
assert(surf->tile_swizzle == 0);
1797
assert(surf->u.gfx9.color.dcc.pipe_aligned || surf->u.gfx9.color.dcc.rb_aligned);
1798
1799
ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
1800
if (ret != ADDR_OK)
1801
return ret;
1802
1803
surf->u.gfx9.color.display_dcc_size = dout.dccRamSize;
1804
surf->u.gfx9.color.display_dcc_alignment_log2 = util_logbase2(dout.dccRamBaseAlign);
1805
surf->u.gfx9.color.display_dcc_pitch_max = dout.pitch - 1;
1806
surf->u.gfx9.color.display_dcc_height = dout.height;
1807
assert(surf->u.gfx9.color.display_dcc_size <= surf->meta_size);
1808
1809
ac_copy_dcc_equation(info, &dout, &surf->u.gfx9.color.display_dcc_equation);
1810
surf->u.gfx9.color.dcc.display_equation_valid = true;
1811
}
1812
}
1813
1814
/* FMASK */
1815
if (in->numSamples > 1 && info->has_graphics && !(surf->flags & RADEON_SURF_NO_FMASK)) {
1816
ADDR2_COMPUTE_FMASK_INFO_INPUT fin = {0};
1817
ADDR2_COMPUTE_FMASK_INFO_OUTPUT fout = {0};
1818
1819
fin.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT);
1820
fout.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT);
1821
1822
ret = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, in, true, &fin.swizzleMode);
1823
if (ret != ADDR_OK)
1824
return ret;
1825
1826
fin.unalignedWidth = in->width;
1827
fin.unalignedHeight = in->height;
1828
fin.numSlices = in->numSlices;
1829
fin.numSamples = in->numSamples;
1830
fin.numFrags = in->numFrags;
1831
1832
ret = Addr2ComputeFmaskInfo(addrlib->handle, &fin, &fout);
1833
if (ret != ADDR_OK)
1834
return ret;
1835
1836
surf->u.gfx9.color.fmask_swizzle_mode = fin.swizzleMode;
1837
surf->u.gfx9.color.fmask_epitch = fout.pitch - 1;
1838
surf->fmask_size = fout.fmaskBytes;
1839
surf->fmask_alignment_log2 = util_logbase2(fout.baseAlign);
1840
surf->fmask_slice_size = fout.sliceSize;
1841
1842
/* Compute tile swizzle for the FMASK surface. */
1843
if (config->info.fmask_surf_index && fin.swizzleMode >= ADDR_SW_64KB_Z_T &&
1844
!(surf->flags & RADEON_SURF_SHAREABLE)) {
1845
ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
1846
ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
1847
1848
xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
1849
xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);
1850
1851
/* This counter starts from 1 instead of 0. */
1852
xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
1853
xin.flags = in->flags;
1854
xin.swizzleMode = fin.swizzleMode;
1855
xin.resourceType = in->resourceType;
1856
xin.format = in->format;
1857
xin.numSamples = in->numSamples;
1858
xin.numFrags = in->numFrags;
1859
1860
ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);
1861
if (ret != ADDR_OK)
1862
return ret;
1863
1864
assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->fmask_tile_swizzle) * 8));
1865
surf->fmask_tile_swizzle = xout.pipeBankXor;
1866
}
1867
}
1868
1869
/* CMASK -- on GFX10 only for FMASK */
1870
if (in->swizzleMode != ADDR_SW_LINEAR && in->resourceType == ADDR_RSRC_TEX_2D &&
1871
((info->chip_class <= GFX9 && in->numSamples == 1 && in->flags.metaPipeUnaligned == 0 &&
1872
in->flags.metaRbUnaligned == 0) ||
1873
(surf->fmask_size && in->numSamples >= 2))) {
1874
ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0};
1875
ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0};
1876
ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
1877
1878
cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT);
1879
cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT);
1880
cout.pMipInfo = meta_mip_info;
1881
1882
assert(in->flags.metaPipeUnaligned == 0);
1883
assert(in->flags.metaRbUnaligned == 0);
1884
1885
cin.cMaskFlags.pipeAligned = 1;
1886
cin.cMaskFlags.rbAligned = 1;
1887
cin.resourceType = in->resourceType;
1888
cin.unalignedWidth = in->width;
1889
cin.unalignedHeight = in->height;
1890
cin.numSlices = in->numSlices;
1891
cin.numMipLevels = in->numMipLevels;
1892
cin.firstMipIdInTail = out.firstMipIdInTail;
1893
1894
if (in->numSamples > 1)
1895
cin.swizzleMode = surf->u.gfx9.color.fmask_swizzle_mode;
1896
else
1897
cin.swizzleMode = in->swizzleMode;
1898
1899
ret = Addr2ComputeCmaskInfo(addrlib->handle, &cin, &cout);
1900
if (ret != ADDR_OK)
1901
return ret;
1902
1903
surf->cmask_size = cout.cmaskBytes;
1904
surf->cmask_alignment_log2 = util_logbase2(cout.baseAlign);
1905
surf->cmask_slice_size = cout.sliceSize;
1906
surf->u.gfx9.color.cmask_level0.offset = meta_mip_info[0].offset;
1907
surf->u.gfx9.color.cmask_level0.size = meta_mip_info[0].sliceSize;
1908
}
1909
}
1910
1911
return 0;
1912
}
1913
1914
/* Compute the GFX9+ layout of a surface through addrlib.
 *
 * Sets up ADDR2_COMPUTE_SURFACE_INFO_INPUT (format, flags, dimensions,
 * sample counts), selects a swizzle mode (from the surface mode, an imported
 * surface, or a DRM format modifier), then runs gfx9_compute_miptree for the
 * main plane and — if RADEON_SURF_SBUFFER is set — once more for stencil.
 * Finally derives surf->is_displayable and surf->micro_tile_mode and
 * validates the resulting DCC state with assertions.
 *
 * Returns 0 on success, or a non-zero addrlib error code.
 */
static int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
                                const struct ac_surf_config *config, enum radeon_surf_mode mode,
                                struct radeon_surf *surf)
{
   bool compressed;
   ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
   int r;

   AddrSurfInfoIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);

   /* Block-compressed formats use 4x4 blocks; that is how they are detected here. */
   compressed = surf->blk_w == 4 && surf->blk_h == 4;

   /* The format must be set correctly for the allocation of compressed
    * textures to work. In other cases, setting the bpp is sufficient. */
   if (compressed) {
      switch (surf->bpe) {
      case 8:
         /* 8 bytes per 4x4 block: BC1/BC4-class layout. */
         AddrSurfInfoIn.format = ADDR_FMT_BC1;
         break;
      case 16:
         /* 16 bytes per 4x4 block: BC3/BC5/BC6/BC7-class layout. */
         AddrSurfInfoIn.format = ADDR_FMT_BC3;
         break;
      default:
         assert(0);
      }
   } else {
      switch (surf->bpe) {
      case 1:
         assert(!(surf->flags & RADEON_SURF_ZBUFFER));
         AddrSurfInfoIn.format = ADDR_FMT_8;
         break;
      case 2:
         /* A 2-byte stencil-only surface is not allowed. */
         assert(surf->flags & RADEON_SURF_ZBUFFER || !(surf->flags & RADEON_SURF_SBUFFER));
         AddrSurfInfoIn.format = ADDR_FMT_16;
         break;
      case 4:
         assert(surf->flags & RADEON_SURF_ZBUFFER || !(surf->flags & RADEON_SURF_SBUFFER));
         AddrSurfInfoIn.format = ADDR_FMT_32;
         break;
      case 8:
         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
         AddrSurfInfoIn.format = ADDR_FMT_32_32;
         break;
      case 12:
         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
         AddrSurfInfoIn.format = ADDR_FMT_32_32_32;
         break;
      case 16:
         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
         AddrSurfInfoIn.format = ADDR_FMT_32_32_32_32;
         break;
      default:
         assert(0);
      }
      AddrSurfInfoIn.bpp = surf->bpe * 8;
   }

   bool is_color_surface = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
   AddrSurfInfoIn.flags.color = is_color_surface && !(surf->flags & RADEON_SURF_NO_RENDER_TARGET);
   AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
   AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
   /* flags.texture currently refers to TC-compatible HTILE */
   AddrSurfInfoIn.flags.texture = is_color_surface || surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
   AddrSurfInfoIn.flags.opt4space = 1;
   AddrSurfInfoIn.flags.prt = (surf->flags & RADEON_SURF_PRT) != 0;

   AddrSurfInfoIn.numMipLevels = config->info.levels;
   AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);
   AddrSurfInfoIn.numFrags = AddrSurfInfoIn.numSamples;

   /* Color surfaces may use fewer storage (fragment) samples than coverage
    * samples (EQAA); depth/stencil must keep numFrags == numSamples. */
   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER))
      AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples);

   /* GFX9 doesn't support 1D depth textures, so allocate all 1D textures
    * as 2D to avoid having shader variants for 1D vs 2D, so all shaders
    * must sample 1D textures as 2D. */
   if (config->is_3d)
      AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D;
   else if (info->chip_class != GFX9 && config->is_1d)
      AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_1D;
   else
      AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_2D;

   AddrSurfInfoIn.width = config->info.width;
   AddrSurfInfoIn.height = config->info.height;

   if (config->is_3d)
      AddrSurfInfoIn.numSlices = config->info.depth;
   else if (config->is_cube)
      AddrSurfInfoIn.numSlices = 6;
   else
      AddrSurfInfoIn.numSlices = config->info.array_size;

   /* This is propagated to DCC. It must be 0 for HTILE and CMASK. */
   AddrSurfInfoIn.flags.metaPipeUnaligned = 0;
   AddrSurfInfoIn.flags.metaRbUnaligned = 0;

   if (ac_modifier_has_dcc(surf->modifier)) {
      /* The modifier fully determines the DCC parameters. */
      ac_modifier_fill_dcc_params(surf->modifier, surf, &AddrSurfInfoIn);
   } else if (!AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.stencil) {
      /* Optimal values for the L2 cache. */
      if (info->chip_class == GFX9) {
         surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
         surf->u.gfx9.color.dcc.independent_128B_blocks = 0;
         surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
      } else if (info->chip_class >= GFX10) {
         surf->u.gfx9.color.dcc.independent_64B_blocks = 0;
         surf->u.gfx9.color.dcc.independent_128B_blocks = 1;
         surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
      }

      if (AddrSurfInfoIn.flags.display) {
         /* The display hardware can only read DCC with RB_ALIGNED=0 and
          * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED.
          *
          * The CB block requires RB_ALIGNED=1 except 1 RB chips.
          * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes
          * after rendering, so PIPE_ALIGNED=1 is recommended.
          */
         if (info->use_display_dcc_unaligned) {
            AddrSurfInfoIn.flags.metaPipeUnaligned = 1;
            AddrSurfInfoIn.flags.metaRbUnaligned = 1;
         }

         /* Adjust DCC settings to meet DCN requirements. */
         if (info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit) {
            /* Only Navi12/14 support independent 64B blocks in L2,
             * but without DCC image stores.
             */
            if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14) {
               surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
               surf->u.gfx9.color.dcc.independent_128B_blocks = 0;
               surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
            }

            if (info->chip_class >= GFX10_3) {
               surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
               surf->u.gfx9.color.dcc.independent_128B_blocks = 1;
               surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
            }
         }
      }
   }

   if (surf->modifier == DRM_FORMAT_MOD_INVALID) {
      /* No modifier: pick the swizzle mode from the requested surface mode. */
      switch (mode) {
      case RADEON_SURF_MODE_LINEAR_ALIGNED:
         assert(config->info.samples <= 1);
         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
         AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR;
         break;

      case RADEON_SURF_MODE_1D:
      case RADEON_SURF_MODE_2D:
         /* Imported/forced surfaces keep the swizzle mode chosen by the
          * producer instead of recomputing one. */
         if (surf->flags & RADEON_SURF_IMPORTED ||
             (info->chip_class >= GFX10 && surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE)) {
            AddrSurfInfoIn.swizzleMode = surf->u.gfx9.swizzle_mode;
            break;
         }

         r = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, &AddrSurfInfoIn, false,
                                             &AddrSurfInfoIn.swizzleMode);
         if (r)
            return r;
         break;

      default:
         assert(0);
      }
   } else {
      /* We have a valid and required modifier here. */

      assert(!compressed);
      assert(!ac_modifier_has_dcc(surf->modifier) ||
             !(surf->flags & RADEON_SURF_DISABLE_DCC));

      AddrSurfInfoIn.swizzleMode = ac_modifier_gfx9_swizzle_mode(surf->modifier);
   }

   surf->u.gfx9.resource_type = AddrSurfInfoIn.resourceType;
   surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);

   /* Reset all output sizes/offsets before gfx9_compute_miptree fills them. */
   surf->num_meta_levels = 0;
   surf->surf_size = 0;
   surf->fmask_size = 0;
   surf->meta_size = 0;
   surf->meta_slice_size = 0;
   surf->u.gfx9.surf_offset = 0;
   if (AddrSurfInfoIn.flags.stencil)
      surf->u.gfx9.zs.stencil_offset = 0;
   surf->cmask_size = 0;

   const bool only_stencil =
      (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER);

   /* Calculate texture layout information. */
   if (!only_stencil) {
      r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn);
      if (r)
         return r;
   }

   /* Calculate texture layout information for stencil. */
   if (surf->flags & RADEON_SURF_SBUFFER) {
      AddrSurfInfoIn.flags.stencil = 1;
      AddrSurfInfoIn.bpp = 8;
      AddrSurfInfoIn.format = ADDR_FMT_8;

      if (!AddrSurfInfoIn.flags.depth) {
         /* Stencil-only: choose a swizzle mode for the stencil plane itself. */
         r = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, &AddrSurfInfoIn, false,
                                             &AddrSurfInfoIn.swizzleMode);
         if (r)
            return r;
      } else
         AddrSurfInfoIn.flags.depth = 0;

      r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn);
      if (r)
         return r;
   }

   surf->is_linear = surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR;

   /* Query whether the surface is displayable. */
   /* This is only useful for surfaces that are allocated without SCANOUT. */
   BOOL_32 displayable = false;
   if (!config->is_3d && !config->is_cube) {
      r = Addr2IsValidDisplaySwizzleMode(addrlib->handle, surf->u.gfx9.swizzle_mode,
                                         surf->bpe * 8, &displayable);
      if (r)
         return r;

      /* Display needs unaligned DCC. */
      if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
          surf->num_meta_levels &&
          (!is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned,
                                    surf->u.gfx9.color.dcc.pipe_aligned) ||
           /* Don't set is_displayable if displayable DCC is missing. */
           (info->use_display_dcc_with_retile_blit && !surf->u.gfx9.color.dcc.display_equation_valid)))
         displayable = false;
   }
   surf->is_displayable = displayable;

   /* Validate that we allocated a displayable surface if requested. */
   assert(!AddrSurfInfoIn.flags.display || surf->is_displayable);

   /* Validate that DCC is set up correctly. */
   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->num_meta_levels) {
      assert(is_dcc_supported_by_L2(info, surf));
      if (AddrSurfInfoIn.flags.color)
         assert(is_dcc_supported_by_CB(info, surf->u.gfx9.swizzle_mode));
      if (AddrSurfInfoIn.flags.display) {
         assert(is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned,
                                        surf->u.gfx9.color.dcc.pipe_aligned));
      }
   }

   /* Sanity checks for DCC enablement on 64KB-tiled single-level color surfaces. */
   if (info->has_graphics && !compressed && !config->is_3d && config->info.levels == 1 &&
       AddrSurfInfoIn.flags.color && !surf->is_linear &&
       (1 << surf->surf_alignment_log2) >= 64 * 1024 && /* 64KB tiling */
       !(surf->flags & (RADEON_SURF_DISABLE_DCC | RADEON_SURF_FORCE_SWIZZLE_MODE |
                        RADEON_SURF_FORCE_MICRO_TILE_MODE)) &&
       (surf->modifier == DRM_FORMAT_MOD_INVALID ||
        ac_modifier_has_dcc(surf->modifier))) {
      /* Validate that DCC is enabled if DCN can do it. */
      if ((info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit) &&
          AddrSurfInfoIn.flags.display && surf->bpe == 4) {
         assert(surf->num_meta_levels);
      }

      /* Validate that non-scanout DCC is always enabled. */
      if (!AddrSurfInfoIn.flags.display)
         assert(surf->num_meta_levels);
   }

   if (!surf->meta_size) {
      /* Unset this if HTILE is not present. */
      surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
   }

   /* Derive the micro tile mode from the chosen swizzle mode. */
   switch (surf->u.gfx9.swizzle_mode) {
   /* S = standard. */
   case ADDR_SW_256B_S:
   case ADDR_SW_4KB_S:
   case ADDR_SW_64KB_S:
   case ADDR_SW_64KB_S_T:
   case ADDR_SW_4KB_S_X:
   case ADDR_SW_64KB_S_X:
      surf->micro_tile_mode = RADEON_MICRO_MODE_STANDARD;
      break;

   /* D = display. */
   case ADDR_SW_LINEAR:
   case ADDR_SW_256B_D:
   case ADDR_SW_4KB_D:
   case ADDR_SW_64KB_D:
   case ADDR_SW_64KB_D_T:
   case ADDR_SW_4KB_D_X:
   case ADDR_SW_64KB_D_X:
      surf->micro_tile_mode = RADEON_MICRO_MODE_DISPLAY;
      break;

   /* R = rotated (gfx9), render target (gfx10). */
   case ADDR_SW_256B_R:
   case ADDR_SW_4KB_R:
   case ADDR_SW_64KB_R:
   case ADDR_SW_64KB_R_T:
   case ADDR_SW_4KB_R_X:
   case ADDR_SW_64KB_R_X:
   case ADDR_SW_VAR_R_X:
      /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
       * used at the same time. We currently do not use rotated
       * in gfx9.
       */
      assert(info->chip_class >= GFX10 || !"rotate micro tile mode is unsupported");
      surf->micro_tile_mode = RADEON_MICRO_MODE_RENDER;
      break;

   /* Z = depth. */
   case ADDR_SW_4KB_Z:
   case ADDR_SW_64KB_Z:
   case ADDR_SW_64KB_Z_T:
   case ADDR_SW_4KB_Z_X:
   case ADDR_SW_64KB_Z_X:
   case ADDR_SW_VAR_Z_X:
      surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH;
      break;

   default:
      assert(0);
   }

   return 0;
}
2248
2249
/* Compute the complete layout of a surface and pack the main image plus any
 * FMASK, CMASK and DCC/meta allocations into a single buffer.
 *
 * Dispatches to the GFX9+ or legacy (GFX6-8) path, then fills in
 * surf->total_size, surf->alignment_log2 and the per-allocation offsets
 * (fmask_offset, cmask_offset, display_dcc_offset, meta_offset).
 *
 * Returns 0 on success, or a non-zero error code from the backend.
 */
int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
                       const struct ac_surf_config *config, enum radeon_surf_mode mode,
                       struct radeon_surf *surf)
{
   int r;

   r = surf_config_sanity(config, surf->flags);
   if (r)
      return r;

   /* FAMILY_AI (Vega) is the first GFX9 family. */
   if (info->family_id >= FAMILY_AI)
      r = gfx9_compute_surface(addrlib, info, config, mode, surf);
   else
      r = gfx6_compute_surface(addrlib->handle, info, config, mode, surf);

   if (r)
      return r;

   /* Determine the memory layout of multiple allocations in one buffer. */
   surf->total_size = surf->surf_size;
   surf->alignment_log2 = surf->surf_alignment_log2;

   /* Ensure the offsets are always 0 if not available. */
   surf->meta_offset = surf->display_dcc_offset = surf->fmask_offset = surf->cmask_offset = 0;

   if (surf->fmask_size) {
      assert(config->info.samples >= 2);
      surf->fmask_offset = align64(surf->total_size, 1 << surf->fmask_alignment_log2);
      surf->total_size = surf->fmask_offset + surf->fmask_size;
      surf->alignment_log2 = MAX2(surf->alignment_log2, surf->fmask_alignment_log2);
   }

   /* Single-sample CMASK is in a separate buffer. */
   if (surf->cmask_size && config->info.samples >= 2) {
      surf->cmask_offset = align64(surf->total_size, 1 << surf->cmask_alignment_log2);
      surf->total_size = surf->cmask_offset + surf->cmask_size;
      surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);
   }

   if (surf->is_displayable)
      surf->flags |= RADEON_SURF_SCANOUT;

   if (surf->meta_size &&
       /* dcc_size is computed on GFX9+ only if it's displayable. */
       (info->chip_class >= GFX9 || !get_display_flag(config, surf))) {
      /* It's better when displayable DCC is immediately after
       * the image due to hw-specific reasons.
       */
      if (info->chip_class >= GFX9 &&
          !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
          surf->u.gfx9.color.dcc.display_equation_valid) {
         /* Add space for the displayable DCC buffer. */
         surf->display_dcc_offset = align64(surf->total_size, 1 << surf->u.gfx9.color.display_dcc_alignment_log2);
         surf->total_size = surf->display_dcc_offset + surf->u.gfx9.color.display_dcc_size;
      }

      surf->meta_offset = align64(surf->total_size, 1 << surf->meta_alignment_log2);
      surf->total_size = surf->meta_offset + surf->meta_size;
      surf->alignment_log2 = MAX2(surf->alignment_log2, surf->meta_alignment_log2);
   }

   return 0;
}
2312
2313
/* This is meant to be used for disabling DCC. */
2314
void ac_surface_zero_dcc_fields(struct radeon_surf *surf)
2315
{
2316
if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
2317
return;
2318
2319
surf->meta_offset = 0;
2320
surf->display_dcc_offset = 0;
2321
if (!surf->fmask_offset && !surf->cmask_offset) {
2322
surf->total_size = surf->surf_size;
2323
surf->alignment_log2 = surf->surf_alignment_log2;
2324
}
2325
}
2326
2327
/* Decode the 3-bit EG TILE_SPLIT tiling field into a byte count.
 * Field values 0..6 map to 64..4096 bytes; anything out of range decodes
 * to 1024 bytes (matching the old switch's default case). */
static unsigned eg_tile_split(unsigned tile_split)
{
   static const unsigned split_bytes[] = {64, 128, 256, 512, 1024, 2048, 4096};

   if (tile_split >= sizeof(split_bytes) / sizeof(split_bytes[0]))
      return 1024;

   return split_bytes[tile_split];
}
2355
2356
/* Encode a tile-split byte count back into the 3-bit EG TILE_SPLIT field.
 * Inverse of eg_tile_split(); any unrecognized byte count encodes to 4
 * (the 1024-byte setting), matching the old switch's default case. */
static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   static const unsigned split_bytes[] = {64, 128, 256, 512, 1024, 2048, 4096};

   for (unsigned field = 0; field < sizeof(split_bytes) / sizeof(split_bytes[0]); field++) {
      if (split_bytes[field] == eg_tile_split)
         return field;
   }

   return 4;
}
2376
2377
#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
2378
#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK 0x3
2379
2380
/* This should be called before ac_compute_surface. */
/* Decode kernel BO tiling flags (AMDGPU_TILING_*) into the surface struct
 * and report the corresponding radeon_surf_mode in *mode.
 * Also sets or clears RADEON_SURF_SCANOUT in surf->flags.
 */
void ac_surface_set_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
                                uint64_t tiling_flags, enum radeon_surf_mode *mode)
{
   bool scanout;

   if (info->chip_class >= GFX9) {
      /* GFX9+: the swizzle mode and DCC parameters live directly in the flags. */
      surf->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
      surf->u.gfx9.color.dcc.independent_64B_blocks =
         AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B);
      surf->u.gfx9.color.dcc.independent_128B_blocks =
         AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_128B);
      surf->u.gfx9.color.dcc.max_compressed_block_size =
         AMDGPU_TILING_GET(tiling_flags, DCC_MAX_COMPRESSED_BLOCK_SIZE);
      surf->u.gfx9.color.display_dcc_pitch_max = AMDGPU_TILING_GET(tiling_flags, DCC_PITCH_MAX);
      scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
      /* Swizzle mode 0 is ADDR_SW_LINEAR; anything else counts as 2D tiled. */
      *mode =
         surf->u.gfx9.swizzle_mode > 0 ? RADEON_SURF_MODE_2D : RADEON_SURF_MODE_LINEAR_ALIGNED;
   } else {
      /* Legacy (GFX6-8): decode the individual tiling parameters. */
      surf->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
      surf->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
      surf->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
      surf->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
      surf->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
      surf->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
      scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */

      if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
         *mode = RADEON_SURF_MODE_2D;
      else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
         *mode = RADEON_SURF_MODE_1D;
      else
         *mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   if (scanout)
      surf->flags |= RADEON_SURF_SCANOUT;
   else
      surf->flags &= ~RADEON_SURF_SCANOUT;
}
2420
2421
/* Encode the surface's tiling state into kernel BO tiling flags
 * (AMDGPU_TILING_*), the inverse of ac_surface_set_bo_metadata.
 */
void ac_surface_get_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
                                uint64_t *tiling_flags)
{
   *tiling_flags = 0;

   if (info->chip_class >= GFX9) {
      uint64_t dcc_offset = 0;

      if (surf->meta_offset) {
         /* Prefer the displayable DCC offset when one exists. */
         dcc_offset = surf->display_dcc_offset ? surf->display_dcc_offset : surf->meta_offset;
         /* DCC_OFFSET_256B is a 24-bit field of the offset in 256B units. */
         assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24));
      }

      *tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, surf->u.gfx9.swizzle_mode);
      *tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, dcc_offset >> 8);
      *tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, surf->u.gfx9.color.display_dcc_pitch_max);
      *tiling_flags |=
         AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, surf->u.gfx9.color.dcc.independent_64B_blocks);
      *tiling_flags |=
         AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, surf->u.gfx9.color.dcc.independent_128B_blocks);
      *tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE,
                                         surf->u.gfx9.color.dcc.max_compressed_block_size);
      *tiling_flags |= AMDGPU_TILING_SET(SCANOUT, (surf->flags & RADEON_SURF_SCANOUT) != 0);
   } else {
      /* Legacy (GFX6-8) encoding. */
      if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D)
         *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
      else if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D)
         *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
      else
         *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

      *tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, surf->u.legacy.pipe_config);
      *tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(surf->u.legacy.bankw));
      *tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(surf->u.legacy.bankh));
      if (surf->u.legacy.tile_split)
         *tiling_flags |=
            AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(surf->u.legacy.tile_split));
      *tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(surf->u.legacy.mtilea));
      *tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(surf->u.legacy.num_banks) - 1);

      if (surf->flags & RADEON_SURF_SCANOUT)
         *tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
      else
         *tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
   }
}
2467
2468
/* Word 1 of the UMD metadata header: (VENDOR_ID << 16) | PCI_ID.
 * Used to reject metadata produced for a different GPU, since tiling modes
 * are ambiguous without a PCI ID. */
static uint32_t ac_get_umd_metadata_word1(const struct radeon_info *info)
{
   return (ATI_VENDOR_ID << 16) | info->pci_id;
}
2472
2473
/* This should be called after ac_compute_surface. */
/* Import UMD (userspace-driver) metadata attached to a shared texture:
 * validate the header and the embedded image descriptor, then read the DCC
 * state (meta_offset, pipe/rb alignment) out of the descriptor for the
 * current chip generation.
 *
 * Returns false only when the metadata is internally inconsistent with the
 * caller's sample/mip counts; invalid or foreign metadata merely disables
 * DCC and returns true.
 */
bool ac_surface_set_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
                                 unsigned num_storage_samples, unsigned num_mipmap_levels,
                                 unsigned size_metadata, const uint32_t metadata[64])
{
   /* Dwords [2:9] of the metadata hold the image descriptor. */
   const uint32_t *desc = &metadata[2];
   uint64_t offset;

   /* Modifier-based imports carry their layout in the modifier, not here. */
   if (surf->modifier != DRM_FORMAT_MOD_INVALID)
      return true;

   if (info->chip_class >= GFX9)
      offset = surf->u.gfx9.surf_offset;
   else
      offset = (uint64_t)surf->u.legacy.level[0].offset_256B * 256;

   if (offset || /* Non-zero planes ignore metadata. */
       size_metadata < 10 * 4 || /* at least 2(header) + 8(desc) dwords */
       metadata[0] == 0 || /* invalid version number */
       metadata[1] != ac_get_umd_metadata_word1(info)) /* invalid PCI ID */ {
      /* Disable DCC because it might not be enabled. */
      ac_surface_zero_dcc_fields(surf);

      /* Don't report an error if the texture comes from an incompatible driver,
       * but this might not work.
       */
      return true;
   }

   /* Validate that sample counts and the number of mipmap levels match. */
   unsigned desc_last_level = G_008F1C_LAST_LEVEL(desc[3]);
   unsigned type = G_008F1C_TYPE(desc[3]);

   if (type == V_008F1C_SQ_RSRC_IMG_2D_MSAA || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
      /* For MSAA resources, LAST_LEVEL holds log2(samples) instead. */
      unsigned log_samples = util_logbase2(MAX2(1, num_storage_samples));

      if (desc_last_level != log_samples) {
         fprintf(stderr,
                 "amdgpu: invalid MSAA texture import, "
                 "metadata has log2(samples) = %u, the caller set %u\n",
                 desc_last_level, log_samples);
         return false;
      }
   } else {
      if (desc_last_level != num_mipmap_levels - 1) {
         fprintf(stderr,
                 "amdgpu: invalid mipmapped texture import, "
                 "metadata has last_level = %u, the caller set %u\n",
                 desc_last_level, num_mipmap_levels - 1);
         return false;
      }
   }

   if (info->chip_class >= GFX8 && G_008F28_COMPRESSION_EN(desc[6])) {
      /* Read DCC information. */
      switch (info->chip_class) {
      case GFX8:
         surf->meta_offset = (uint64_t)desc[7] << 8;
         break;

      case GFX9:
         /* Bits [39:8] in desc[7], bits [47:40] in META_DATA_ADDRESS. */
         surf->meta_offset =
            ((uint64_t)desc[7] << 8) | ((uint64_t)G_008F24_META_DATA_ADDRESS(desc[5]) << 40);
         surf->u.gfx9.color.dcc.pipe_aligned = G_008F24_META_PIPE_ALIGNED(desc[5]);
         surf->u.gfx9.color.dcc.rb_aligned = G_008F24_META_RB_ALIGNED(desc[5]);

         /* If DCC is unaligned, this can only be a displayable image. */
         if (!surf->u.gfx9.color.dcc.pipe_aligned && !surf->u.gfx9.color.dcc.rb_aligned)
            assert(surf->is_displayable);
         break;

      case GFX10:
      case GFX10_3:
         /* Bits [15:8] in META_DATA_ADDRESS_LO, the rest in desc[7]. */
         surf->meta_offset =
            ((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc[6]) << 8) | ((uint64_t)desc[7] << 16);
         surf->u.gfx9.color.dcc.pipe_aligned = G_00A018_META_PIPE_ALIGNED(desc[6]);
         break;

      default:
         assert(0);
         return false;
      }
   } else {
      /* Disable DCC. dcc_offset is always set by texture_from_handle
       * and must be cleared here.
       */
      ac_surface_zero_dcc_fields(surf);
   }

   return true;
}
2564
2565
/* Produce the UMD metadata blob exported with a shared texture: patch the
 * caller-supplied image descriptor (clear the base address, write the
 * relative DCC offset for the current chip generation), then assemble the
 * version-1 metadata layout described below.
 */
void ac_surface_get_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
                                 unsigned num_mipmap_levels, uint32_t desc[8],
                                 unsigned *size_metadata, uint32_t metadata[64])
{
   /* Clear the base address and set the relative DCC offset. */
   desc[0] = 0;
   desc[1] &= C_008F14_BASE_ADDRESS_HI;

   switch (info->chip_class) {
   case GFX6:
   case GFX7:
      /* No DCC before GFX8; nothing to patch. */
      break;
   case GFX8:
      desc[7] = surf->meta_offset >> 8;
      break;
   case GFX9:
      desc[7] = surf->meta_offset >> 8;
      desc[5] &= C_008F24_META_DATA_ADDRESS;
      desc[5] |= S_008F24_META_DATA_ADDRESS(surf->meta_offset >> 40);
      break;
   case GFX10:
   case GFX10_3:
      desc[6] &= C_00A018_META_DATA_ADDRESS_LO;
      desc[6] |= S_00A018_META_DATA_ADDRESS_LO(surf->meta_offset >> 8);
      desc[7] = surf->meta_offset >> 16;
      break;
   default:
      assert(0);
   }

   /* Metadata image format format version 1:
    * [0] = 1 (metadata format identifier)
    * [1] = (VENDOR_ID << 16) | PCI_ID
    * [2:9] = image descriptor for the whole resource
    * [2] is always 0, because the base address is cleared
    * [9] is the DCC offset bits [39:8] from the beginning of
    * the buffer
    * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
    */

   metadata[0] = 1; /* metadata image format version 1 */

   /* Tiling modes are ambiguous without a PCI ID. */
   metadata[1] = ac_get_umd_metadata_word1(info);

   /* Dwords [2:9] contain the image descriptor. */
   memcpy(&metadata[2], desc, 8 * 4);
   *size_metadata = 10 * 4;

   /* Dwords [10:..] contain the mipmap level offsets. */
   if (info->chip_class <= GFX8) {
      for (unsigned i = 0; i < num_mipmap_levels; i++)
         metadata[10 + i] = surf->u.legacy.level[i].offset_256B;

      *size_metadata += num_mipmap_levels * 4;
   }
}
2622
2623
static uint32_t ac_surface_get_gfx9_pitch_align(struct radeon_surf *surf)
2624
{
2625
if (surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR)
2626
return 256 / surf->bpe;
2627
2628
if (surf->u.gfx9.resource_type == RADEON_RESOURCE_3D)
2629
return 1; /* TODO */
2630
2631
unsigned bpe_shift = util_logbase2(surf->bpe) / 2;
2632
switch(surf->u.gfx9.swizzle_mode & ~3) {
2633
case ADDR_SW_LINEAR: /* 256B block. */
2634
return 16 >> bpe_shift;
2635
case ADDR_SW_4KB_Z:
2636
case ADDR_SW_4KB_Z_X:
2637
return 64 >> bpe_shift;
2638
case ADDR_SW_64KB_Z:
2639
case ADDR_SW_64KB_Z_T:
2640
case ADDR_SW_64KB_Z_X:
2641
return 256 >> bpe_shift;
2642
case ADDR_SW_VAR_Z_X:
2643
default:
2644
return 1; /* TODO */
2645
}
2646
}
2647
2648
bool ac_surface_override_offset_stride(const struct radeon_info *info, struct radeon_surf *surf,
2649
unsigned num_mipmap_levels, uint64_t offset, unsigned pitch)
2650
{
2651
/*
2652
* GFX10 and newer don't support custom strides. Furthermore, for
2653
* multiple miplevels or compression data we'd really need to rerun
2654
* addrlib to update all the fields in the surface. That, however, is a
2655
* software limitation and could be relaxed later.
2656
*/
2657
bool require_equal_pitch = surf->surf_size != surf->total_size ||
2658
num_mipmap_levels != 1 ||
2659
info->chip_class >= GFX10;
2660
2661
if (info->chip_class >= GFX9) {
2662
if (pitch) {
2663
if (surf->u.gfx9.surf_pitch != pitch && require_equal_pitch)
2664
return false;
2665
2666
if ((ac_surface_get_gfx9_pitch_align(surf) - 1) & pitch)
2667
return false;
2668
2669
if (pitch != surf->u.gfx9.surf_pitch) {
2670
unsigned slices = surf->surf_size / surf->u.gfx9.surf_slice_size;
2671
2672
surf->u.gfx9.surf_pitch = pitch;
2673
surf->u.gfx9.epitch = pitch - 1;
2674
surf->u.gfx9.surf_slice_size = (uint64_t)pitch * surf->u.gfx9.surf_height * surf->bpe;
2675
surf->total_size = surf->surf_size = surf->u.gfx9.surf_slice_size * slices;
2676
}
2677
}
2678
surf->u.gfx9.surf_offset = offset;
2679
if (surf->u.gfx9.zs.stencil_offset)
2680
surf->u.gfx9.zs.stencil_offset += offset;
2681
} else {
2682
if (pitch) {
2683
if (surf->u.legacy.level[0].nblk_x != pitch && require_equal_pitch)
2684
return false;
2685
2686
surf->u.legacy.level[0].nblk_x = pitch;
2687
surf->u.legacy.level[0].slice_size_dw =
2688
((uint64_t)pitch * surf->u.legacy.level[0].nblk_y * surf->bpe) / 4;
2689
}
2690
2691
if (offset) {
2692
for (unsigned i = 0; i < ARRAY_SIZE(surf->u.legacy.level); ++i)
2693
surf->u.legacy.level[i].offset_256B += offset / 256;
2694
}
2695
}
2696
2697
if (offset & ((1 << surf->alignment_log2) - 1) ||
2698
offset >= UINT64_MAX - surf->total_size)
2699
return false;
2700
2701
if (surf->meta_offset)
2702
surf->meta_offset += offset;
2703
if (surf->fmask_offset)
2704
surf->fmask_offset += offset;
2705
if (surf->cmask_offset)
2706
surf->cmask_offset += offset;
2707
if (surf->display_dcc_offset)
2708
surf->display_dcc_offset += offset;
2709
return true;
2710
}
2711
2712
unsigned ac_surface_get_nplanes(const struct radeon_surf *surf)
2713
{
2714
if (surf->modifier == DRM_FORMAT_MOD_INVALID)
2715
return 1;
2716
else if (surf->display_dcc_offset)
2717
return 3;
2718
else if (surf->meta_offset)
2719
return 2;
2720
else
2721
return 1;
2722
}
2723
2724
uint64_t ac_surface_get_plane_offset(enum chip_class chip_class,
2725
const struct radeon_surf *surf,
2726
unsigned plane, unsigned layer)
2727
{
2728
switch (plane) {
2729
case 0:
2730
if (chip_class >= GFX9) {
2731
return surf->u.gfx9.surf_offset +
2732
layer * surf->u.gfx9.surf_slice_size;
2733
} else {
2734
return (uint64_t)surf->u.legacy.level[0].offset_256B * 256 +
2735
layer * (uint64_t)surf->u.legacy.level[0].slice_size_dw * 4;
2736
}
2737
case 1:
2738
assert(!layer);
2739
return surf->display_dcc_offset ?
2740
surf->display_dcc_offset : surf->meta_offset;
2741
case 2:
2742
assert(!layer);
2743
return surf->meta_offset;
2744
default:
2745
unreachable("Invalid plane index");
2746
}
2747
}
2748
2749
uint64_t ac_surface_get_plane_stride(enum chip_class chip_class,
2750
const struct radeon_surf *surf,
2751
unsigned plane)
2752
{
2753
switch (plane) {
2754
case 0:
2755
if (chip_class >= GFX9) {
2756
return surf->u.gfx9.surf_pitch * surf->bpe;
2757
} else {
2758
return surf->u.legacy.level[0].nblk_x * surf->bpe;
2759
}
2760
case 1:
2761
return 1 + (surf->display_dcc_offset ?
2762
surf->u.gfx9.color.display_dcc_pitch_max : surf->u.gfx9.color.dcc_pitch_max);
2763
case 2:
2764
return surf->u.gfx9.color.dcc_pitch_max + 1;
2765
default:
2766
unreachable("Invalid plane index");
2767
}
2768
}
2769
2770
uint64_t ac_surface_get_plane_size(const struct radeon_surf *surf,
2771
unsigned plane)
2772
{
2773
switch (plane) {
2774
case 0:
2775
return surf->surf_size;
2776
case 1:
2777
return surf->display_dcc_offset ?
2778
surf->u.gfx9.color.display_dcc_size : surf->meta_size;
2779
case 2:
2780
return surf->meta_size;
2781
default:
2782
unreachable("Invalid plane index");
2783
}
2784
}
2785
2786
/* Dump the surface's computed layout (sizes, offsets, alignments, tiling
 * parameters) to the given stream, for debugging. GFX9+ and legacy (GFX8-)
 * layouts are printed with their respective field sets.
 */
void ac_surface_print_info(FILE *out, const struct radeon_info *info,
                           const struct radeon_surf *surf)
{
   if (info->chip_class >= GFX9) {
      fprintf(out,
              " Surf: size=%" PRIu64 ", slice_size=%" PRIu64 ", "
              "alignment=%u, swmode=%u, epitch=%u, pitch=%u, blk_w=%u, "
              "blk_h=%u, bpe=%u, flags=0x%"PRIx64"\n",
              surf->surf_size, surf->u.gfx9.surf_slice_size,
              1 << surf->surf_alignment_log2, surf->u.gfx9.swizzle_mode,
              surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch,
              surf->blk_w, surf->blk_h, surf->bpe, surf->flags);

      if (surf->fmask_offset)
         fprintf(out,
                 " FMask: offset=%" PRIu64 ", size=%" PRIu64 ", "
                 "alignment=%u, swmode=%u, epitch=%u\n",
                 surf->fmask_offset, surf->fmask_size,
                 1 << surf->fmask_alignment_log2, surf->u.gfx9.color.fmask_swizzle_mode,
                 surf->u.gfx9.color.fmask_epitch);

      if (surf->cmask_offset)
         fprintf(out,
                 " CMask: offset=%" PRIu64 ", size=%u, "
                 "alignment=%u\n",
                 surf->cmask_offset, surf->cmask_size,
                 1 << surf->cmask_alignment_log2);

      /* meta_offset holds HTile for depth/stencil surfaces... */
      if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset)
         fprintf(out,
                 " HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n",
                 surf->meta_offset, surf->meta_size,
                 1 << surf->meta_alignment_log2);

      /* ...and DCC for color surfaces. */
      if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
         fprintf(out,
                 " DCC: offset=%" PRIu64 ", size=%u, "
                 "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n",
                 surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2,
                 surf->u.gfx9.color.display_dcc_pitch_max, surf->num_meta_levels);

      if (surf->has_stencil)
         fprintf(out,
                 " Stencil: offset=%" PRIu64 ", swmode=%u, epitch=%u\n",
                 surf->u.gfx9.zs.stencil_offset,
                 surf->u.gfx9.zs.stencil_swizzle_mode,
                 surf->u.gfx9.zs.stencil_epitch);
   } else {
      /* Legacy (GFX8 and older) layout. */
      fprintf(out,
              " Surf: size=%" PRIu64 ", alignment=%u, blk_w=%u, blk_h=%u, "
              "bpe=%u, flags=0x%"PRIx64"\n",
              surf->surf_size, 1 << surf->surf_alignment_log2, surf->blk_w,
              surf->blk_h, surf->bpe, surf->flags);

      fprintf(out,
              " Layout: size=%" PRIu64 ", alignment=%u, bankw=%u, bankh=%u, "
              "nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
              surf->surf_size, 1 << surf->surf_alignment_log2,
              surf->u.legacy.bankw, surf->u.legacy.bankh,
              surf->u.legacy.num_banks, surf->u.legacy.mtilea,
              surf->u.legacy.tile_split, surf->u.legacy.pipe_config,
              (surf->flags & RADEON_SURF_SCANOUT) != 0);

      if (surf->fmask_offset)
         fprintf(out,
                 " FMask: offset=%" PRIu64 ", size=%" PRIu64 ", "
                 "alignment=%u, pitch_in_pixels=%u, bankh=%u, "
                 "slice_tile_max=%u, tile_mode_index=%u\n",
                 surf->fmask_offset, surf->fmask_size,
                 1 << surf->fmask_alignment_log2, surf->u.legacy.color.fmask.pitch_in_pixels,
                 surf->u.legacy.color.fmask.bankh,
                 surf->u.legacy.color.fmask.slice_tile_max,
                 surf->u.legacy.color.fmask.tiling_index);

      if (surf->cmask_offset)
         fprintf(out,
                 " CMask: offset=%" PRIu64 ", size=%u, alignment=%u, "
                 "slice_tile_max=%u\n",
                 surf->cmask_offset, surf->cmask_size,
                 1 << surf->cmask_alignment_log2, surf->u.legacy.color.cmask_slice_tile_max);

      if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset)
         fprintf(out, " HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n",
                 surf->meta_offset, surf->meta_size,
                 1 << surf->meta_alignment_log2);

      if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
         fprintf(out, " DCC: offset=%" PRIu64 ", size=%u, alignment=%u\n",
                 surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2);

      if (surf->has_stencil)
         fprintf(out, " StencilLayout: tilesplit=%u\n",
                 surf->u.legacy.stencil_tile_split);
   }
}
2881
2882
/* Emit NIR that computes the byte address of a metadata element (DCC/HTile)
 * for coordinate (x, y, z) on GFX10+, evaluating the addrlib-derived bit
 * equation stored in gfx9_meta_equation at shader run time.
 *
 * blkSizeBias adjusts the log2 of the metadata block size relative to
 * meta_block_width*meta_block_height; blkStart is the first equation bit to
 * evaluate (lower bits are skipped). pipe_xor is the per-surface pipe bank
 * xor value. Returns the address as an SSA def.
 */
static nir_ssa_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                                   struct gfx9_meta_equation *equation,
                                                   int blkSizeBias, unsigned blkStart,
                                                   nir_ssa_def *meta_pitch, nir_ssa_def *meta_slice_size,
                                                   nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
                                                   nir_ssa_def *pipe_xor)
{
   nir_ssa_def *zero = nir_imm_int(b, 0);
   nir_ssa_def *one = nir_imm_int(b, 1);

   /* The gfx10_bits equation form only exists on GFX10+. */
   assert(info->chip_class >= GFX10);

   unsigned meta_block_width_log2 = util_logbase2(equation->meta_block_width);
   unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height);
   unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 + blkSizeBias;

   nir_ssa_def *coord[] = {x, y, z, 0};
   nir_ssa_def *address = zero;

   /* Each address bit i is the XOR of selected coordinate bits; the masks in
    * u.gfx10_bits[] say which bits of which coordinate participate. */
   for (unsigned i = blkStart; i < blkSizeLog2 + 1; i++) {
      nir_ssa_def *v = zero;

      for (unsigned c = 0; c < 4; c++) {
         unsigned index = i * 4 + c - (blkStart * 4);
         if (equation->u.gfx10_bits[index]) {
            unsigned mask = equation->u.gfx10_bits[index];
            nir_ssa_def *bits = coord[c];

            /* XOR together every coordinate bit selected by the mask. */
            while (mask)
               v = nir_ixor(b, v, nir_iand(b, nir_ushr_imm(b, bits, u_bit_scan(&mask)), one));
         }
      }

      address = nir_ior(b, address, nir_ishl(b, v, nir_imm_int(b, i)));
   }

   unsigned blkMask = (1 << blkSizeLog2) - 1;
   unsigned pipeMask = (1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) - 1;
   unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
   /* Block coordinates and the linear index of the block within a slice. */
   nir_ssa_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
   nir_ssa_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
   nir_ssa_def *pb = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);
   nir_ssa_def *blkIndex = nir_iadd(b, nir_imul(b, yb, pb), xb);
   /* Pipe xor bits, positioned at the pipe interleave boundary and limited
    * to the block. */
   nir_ssa_def *pipeXor = nir_iand_imm(b, nir_ishl(b, nir_iand_imm(b, pipe_xor, pipeMask),
                                                   nir_imm_int(b, m_pipeInterleaveLog2)), blkMask);

   /* slice offset + block offset + (equation address >> 1) ^ pipe xor. */
   return nir_iadd(b, nir_iadd(b, nir_imul(b, meta_slice_size, z),
                               nir_imul(b, blkIndex, nir_ishl(b, one, nir_imm_int(b, blkSizeLog2)))),
                   nir_ixor(b, nir_ushr(b, address, one), pipeXor));
}
2932
2933
/* Emit NIR computing the DCC metadata address for texel (x, y, z, sample).
 * On GFX10+ this delegates to the generic GFX10 meta-equation evaluator; on
 * GFX9 it evaluates the per-bit coordinate equation in u.gfx9 directly.
 * bpe is the surface's bytes per element.
 */
nir_ssa_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                        unsigned bpe, struct gfx9_meta_equation *equation,
                                        nir_ssa_def *dcc_pitch, nir_ssa_def *dcc_height,
                                        nir_ssa_def *dcc_slice_size,
                                        nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
                                        nir_ssa_def *sample, nir_ssa_def *pipe_xor)
{
   nir_ssa_def *zero = nir_imm_int(b, 0);
   nir_ssa_def *one = nir_imm_int(b, 1);

   if (info->chip_class >= GFX10) {
      unsigned bpp_log2 = util_logbase2(bpe);

      /* Block-size bias of bpp_log2 - 8 and block start bit 1 select the
       * DCC flavor of the shared GFX10 evaluator. */
      return gfx10_nir_meta_addr_from_coord(b, info, equation, bpp_log2 - 8, 1,
                                            dcc_pitch, dcc_slice_size,
                                            x, y, z, pipe_xor);
   } else {
      assert(info->chip_class == GFX9);

      unsigned meta_block_width_log2 = util_logbase2(equation->meta_block_width);
      unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height);
      unsigned meta_block_depth_log2 = util_logbase2(equation->meta_block_depth);

      unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
      unsigned numPipeBits = equation->u.gfx9.num_pipe_bits;
      /* Linear index of the metadata block containing (x, y, z). */
      nir_ssa_def *pitchInBlock = nir_ushr_imm(b, dcc_pitch, meta_block_width_log2);
      nir_ssa_def *sliceSizeInBlock = nir_imul(b, nir_ushr_imm(b, dcc_height, meta_block_height_log2),
                                               pitchInBlock);

      nir_ssa_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
      nir_ssa_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
      nir_ssa_def *zb = nir_ushr_imm(b, z, meta_block_depth_log2);

      nir_ssa_def *blockIndex = nir_iadd(b, nir_iadd(b, nir_imul(b, zb, sliceSizeInBlock),
                                                     nir_imul(b, yb, pitchInBlock)), xb);
      /* Coordinate sources indexed by bit[i].coord[c].dim (0=x .. 4=blockIndex). */
      nir_ssa_def *coords[] = {x, y, z, sample, blockIndex};

      nir_ssa_def *address = zero;
      unsigned num_bits = equation->u.gfx9.num_bits;
      assert(num_bits <= 32);

      /* Compute the address up until the last bit that doesn't use the block index. */
      for (unsigned i = 0; i < num_bits - 1; i++) {
         nir_ssa_def *xor = zero;

         for (unsigned c = 0; c < 5; c++) {
            /* dim >= 5 marks an unused term. */
            if (equation->u.gfx9.bit[i].coord[c].dim >= 5)
               continue;

            assert(equation->u.gfx9.bit[i].coord[c].ord < 32);
            nir_ssa_def *ison =
               nir_iand(b, nir_ushr_imm(b, coords[equation->u.gfx9.bit[i].coord[c].dim],
                                        equation->u.gfx9.bit[i].coord[c].ord), one);

            xor = nir_ixor(b, xor, ison);
         }
         address = nir_ior(b, address, nir_ishl(b, xor, nir_imm_int(b, i)));
      }

      /* Fill the remaining bits with the block index. */
      unsigned last = num_bits - 1;
      address = nir_ior(b, address,
                        nir_ishl(b, nir_ushr_imm(b, blockIndex,
                                                 equation->u.gfx9.bit[last].coord[0].ord),
                                 nir_imm_int(b, last)));

      /* Fold the pipe bank xor in at the pipe interleave position. */
      nir_ssa_def *pipeXor = nir_iand_imm(b, pipe_xor, (1 << numPipeBits) - 1);
      return nir_ixor(b, nir_ushr(b, address, one),
                      nir_ishl(b, pipeXor, nir_imm_int(b, m_pipeInterleaveLog2)));
   }
}
3004
3005
/* Emit NIR computing the HTile metadata address for pixel (x, y, z).
 * Thin wrapper over the GFX10 meta-equation evaluator with the HTile
 * block parameters.
 */
nir_ssa_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                          struct gfx9_meta_equation *equation,
                                          nir_ssa_def *htile_pitch,
                                          nir_ssa_def *htile_slice_size,
                                          nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
                                          nir_ssa_def *pipe_xor)
{
   /* NOTE(review): -4/2 are the HTile block-size bias and first equation
    * bit; presumably tied to HTile's per-8x8-tile granularity — confirm
    * against the CPU-side addressing code/addrlib. */
   const int block_size_bias = -4;
   const unsigned first_block_bit = 2;

   return gfx10_nir_meta_addr_from_coord(b, info, equation, block_size_bias,
                                         first_block_bit, htile_pitch,
                                         htile_slice_size, x, y, z, pipe_xor);
}
3016
3017