Path: blob/21.2-virgl/src/amd/common/ac_surface_meta_address_test.c
7286 views
/*1* Copyright © 2021 Advanced Micro Devices, Inc.2* All Rights Reserved.3*4* Permission is hereby granted, free of charge, to any person obtaining5* a copy of this software and associated documentation files (the6* "Software"), to deal in the Software without restriction, including7* without limitation the rights to use, copy, modify, merge, publish,8* distribute, sub license, and/or sell copies of the Software, and to9* permit persons to whom the Software is furnished to do so, subject to10* the following conditions:11*12* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,13* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES14* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND15* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS16* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER17* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE19* USE OR OTHER DEALINGS IN THE SOFTWARE.20*21* The above copyright notice and this permission notice (including the22* next paragraph) shall be included in all copies or substantial portions23* of the Software.24*/2526/* Make the test not meaningless when asserts are disabled. */27#undef NDEBUG2829#include <assert.h>30#include <inttypes.h>31#include <stdio.h>32#include <stdlib.h>3334#include <amdgpu.h>35#include "drm-uapi/amdgpu_drm.h"36#include "drm-uapi/drm_fourcc.h"3738#include "ac_surface.h"39#include "util/macros.h"40#include "util/u_atomic.h"41#include "util/u_math.h"42#include "util/u_vector.h"43#include "util/mesa-sha1.h"44#include "addrlib/inc/addrinterface.h"4546#include "ac_surface_test_common.h"4748/*49* The main goal of this test is to validate that our dcc/htile addressing50* functions match addrlib behavior.51*/5253/* DCC address computation without mipmapping. */54static unsigned gfx9_dcc_addr_from_coord(const struct radeon_info *info,55/* Shader key inputs: */56/* equation varies with resource_type, swizzle_mode,57* bpp, number of fragments, pipe_aligned, rb_aligned */58ADDR2_COMPUTE_DCCINFO_OUTPUT *eq,59unsigned meta_block_width, unsigned meta_block_height,60unsigned meta_block_depth,61/* Shader inputs: */62unsigned dcc_pitch, unsigned dcc_height,63unsigned x, unsigned y, unsigned z,64unsigned sample, unsigned pipe_xor)65{66/* The compiled shader shouldn't be complicated considering there are a lot of constants here. */67unsigned meta_block_width_log2 = util_logbase2(meta_block_width);68unsigned meta_block_height_log2 = util_logbase2(meta_block_height);69unsigned meta_block_depth_log2 = util_logbase2(meta_block_depth);7071unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);72unsigned numPipeBits = eq->equation.gfx9.numPipeBits;73unsigned pitchInBlock = dcc_pitch >> meta_block_width_log2;74unsigned sliceSizeInBlock = (dcc_height >> meta_block_height_log2) * pitchInBlock;7576unsigned xb = x >> meta_block_width_log2;77unsigned yb = y >> meta_block_height_log2;78unsigned zb = z >> meta_block_depth_log2;7980unsigned blockIndex = zb * sliceSizeInBlock + yb * pitchInBlock + xb;81unsigned coords[] = {x, y, z, sample, blockIndex};8283unsigned address = 0;84unsigned num_bits = eq->equation.gfx9.num_bits;85assert(num_bits <= 32);8687/* Compute the address up until the last bit that doesn't use the block index. */88for (unsigned b = 0; b < num_bits - 1; b++) {89unsigned xor = 0;90for (unsigned c = 0; c < 5; c++) {91if (eq->equation.gfx9.bit[b].coord[c].dim >= 5)92continue;9394assert(eq->equation.gfx9.bit[b].coord[c].ord < 32);95unsigned ison = (coords[eq->equation.gfx9.bit[b].coord[c].dim] >>96eq->equation.gfx9.bit[b].coord[c].ord) & 0x1;9798xor ^= ison;99}100address |= xor << b;101}102103/* Fill the remaining bits with the block index. */104unsigned last = num_bits - 1;105address |= (blockIndex >> eq->equation.gfx9.bit[last].coord[0].ord) << last;106107unsigned pipeXor = pipe_xor & ((1 << numPipeBits) - 1);108return (address >> 1) ^ (pipeXor << m_pipeInterleaveLog2);109}110111/* DCC/HTILE address computation for GFX10. */112static unsigned gfx10_meta_addr_from_coord(const struct radeon_info *info,113/* Shader key inputs: */114const uint16_t *equation,115unsigned meta_block_width, unsigned meta_block_height,116unsigned blkSizeLog2,117/* Shader inputs: */118unsigned meta_pitch, unsigned meta_slice_size,119unsigned x, unsigned y, unsigned z,120unsigned pipe_xor)121{122/* The compiled shader shouldn't be complicated considering there are a lot of constants here. */123unsigned meta_block_width_log2 = util_logbase2(meta_block_width);124unsigned meta_block_height_log2 = util_logbase2(meta_block_height);125126unsigned coord[] = {x, y, z, 0};127unsigned address = 0;128129for (unsigned i = 0; i < blkSizeLog2 + 1; i++) {130unsigned v = 0;131132for (unsigned c = 0; c < 4; c++) {133if (equation[i*4+c] != 0) {134unsigned mask = equation[i*4+c];135unsigned bits = coord[c];136137while (mask)138v ^= (bits >> u_bit_scan(&mask)) & 0x1;139}140}141142address |= v << i;143}144145unsigned blkMask = (1 << blkSizeLog2) - 1;146unsigned pipeMask = (1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) - 1;147unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);148unsigned xb = x >> meta_block_width_log2;149unsigned yb = y >> meta_block_height_log2;150unsigned pb = meta_pitch >> meta_block_width_log2;151unsigned blkIndex = (yb * pb) + xb;152unsigned pipeXor = ((pipe_xor & pipeMask) << m_pipeInterleaveLog2) & blkMask;153154return (meta_slice_size * z) +155(blkIndex * (1 << blkSizeLog2)) +156((address >> 1) ^ pipeXor);157}158159/* DCC address computation without mipmapping and MSAA. */160static unsigned gfx10_dcc_addr_from_coord(const struct radeon_info *info,161/* Shader key inputs: */162/* equation varies with bpp and pipe_aligned */163const uint16_t *equation, unsigned bpp,164unsigned meta_block_width, unsigned meta_block_height,165/* Shader inputs: */166unsigned dcc_pitch, unsigned dcc_slice_size,167unsigned x, unsigned y, unsigned z,168unsigned pipe_xor)169{170unsigned bpp_log2 = util_logbase2(bpp >> 3);171unsigned meta_block_width_log2 = util_logbase2(meta_block_width);172unsigned meta_block_height_log2 = util_logbase2(meta_block_height);173unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 + bpp_log2 - 8;174175return gfx10_meta_addr_from_coord(info, equation,176meta_block_width, meta_block_height,177blkSizeLog2,178dcc_pitch, dcc_slice_size,179x, y, z, pipe_xor);180}181182static bool one_dcc_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,183const struct radeon_info *info, unsigned width, unsigned height,184unsigned depth, unsigned samples, unsigned bpp,185unsigned swizzle_mode, bool pipe_aligned, bool rb_aligned,186unsigned mrt_index,187unsigned start_x, unsigned start_y, unsigned start_z,188unsigned start_sample)189{190ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT)};191ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT)};192ADDR2_COMPUTE_DCCINFO_INPUT din = {sizeof(din)};193ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {sizeof(dout)};194ADDR2_COMPUTE_DCC_ADDRFROMCOORD_INPUT in = {sizeof(in)};195ADDR2_COMPUTE_DCC_ADDRFROMCOORD_OUTPUT out = {sizeof(out)};196ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};197198dout.pMipInfo = meta_mip_info;199200/* Compute DCC info. */201in.dccKeyFlags.pipeAligned = din.dccKeyFlags.pipeAligned = pipe_aligned;202in.dccKeyFlags.rbAligned = din.dccKeyFlags.rbAligned = rb_aligned;203xin.resourceType = in.resourceType = din.resourceType = ADDR_RSRC_TEX_2D;204xin.swizzleMode = in.swizzleMode = din.swizzleMode = swizzle_mode;205in.bpp = din.bpp = bpp;206xin.numFrags = xin.numSamples = in.numFrags = din.numFrags = samples;207in.numMipLevels = din.numMipLevels = 1; /* addrlib can't do DccAddrFromCoord with mipmapping */208din.unalignedWidth = width;209din.unalignedHeight = height;210din.numSlices = depth;211din.firstMipIdInTail = 1;212213int ret = Addr2ComputeDccInfo(addrlib, &din, &dout);214assert(ret == ADDR_OK);215216/* Compute xor. */217static AddrFormat format[] = {218ADDR_FMT_8,219ADDR_FMT_16,220ADDR_FMT_32,221ADDR_FMT_32_32,222ADDR_FMT_32_32_32_32,223};224xin.flags.color = 1;225xin.flags.texture = 1;226xin.flags.opt4space = 1;227xin.flags.metaRbUnaligned = !rb_aligned;228xin.flags.metaPipeUnaligned = !pipe_aligned;229xin.format = format[util_logbase2(bpp / 8)];230xin.surfIndex = mrt_index;231232ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);233assert(ret == ADDR_OK);234235/* Compute addresses */236in.compressBlkWidth = dout.compressBlkWidth;237in.compressBlkHeight = dout.compressBlkHeight;238in.compressBlkDepth = dout.compressBlkDepth;239in.metaBlkWidth = dout.metaBlkWidth;240in.metaBlkHeight = dout.metaBlkHeight;241in.metaBlkDepth = dout.metaBlkDepth;242in.dccRamSliceSize = dout.dccRamSliceSize;243244in.mipId = 0;245in.pitch = dout.pitch;246in.height = dout.height;247in.pipeXor = xout.pipeBankXor;248249/* Validate that the packed gfx9_meta_equation structure can fit all fields. */250const struct gfx9_meta_equation eq;251if (info->chip_class == GFX9) {252/* The bit array is smaller in gfx9_meta_equation than in addrlib. */253assert(dout.equation.gfx9.num_bits <= ARRAY_SIZE(eq.u.gfx9.bit));254} else {255/* gfx9_meta_equation doesn't store the first 4 and the last 8 elements. They must be 0. */256for (unsigned i = 0; i < 4; i++)257assert(dout.equation.gfx10_bits[i] == 0);258259for (unsigned i = ARRAY_SIZE(eq.u.gfx10_bits) + 4; i < 68; i++)260assert(dout.equation.gfx10_bits[i] == 0);261}262263for (in.x = start_x; in.x < in.pitch; in.x += dout.compressBlkWidth) {264for (in.y = start_y; in.y < in.height; in.y += dout.compressBlkHeight) {265for (in.slice = start_z; in.slice < depth; in.slice += dout.compressBlkDepth) {266for (in.sample = start_sample; in.sample < samples; in.sample++) {267int r = Addr2ComputeDccAddrFromCoord(addrlib, &in, &out);268if (r != ADDR_OK) {269printf("%s addrlib error: %s\n", name, test);270abort();271}272273unsigned addr;274if (info->chip_class == GFX9) {275addr = gfx9_dcc_addr_from_coord(info, &dout, dout.metaBlkWidth, dout.metaBlkHeight,276dout.metaBlkDepth, dout.pitch, dout.height,277in.x, in.y, in.slice, in.sample, in.pipeXor);278if (in.sample == 1) {279/* Sample 0 should be one byte before sample 1. The DCC MSAA clear relies on it. */280assert(addr - 1 ==281gfx9_dcc_addr_from_coord(info, &dout, dout.metaBlkWidth, dout.metaBlkHeight,282dout.metaBlkDepth, dout.pitch, dout.height,283in.x, in.y, in.slice, 0, in.pipeXor));284}285} else {286addr = gfx10_dcc_addr_from_coord(info, dout.equation.gfx10_bits,287in.bpp, dout.metaBlkWidth, dout.metaBlkHeight,288dout.pitch, dout.dccRamSliceSize,289in.x, in.y, in.slice, in.pipeXor);290}291292if (out.addr != addr) {293printf("%s fail (%s) at %ux%ux%u@%u: expected = %llu, got = %u\n",294name, test, in.x, in.y, in.slice, in.sample, out.addr, addr);295return false;296}297}298}299}300}301return true;302}303304static void run_dcc_address_test(const char *name, const struct radeon_info *info, bool full)305{306unsigned total = 0;307unsigned fails = 0;308unsigned swizzle_mode = info->chip_class == GFX9 ? ADDR_SW_64KB_S_X : ADDR_SW_64KB_R_X;309unsigned last_size, max_samples, min_bpp, max_bpp;310311if (full) {312last_size = 6*6 - 1;313max_samples = 8;314min_bpp = 8;315max_bpp = 128;316} else {317/* The test coverage is reduced for Gitlab CI because it timeouts. */318last_size = 0;319max_samples = 2;320min_bpp = 32;321max_bpp = 64;322}323324#ifdef HAVE_OPENMP325#pragma omp parallel for326#endif327for (unsigned size = 0; size <= last_size; size++) {328unsigned width = 8 + 379 * (size % 6);329unsigned height = 8 + 379 * ((size / 6) % 6);330331struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);332ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);333334unsigned local_fails = 0;335unsigned local_total = 0;336337for (unsigned bpp = min_bpp; bpp <= max_bpp; bpp *= 2) {338/* addrlib can do DccAddrFromCoord with MSAA images only on gfx9 */339for (unsigned samples = 1; samples <= (info->chip_class == GFX9 ? max_samples : 1); samples *= 2) {340for (int rb_aligned = true; rb_aligned >= (samples > 1 ? true : false); rb_aligned--) {341for (int pipe_aligned = true; pipe_aligned >= (samples > 1 ? true : false); pipe_aligned--) {342for (unsigned mrt_index = 0; mrt_index < 2; mrt_index++) {343unsigned depth = 2;344char test[256];345346snprintf(test, sizeof(test), "%ux%ux%u %ubpp %u samples rb:%u pipe:%u",347width, height, depth, bpp, samples, rb_aligned, pipe_aligned);348349if (one_dcc_address_test(name, test, addrlib, info, width, height, depth, samples,350bpp, swizzle_mode, pipe_aligned, rb_aligned, mrt_index,3510, 0, 0, 0)) {352} else {353local_fails++;354}355local_total++;356}357}358}359}360}361362ac_addrlib_destroy(ac_addrlib);363p_atomic_add(&fails, local_fails);364p_atomic_add(&total, local_total);365}366printf("%16s total: %u, fail: %u\n", name, total, fails);367}368369/* HTILE address computation without mipmapping. */370static unsigned gfx10_htile_addr_from_coord(const struct radeon_info *info,371const uint16_t *equation,372unsigned meta_block_width,373unsigned meta_block_height,374unsigned htile_pitch, unsigned htile_slice_size,375unsigned x, unsigned y, unsigned z,376unsigned pipe_xor)377{378unsigned meta_block_width_log2 = util_logbase2(meta_block_width);379unsigned meta_block_height_log2 = util_logbase2(meta_block_height);380unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 - 4;381382return gfx10_meta_addr_from_coord(info, equation,383meta_block_width, meta_block_height,384blkSizeLog2,385htile_pitch, htile_slice_size,386x, y, z, pipe_xor);387}388389static bool one_htile_address_test(const char *name, const char *test, ADDR_HANDLE addrlib,390const struct radeon_info *info,391unsigned width, unsigned height, unsigned depth,392unsigned bpp, unsigned swizzle_mode,393unsigned start_x, unsigned start_y, unsigned start_z)394{395ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};396ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};397ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};398ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};399ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_INPUT in = {0};400ADDR2_COMPUTE_HTILE_ADDRFROMCOORD_OUTPUT out = {0};401ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};402403hout.pMipInfo = meta_mip_info;404405/* Compute HTILE info. */406hin.hTileFlags.pipeAligned = 1;407hin.hTileFlags.rbAligned = 1;408hin.depthFlags.depth = 1;409hin.depthFlags.texture = 1;410hin.depthFlags.opt4space = 1;411hin.swizzleMode = in.swizzleMode = xin.swizzleMode = swizzle_mode;412hin.unalignedWidth = in.unalignedWidth = width;413hin.unalignedHeight = in.unalignedHeight = height;414hin.numSlices = in.numSlices = depth;415hin.numMipLevels = in.numMipLevels = 1; /* addrlib can't do HtileAddrFromCoord with mipmapping. */416hin.firstMipIdInTail = 1;417418int ret = Addr2ComputeHtileInfo(addrlib, &hin, &hout);419assert(ret == ADDR_OK);420421/* Compute xor. */422static AddrFormat format[] = {423ADDR_FMT_8, /* unused */424ADDR_FMT_16,425ADDR_FMT_32,426};427xin.flags = hin.depthFlags;428xin.resourceType = ADDR_RSRC_TEX_2D;429xin.format = format[util_logbase2(bpp / 8)];430xin.numFrags = xin.numSamples = in.numSamples = 1;431432ret = Addr2ComputePipeBankXor(addrlib, &xin, &xout);433assert(ret == ADDR_OK);434435in.hTileFlags = hin.hTileFlags;436in.depthflags = xin.flags;437in.bpp = bpp;438in.pipeXor = xout.pipeBankXor;439440for (in.x = start_x; in.x < width; in.x++) {441for (in.y = start_y; in.y < height; in.y++) {442for (in.slice = start_z; in.slice < depth; in.slice++) {443int r = Addr2ComputeHtileAddrFromCoord(addrlib, &in, &out);444if (r != ADDR_OK) {445printf("%s addrlib error: %s\n", name, test);446abort();447}448449unsigned addr =450gfx10_htile_addr_from_coord(info, hout.equation.gfx10_bits,451hout.metaBlkWidth, hout.metaBlkHeight,452hout.pitch, hout.sliceSize,453in.x, in.y, in.slice, in.pipeXor);454if (out.addr != addr) {455printf("%s fail (%s) at %ux%ux%u: expected = %llu, got = %u\n",456name, test, in.x, in.y, in.slice, out.addr, addr);457return false;458}459}460}461}462463return true;464}465466static void run_htile_address_test(const char *name, const struct radeon_info *info, bool full)467{468unsigned total = 0;469unsigned fails = 0;470unsigned first_size = 0, last_size = 6*6 - 1, max_bpp = 32;471472/* The test coverage is reduced for Gitlab CI because it timeouts. */473if (!full) {474first_size = last_size = 0;475}476477#ifdef HAVE_OPENMP478#pragma omp parallel for479#endif480for (unsigned size = first_size; size <= last_size; size++) {481unsigned width = 8 + 379 * (size % 6);482unsigned height = 8 + 379 * (size / 6);483484struct ac_addrlib *ac_addrlib = ac_addrlib_create(info, NULL);485ADDR_HANDLE addrlib = ac_addrlib_get_handle(ac_addrlib);486487for (unsigned depth = 1; depth <= 2; depth *= 2) {488for (unsigned bpp = 16; bpp <= max_bpp; bpp *= 2) {489if (one_htile_address_test(name, name, addrlib, info, width, height, depth,490bpp, ADDR_SW_64KB_Z_X, 0, 0, 0)) {491} else {492p_atomic_inc(&fails);493}494p_atomic_inc(&total);495}496}497498ac_addrlib_destroy(ac_addrlib);499}500printf("%16s total: %u, fail: %u\n", name, total, fails);501}502int main(int argc, char **argv)503{504bool full = false;505506if (argc == 2 && !strcmp(argv[1], "--full"))507full = true;508else509puts("Specify --full to run the full test.");510511puts("DCC:");512for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {513struct radeon_info info = get_radeon_info(&testcases[i]);514515run_dcc_address_test(testcases[i].name, &info, full);516}517518puts("HTILE:");519for (unsigned i = 0; i < ARRAY_SIZE(testcases); ++i) {520struct radeon_info info = get_radeon_info(&testcases[i]);521522/* Only GFX10+ is currently supported. */523if (info.chip_class < GFX10)524continue;525526run_htile_address_test(testcases[i].name, &info, full);527}528529return 0;530}531532533