Path: blob/21.2-virgl/src/gallium/drivers/r600/r600_test_dma.c
4570 views
/*1* Copyright 2016 Advanced Micro Devices, Inc.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22*/2324/* This file implements randomized SDMA texture blit tests. */2526#include "r600_pipe_common.h"27#include "util/u_surface.h"28#include "util/rand_xor.h"2930static uint64_t seed_xorshift128plus[2];3132#define RAND_NUM_SIZE 83334/* The GPU blits are emulated on the CPU using these CPU textures. */3536struct cpu_texture {37uint8_t *ptr;38uint64_t size;39uint64_t layer_stride;40unsigned stride;41};4243static void alloc_cpu_texture(struct cpu_texture *tex,44struct pipe_resource *templ, int bpp)45{46tex->stride = align(templ->width0 * bpp, RAND_NUM_SIZE);47tex->layer_stride = (uint64_t)tex->stride * templ->height0;48tex->size = tex->layer_stride * templ->array_size;49tex->ptr = malloc(tex->size);50assert(tex->ptr);51}5253static void set_random_pixels(struct pipe_context *ctx,54struct pipe_resource *tex,55struct cpu_texture *cpu)56{57struct pipe_transfer *t;58uint8_t *map;59unsigned x,y,z;6061map = pipe_texture_map_3d(ctx, tex, 0, PIPE_MAP_WRITE,620, 0, 0, tex->width0, tex->height0,63tex->array_size, &t);64assert(map);6566for (z = 0; z < tex->array_size; z++) {67for (y = 0; y < tex->height0; y++) {68uint64_t *ptr = (uint64_t*)69(map + t->layer_stride*z + t->stride*y);70uint64_t *ptr_cpu = (uint64_t*)71(cpu->ptr + cpu->layer_stride*z + cpu->stride*y);72unsigned size = cpu->stride / RAND_NUM_SIZE;7374assert(t->stride % RAND_NUM_SIZE == 0);75assert(cpu->stride % RAND_NUM_SIZE == 0);7677for (x = 0; x < size; x++) {78*ptr++ = *ptr_cpu++ =79rand_xorshift128plus(seed_xorshift128plus);80}81}82}8384pipe_texture_unmap(ctx, t);85}8687static bool compare_textures(struct pipe_context *ctx,88struct pipe_resource *tex,89struct cpu_texture *cpu, int bpp)90{91struct pipe_transfer *t;92uint8_t *map;93int y,z;94bool pass = true;9596map = pipe_texture_map_3d(ctx, tex, 0, PIPE_MAP_READ,970, 0, 0, tex->width0, tex->height0,98tex->array_size, &t);99assert(map);100101for (z = 0; z < tex->array_size; z++) {102for (y = 0; y < tex->height0; y++) {103uint8_t *ptr = map + t->layer_stride*z + t->stride*y;104uint8_t *cpu_ptr = cpu->ptr +105cpu->layer_stride*z + cpu->stride*y;106107if (memcmp(ptr, cpu_ptr, tex->width0 * bpp)) {108pass = false;109goto done;110}111}112}113done:114pipe_texture_unmap(ctx, t);115return pass;116}117118static enum pipe_format get_format_from_bpp(int bpp)119{120switch (bpp) {121case 1:122return PIPE_FORMAT_R8_UINT;123case 2:124return PIPE_FORMAT_R16_UINT;125case 4:126return PIPE_FORMAT_R32_UINT;127case 8:128return PIPE_FORMAT_R32G32_UINT;129case 16:130return PIPE_FORMAT_R32G32B32A32_UINT;131default:132assert(0);133return PIPE_FORMAT_NONE;134}135}136137static const char *array_mode_to_string(struct r600_common_screen *rscreen,138struct radeon_surf *surf)139{140if (rscreen->chip_class >= GFX9) {141/* TODO */142return " UNKNOWN";143} else {144switch (surf->u.legacy.level[0].mode) {145case RADEON_SURF_MODE_LINEAR_ALIGNED:146return "LINEAR_ALIGNED";147case RADEON_SURF_MODE_1D:148return "1D_TILED_THIN1";149case RADEON_SURF_MODE_2D:150return "2D_TILED_THIN1";151default:152assert(0);153return " UNKNOWN";154}155}156}157158static unsigned generate_max_tex_side(unsigned max_tex_side)159{160switch (rand() % 4) {161case 0:162/* Try to hit large sizes in 1/4 of the cases. */163return max_tex_side;164case 1:165/* Try to hit 1D tiling in 1/4 of the cases. */166return 128;167default:168/* Try to hit common sizes in 2/4 of the cases. */169return 2048;170}171}172173void r600_test_dma(struct r600_common_screen *rscreen)174{175struct pipe_screen *screen = &rscreen->b;176struct pipe_context *ctx = screen->context_create(screen, NULL, 0);177struct r600_common_context *rctx = (struct r600_common_context*)ctx;178uint64_t max_alloc_size;179unsigned i, iterations, num_partial_copies, max_tex_side;180unsigned num_pass = 0, num_fail = 0;181182max_tex_side = screen->get_param(screen, PIPE_CAP_MAX_TEXTURE_2D_SIZE);183184/* Max 128 MB allowed for both textures. */185max_alloc_size = 128 * 1024 * 1024;186187/* the seed for random test parameters */188srand(0x9b47d95b);189/* the seed for random pixel data */190s_rand_xorshift128plus(seed_xorshift128plus, false);191192iterations = 1000000000; /* just kill it when you are bored */193num_partial_copies = 30;194195/* These parameters are randomly generated per test:196* - whether to do one whole-surface copy or N partial copies per test197* - which tiling modes to use (LINEAR_ALIGNED, 1D, 2D)198* - which texture dimensions to use199* - whether to use VRAM (all tiling modes) and GTT (staging, linear200* only) allocations201* - random initial pixels in src202* - generate random subrectangle copies for partial blits203*/204for (i = 0; i < iterations; i++) {205struct pipe_resource tsrc = {}, tdst = {}, *src, *dst;206struct r600_texture *rdst;207struct r600_texture *rsrc;208struct cpu_texture src_cpu, dst_cpu;209unsigned bpp, max_width, max_height, max_depth, j, num;210unsigned gfx_blits = 0, dma_blits = 0, max_tex_side_gen;211unsigned max_tex_layers;212bool pass;213bool do_partial_copies = rand() & 1;214215/* generate a random test case */216tsrc.target = tdst.target = PIPE_TEXTURE_2D_ARRAY;217tsrc.depth0 = tdst.depth0 = 1;218219bpp = 1 << (rand() % 5);220tsrc.format = tdst.format = get_format_from_bpp(bpp);221222max_tex_side_gen = generate_max_tex_side(max_tex_side);223max_tex_layers = rand() % 4 ? 1 : 5;224225tsrc.width0 = (rand() % max_tex_side_gen) + 1;226tsrc.height0 = (rand() % max_tex_side_gen) + 1;227tsrc.array_size = (rand() % max_tex_layers) + 1;228229/* Have a 1/4 chance of getting power-of-two dimensions. */230if (rand() % 4 == 0) {231tsrc.width0 = util_next_power_of_two(tsrc.width0);232tsrc.height0 = util_next_power_of_two(tsrc.height0);233}234235if (!do_partial_copies) {236/* whole-surface copies only, same dimensions */237tdst = tsrc;238} else {239max_tex_side_gen = generate_max_tex_side(max_tex_side);240max_tex_layers = rand() % 4 ? 1 : 5;241242/* many partial copies, dimensions can be different */243tdst.width0 = (rand() % max_tex_side_gen) + 1;244tdst.height0 = (rand() % max_tex_side_gen) + 1;245tdst.array_size = (rand() % max_tex_layers) + 1;246247/* Have a 1/4 chance of getting power-of-two dimensions. */248if (rand() % 4 == 0) {249tdst.width0 = util_next_power_of_two(tdst.width0);250tdst.height0 = util_next_power_of_two(tdst.height0);251}252}253254/* check texture sizes */255if ((uint64_t)tsrc.width0 * tsrc.height0 * tsrc.array_size * bpp +256(uint64_t)tdst.width0 * tdst.height0 * tdst.array_size * bpp >257max_alloc_size) {258/* too large, try again */259i--;260continue;261}262263/* VRAM + the tiling mode depends on dimensions (3/4 of cases),264* or GTT + linear only (1/4 of cases)265*/266tsrc.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING;267tdst.usage = rand() % 4 ? PIPE_USAGE_DEFAULT : PIPE_USAGE_STAGING;268269/* Allocate textures (both the GPU and CPU copies).270* The CPU will emulate what the GPU should be doing.271*/272src = screen->resource_create(screen, &tsrc);273dst = screen->resource_create(screen, &tdst);274assert(src);275assert(dst);276rdst = (struct r600_texture*)dst;277rsrc = (struct r600_texture*)src;278alloc_cpu_texture(&src_cpu, &tsrc, bpp);279alloc_cpu_texture(&dst_cpu, &tdst, bpp);280281printf("%4u: dst = (%5u x %5u x %u, %s), "282" src = (%5u x %5u x %u, %s), bpp = %2u, ",283i, tdst.width0, tdst.height0, tdst.array_size,284array_mode_to_string(rscreen, &rdst->surface),285tsrc.width0, tsrc.height0, tsrc.array_size,286array_mode_to_string(rscreen, &rsrc->surface), bpp);287fflush(stdout);288289/* set src pixels */290set_random_pixels(ctx, src, &src_cpu);291292/* clear dst pixels */293rctx->clear_buffer(ctx, dst, 0, rdst->surface.surf_size, 0, true);294memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size);295296/* preparation */297max_width = MIN2(tsrc.width0, tdst.width0);298max_height = MIN2(tsrc.height0, tdst.height0);299max_depth = MIN2(tsrc.array_size, tdst.array_size);300301num = do_partial_copies ? num_partial_copies : 1;302for (j = 0; j < num; j++) {303int width, height, depth;304int srcx, srcy, srcz, dstx, dsty, dstz;305struct pipe_box box;306unsigned old_num_draw_calls = rctx->num_draw_calls;307unsigned old_num_dma_calls = rctx->num_dma_calls;308309if (!do_partial_copies) {310/* copy whole src to dst */311width = max_width;312height = max_height;313depth = max_depth;314315srcx = srcy = srcz = dstx = dsty = dstz = 0;316} else {317/* random sub-rectangle copies from src to dst */318depth = (rand() % max_depth) + 1;319srcz = rand() % (tsrc.array_size - depth + 1);320dstz = rand() % (tdst.array_size - depth + 1);321322/* special code path to hit the tiled partial copies */323if (!rsrc->surface.is_linear &&324!rdst->surface.is_linear &&325rand() & 1) {326if (max_width < 8 || max_height < 8)327continue;328width = ((rand() % (max_width / 8)) + 1) * 8;329height = ((rand() % (max_height / 8)) + 1) * 8;330331srcx = rand() % (tsrc.width0 - width + 1) & ~0x7;332srcy = rand() % (tsrc.height0 - height + 1) & ~0x7;333334dstx = rand() % (tdst.width0 - width + 1) & ~0x7;335dsty = rand() % (tdst.height0 - height + 1) & ~0x7;336} else {337/* just make sure that it doesn't divide by zero */338assert(max_width > 0 && max_height > 0);339340width = (rand() % max_width) + 1;341height = (rand() % max_height) + 1;342343srcx = rand() % (tsrc.width0 - width + 1);344srcy = rand() % (tsrc.height0 - height + 1);345346dstx = rand() % (tdst.width0 - width + 1);347dsty = rand() % (tdst.height0 - height + 1);348}349350/* special code path to hit out-of-bounds reads in L2T */351if (rsrc->surface.is_linear &&352!rdst->surface.is_linear &&353rand() % 4 == 0) {354srcx = 0;355srcy = 0;356srcz = 0;357}358}359360/* GPU copy */361u_box_3d(srcx, srcy, srcz, width, height, depth, &box);362rctx->dma_copy(ctx, dst, 0, dstx, dsty, dstz, src, 0, &box);363364/* See which engine was used. */365gfx_blits += rctx->num_draw_calls > old_num_draw_calls;366dma_blits += rctx->num_dma_calls > old_num_dma_calls;367368/* CPU copy */369util_copy_box(dst_cpu.ptr, tdst.format, dst_cpu.stride,370dst_cpu.layer_stride,371dstx, dsty, dstz, width, height, depth,372src_cpu.ptr, src_cpu.stride,373src_cpu.layer_stride,374srcx, srcy, srcz);375}376377pass = compare_textures(ctx, dst, &dst_cpu, bpp);378if (pass)379num_pass++;380else381num_fail++;382383printf("BLITs: GFX = %2u, DMA = %2u, %s [%u/%u]\n",384gfx_blits, dma_blits, pass ? "pass" : "fail",385num_pass, num_pass+num_fail);386387/* cleanup */388pipe_resource_reference(&src, NULL);389pipe_resource_reference(&dst, NULL);390free(src_cpu.ptr);391free(dst_cpu.ptr);392}393394ctx->destroy(ctx);395exit(0);396}397398399