Path: blob/21.2-virgl/src/gallium/tests/trivial/compute.c
4561 views
/*1* Copyright (C) 2011 Francisco Jerez.2* All Rights Reserved.3*4* Permission is hereby granted, free of charge, to any person obtaining5* a copy of this software and associated documentation files (the6* "Software"), to deal in the Software without restriction, including7* without limitation the rights to use, copy, modify, merge, publish,8* distribute, sublicense, and/or sell copies of the Software, and to9* permit persons to whom the Software is furnished to do so, subject to10* the following conditions:11*12* The above copyright notice and this permission notice (including the13* next paragraph) shall be included in all copies or substantial14* portions of the Software.15*16* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,17* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF18* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.19* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE20* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION21* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION22* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.23*24*/2526#include <fcntl.h>27#include <stdio.h>28#include <sys/stat.h>29#include <inttypes.h>30#include "pipe/p_state.h"31#include "pipe/p_context.h"32#include "pipe/p_screen.h"33#include "pipe/p_defines.h"34#include "pipe/p_shader_tokens.h"35#include "util/u_memory.h"36#include "util/u_inlines.h"37#include "util/u_sampler.h"38#include "util/format/u_format.h"39#include "tgsi/tgsi_text.h"40#include "pipe-loader/pipe_loader.h"4142#define MAX_RESOURCES 44344struct context {45struct pipe_loader_device *dev;46struct pipe_screen *screen;47struct pipe_context *pipe;48void *hwcs;49void *hwsmp[MAX_RESOURCES];50struct pipe_resource *tex[MAX_RESOURCES];51bool tex_rw[MAX_RESOURCES];52struct pipe_sampler_view *view[MAX_RESOURCES];53struct pipe_surface *surf[MAX_RESOURCES];54};5556#define DUMP_COMPUTE_PARAM(p, c) do { \57uint64_t __v[4]; \58int __i, __n; \59\60__n = ctx->screen->get_compute_param(ctx->screen, \61PIPE_SHADER_IR_TGSI, \62c, __v); \63printf("%s: {", #c); \64\65for (__i = 0; __i < __n / sizeof(*__v); ++__i) \66printf(" %"PRIu64, __v[__i]); \67\68printf(" }\n"); \69} while (0)7071static void init_ctx(struct context *ctx)72{73ASSERTED int ret;7475ret = pipe_loader_probe(&ctx->dev, 1);76assert(ret);7778ctx->screen = pipe_loader_create_screen(ctx->dev);79assert(ctx->screen);8081ctx->pipe = ctx->screen->context_create(ctx->screen, NULL, 0);82assert(ctx->pipe);8384DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_GRID_DIMENSION);85DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_GRID_SIZE);86DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE);87}8889static void destroy_ctx(struct context *ctx)90{91ctx->pipe->destroy(ctx->pipe);92ctx->screen->destroy(ctx->screen);93pipe_loader_release(&ctx->dev, 1);94FREE(ctx);95}9697static char *98preprocess_prog(struct context *ctx, const char *src, const char *defs)99{100const char header[] =101"#define RGLOBAL RES[32767]\n"102"#define RLOCAL RES[32766]\n"103"#define RPRIVATE RES[32765]\n"104"#define RINPUT RES[32764]\n";105char cmd[512];106char tmp[] = "/tmp/test-compute.tgsi-XXXXXX";107char *buf;108int fd, ret;109struct stat st;110FILE *p;111112/* Open a temporary file */113fd = mkstemp(tmp);114assert(fd >= 0);115snprintf(cmd, sizeof(cmd), "cpp -P -nostdinc -undef %s > %s",116defs ? defs : "", tmp);117118/* Preprocess */119p = popen(cmd, "w");120fwrite(header, strlen(header), 1, p);121fwrite(src, strlen(src), 1, p);122ret = pclose(p);123assert(!ret);124125/* Read back */126ret = fstat(fd, &st);127assert(!ret);128129buf = malloc(st.st_size + 1);130ret = read(fd, buf, st.st_size);131assert(ret == st.st_size);132buf[ret] = 0;133134/* Clean up */135close(fd);136unlink(tmp);137138return buf;139}140141static void init_prog(struct context *ctx, unsigned local_sz,142unsigned private_sz, unsigned input_sz,143const char *src, const char *defs)144{145struct pipe_context *pipe = ctx->pipe;146struct tgsi_token prog[1024];147struct pipe_compute_state cs = {148.ir_type = PIPE_SHADER_IR_TGSI,149.prog = prog,150.req_local_mem = local_sz,151.req_private_mem = private_sz,152.req_input_mem = input_sz153};154char *psrc = preprocess_prog(ctx, src, defs);155ASSERTED int ret;156157ret = tgsi_text_translate(psrc, prog, ARRAY_SIZE(prog));158assert(ret);159free(psrc);160161ctx->hwcs = pipe->create_compute_state(pipe, &cs);162assert(ctx->hwcs);163164pipe->bind_compute_state(pipe, ctx->hwcs);165}166167static void destroy_prog(struct context *ctx)168{169struct pipe_context *pipe = ctx->pipe;170171pipe->delete_compute_state(pipe, ctx->hwcs);172ctx->hwcs = NULL;173}174175static void init_tex(struct context *ctx, int slot,176enum pipe_texture_target target, bool rw,177enum pipe_format format, int w, int h,178void (*init)(void *, int, int, int))179{180struct pipe_context *pipe = ctx->pipe;181struct pipe_resource **tex = &ctx->tex[slot];182struct pipe_resource ttex = {183.target = target,184.format = format,185.width0 = w,186.height0 = h,187.depth0 = 1,188.array_size = 1,189.bind = (PIPE_BIND_SAMPLER_VIEW |190PIPE_BIND_COMPUTE_RESOURCE |191PIPE_BIND_GLOBAL)192};193int dx = util_format_get_blocksize(format);194int dy = util_format_get_stride(format, w);195int nx = (target == PIPE_BUFFER ? (w / dx) :196util_format_get_nblocksx(format, w));197int ny = (target == PIPE_BUFFER ? 1 :198util_format_get_nblocksy(format, h));199struct pipe_transfer *xfer;200char *map;201int x, y;202203*tex = ctx->screen->resource_create(ctx->screen, &ttex);204assert(*tex);205206map = pipe->texture_map(pipe, *tex, 0, PIPE_MAP_WRITE,207&(struct pipe_box) { .width = w,208.height = h,209.depth = 1 }, &xfer);210assert(xfer);211assert(map);212213for (y = 0; y < ny; ++y) {214for (x = 0; x < nx; ++x) {215init(map + y * dy + x * dx, slot, x, y);216}217}218219pipe->texture_unmap(pipe, xfer);220221ctx->tex_rw[slot] = rw;222}223224static bool default_check(void *x, void *y, int sz) {225return !memcmp(x, y, sz);226}227228static void check_tex(struct context *ctx, int slot,229void (*expect)(void *, int, int, int),230bool (*check)(void *, void *, int))231{232struct pipe_context *pipe = ctx->pipe;233struct pipe_resource *tex = ctx->tex[slot];234int dx = util_format_get_blocksize(tex->format);235int dy = util_format_get_stride(tex->format, tex->width0);236int nx = (tex->target == PIPE_BUFFER ? (tex->width0 / dx) :237util_format_get_nblocksx(tex->format, tex->width0));238int ny = (tex->target == PIPE_BUFFER ? 1 :239util_format_get_nblocksy(tex->format, tex->height0));240struct pipe_transfer *xfer;241char *map;242int x = 0, y, i;243int err = 0;244245if (!check)246check = default_check;247248map = pipe->texture_map(pipe, tex, 0, PIPE_MAP_READ,249&(struct pipe_box) { .width = tex->width0,250.height = tex->height0,251.depth = 1 }, &xfer);252assert(xfer);253assert(map);254255for (y = 0; y < ny; ++y) {256for (x = 0; x < nx; ++x) {257uint32_t exp[4];258uint32_t *res = (uint32_t *)(map + y * dy + x * dx);259260expect(exp, slot, x, y);261if (check(res, exp, dx) || (++err) > 20)262continue;263264if (dx < 4) {265uint32_t u = 0, v = 0;266267for (i = 0; i < dx; i++) {268u |= ((uint8_t *)exp)[i] << (8 * i);269v |= ((uint8_t *)res)[i] << (8 * i);270}271printf("(%d, %d): got 0x%x, expected 0x%x\n",272x, y, v, u);273} else {274for (i = 0; i < dx / 4; i++) {275printf("(%d, %d)[%d]: got 0x%x/%f,"276" expected 0x%x/%f\n", x, y, i,277res[i], ((float *)res)[i],278exp[i], ((float *)exp)[i]);279}280}281}282}283284pipe->texture_unmap(pipe, xfer);285286if (err)287printf("(%d, %d): \x1b[31mFAIL\x1b[0m (%d)\n", x, y, err);288else289printf("(%d, %d): \x1b[32mOK\x1b[0m\n", x, y);290}291292static void destroy_tex(struct context *ctx)293{294int i;295296for (i = 0; i < MAX_RESOURCES; ++i) {297if (ctx->tex[i])298pipe_resource_reference(&ctx->tex[i], NULL);299}300}301302static void init_sampler_views(struct context *ctx, const int *slots)303{304struct pipe_context *pipe = ctx->pipe;305struct pipe_sampler_view tview;306int i;307308for (i = 0; *slots >= 0; ++i, ++slots) {309u_sampler_view_default_template(&tview, ctx->tex[*slots],310ctx->tex[*slots]->format);311312ctx->view[i] = pipe->create_sampler_view(pipe, ctx->tex[*slots],313&tview);314assert(ctx->view[i]);315}316317pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, i, 0, ctx->view);318}319320static void destroy_sampler_views(struct context *ctx)321{322struct pipe_context *pipe = ctx->pipe;323int i;324325pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, 0, MAX_RESOURCES, NULL);326327for (i = 0; i < MAX_RESOURCES; ++i) {328if (ctx->view[i]) {329pipe->sampler_view_destroy(pipe, ctx->view[i]);330ctx->view[i] = NULL;331}332}333}334335static void init_compute_resources(struct context *ctx, const int *slots)336{337struct pipe_context *pipe = ctx->pipe;338int i;339340for (i = 0; *slots >= 0; ++i, ++slots) {341struct pipe_surface tsurf = {342.format = ctx->tex[*slots]->format,343.writable = ctx->tex_rw[*slots]344};345346if (ctx->tex[*slots]->target == PIPE_BUFFER)347tsurf.u.buf.last_element = ctx->tex[*slots]->width0 - 1;348349ctx->surf[i] = pipe->create_surface(pipe, ctx->tex[*slots],350&tsurf);351assert(ctx->surf[i]);352}353354pipe->set_compute_resources(pipe, 0, i, ctx->surf);355}356357static void destroy_compute_resources(struct context *ctx)358{359struct pipe_context *pipe = ctx->pipe;360int i;361362pipe->set_compute_resources(pipe, 0, MAX_RESOURCES, NULL);363364for (i = 0; i < MAX_RESOURCES; ++i) {365if (ctx->surf[i]) {366pipe->surface_destroy(pipe, ctx->surf[i]);367ctx->surf[i] = NULL;368}369}370}371372static void init_sampler_states(struct context *ctx, int n)373{374struct pipe_context *pipe = ctx->pipe;375struct pipe_sampler_state smp = {376.normalized_coords = 1,377};378int i;379380for (i = 0; i < n; ++i) {381ctx->hwsmp[i] = pipe->create_sampler_state(pipe, &smp);382assert(ctx->hwsmp[i]);383}384385pipe->bind_sampler_states(pipe, PIPE_SHADER_COMPUTE, 0, i, ctx->hwsmp);386}387388static void destroy_sampler_states(struct context *ctx)389{390struct pipe_context *pipe = ctx->pipe;391int i;392393pipe->bind_sampler_states(pipe, PIPE_SHADER_COMPUTE,3940, MAX_RESOURCES, NULL);395396for (i = 0; i < MAX_RESOURCES; ++i) {397if (ctx->hwsmp[i]) {398pipe->delete_sampler_state(pipe, ctx->hwsmp[i]);399ctx->hwsmp[i] = NULL;400}401}402}403404static void init_globals(struct context *ctx, const int *slots,405uint32_t **handles)406{407struct pipe_context *pipe = ctx->pipe;408struct pipe_resource *res[MAX_RESOURCES];409int i;410411for (i = 0; *slots >= 0; ++i, ++slots)412res[i] = ctx->tex[*slots];413414pipe->set_global_binding(pipe, 0, i, res, handles);415}416417static void destroy_globals(struct context *ctx)418{419struct pipe_context *pipe = ctx->pipe;420421pipe->set_global_binding(pipe, 0, MAX_RESOURCES, NULL, NULL);422}423424static void launch_grid(struct context *ctx, const uint *block_layout,425const uint *grid_layout, uint32_t pc,426void *input)427{428struct pipe_context *pipe = ctx->pipe;429struct pipe_grid_info info;430int i;431432for (i = 0; i < 3; i++) {433info.block[i] = block_layout[i];434info.grid[i] = grid_layout[i];435}436info.pc = pc;437info.input = input;438439pipe->launch_grid(pipe, &info);440}441442static void test_default_init(void *p, int s, int x, int y)443{444*(uint32_t *)p = 0xdeadbeef;445}446447/* test_system_values */448static void test_system_values_expect(void *p, int s, int x, int y)449{450int id = x / 16, sv = (x % 16) / 4, c = x % 4;451int tid[] = { id % 20, (id % 240) / 20, id / 240, 0 };452int bsz[] = { 4, 3, 5, 1};453int gsz[] = { 5, 4, 1, 1};454455switch (sv) {456case 0:457*(uint32_t *)p = tid[c] / bsz[c];458break;459case 1:460*(uint32_t *)p = bsz[c];461break;462case 2:463*(uint32_t *)p = gsz[c];464break;465case 3:466*(uint32_t *)p = tid[c] % bsz[c];467break;468}469}470471static void test_system_values(struct context *ctx)472{473const char *src = "COMP\n"474"DCL RES[0], BUFFER, RAW, WR\n"475"DCL SV[0], BLOCK_ID[0]\n"476"DCL SV[1], BLOCK_SIZE[0]\n"477"DCL SV[2], GRID_SIZE[0]\n"478"DCL SV[3], THREAD_ID[0]\n"479"DCL TEMP[0], LOCAL\n"480"DCL TEMP[1], LOCAL\n"481"IMM UINT32 { 64, 0, 0, 0 }\n"482"IMM UINT32 { 16, 0, 0, 0 }\n"483"IMM UINT32 { 0, 0, 0, 0 }\n"484"\n"485"BGNSUB"486" UMUL TEMP[0], SV[0], SV[1]\n"487" UADD TEMP[0], TEMP[0], SV[3]\n"488" UMUL TEMP[1], SV[1], SV[2]\n"489" UMUL TEMP[0].w, TEMP[0], TEMP[1].zzzz\n"490" UMUL TEMP[0].zw, TEMP[0], TEMP[1].yyyy\n"491" UMUL TEMP[0].yzw, TEMP[0], TEMP[1].xxxx\n"492" UADD TEMP[0].xy, TEMP[0].xyxy, TEMP[0].zwzw\n"493" UADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy\n"494" UMUL TEMP[0].x, TEMP[0], IMM[0]\n"495" STORE RES[0].xyzw, TEMP[0], SV[0]\n"496" UADD TEMP[0].x, TEMP[0], IMM[1]\n"497" STORE RES[0].xyzw, TEMP[0], SV[1]\n"498" UADD TEMP[0].x, TEMP[0], IMM[1]\n"499" STORE RES[0].xyzw, TEMP[0], SV[2]\n"500" UADD TEMP[0].x, TEMP[0], IMM[1]\n"501" STORE RES[0].xyzw, TEMP[0], SV[3]\n"502" RET\n"503"ENDSUB\n";504505printf("- %s\n", __func__);506507init_prog(ctx, 0, 0, 0, src, NULL);508init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,50976800, 0, test_default_init);510init_compute_resources(ctx, (int []) { 0, -1 });511launch_grid(ctx, (uint []){4, 3, 5}, (uint []){5, 4, 1}, 0, NULL);512check_tex(ctx, 0, test_system_values_expect, NULL);513destroy_compute_resources(ctx);514destroy_tex(ctx);515destroy_prog(ctx);516}517518/* test_resource_access */519static void test_resource_access_init0(void *p, int s, int x, int y)520{521*(float *)p = 8.0 - (float)x;522}523524static void test_resource_access_expect(void *p, int s, int x, int y)525{526*(float *)p = 8.0 - (float)((x + 4 * y) & 0x3f);527}528529static void test_resource_access(struct context *ctx)530{531const char *src = "COMP\n"532"DCL RES[0], BUFFER, RAW, WR\n"533"DCL RES[1], 2D, RAW, WR\n"534"DCL SV[0], BLOCK_ID[0]\n"535"DCL TEMP[0], LOCAL\n"536"DCL TEMP[1], LOCAL\n"537"IMM UINT32 { 15, 0, 0, 0 }\n"538"IMM UINT32 { 16, 1, 0, 0 }\n"539"\n"540" BGNSUB\n"541" UADD TEMP[0].x, SV[0].xxxx, SV[0].yyyy\n"542" AND TEMP[0].x, TEMP[0], IMM[0]\n"543" UMUL TEMP[0].x, TEMP[0], IMM[1]\n"544" LOAD TEMP[0].xyzw, RES[0], TEMP[0]\n"545" UMUL TEMP[1], SV[0], IMM[1]\n"546" STORE RES[1].xyzw, TEMP[1], TEMP[0]\n"547" RET\n"548" ENDSUB\n";549550printf("- %s\n", __func__);551552init_prog(ctx, 0, 0, 0, src, NULL);553init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,554256, 0, test_resource_access_init0);555init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,55660, 12, test_default_init);557init_compute_resources(ctx, (int []) { 0, 1, -1 });558launch_grid(ctx, (uint []){1, 1, 1}, (uint []){15, 12, 1}, 0, NULL);559check_tex(ctx, 1, test_resource_access_expect, NULL);560destroy_compute_resources(ctx);561destroy_tex(ctx);562destroy_prog(ctx);563}564565/* test_function_calls */566static void test_function_calls_init(void *p, int s, int x, int y)567{568*(uint32_t *)p = 15 * y + x;569}570571static void test_function_calls_expect(void *p, int s, int x, int y)572{573*(uint32_t *)p = (15 * y + x) < 4 ? 2 : 1 ;574}575576static void test_function_calls(struct context *ctx)577{578const char *src = "COMP\n"579"DCL RES[0], 2D, RAW, WR\n"580"DCL SV[0], BLOCK_ID[0]\n"581"DCL SV[1], BLOCK_SIZE[0]\n"582"DCL SV[2], GRID_SIZE[0]\n"583"DCL SV[3], THREAD_ID[0]\n"584"DCL TEMP[0]\n"585"DCL TEMP[1]\n"586"DCL TEMP[2], LOCAL\n"587"IMM UINT32 { 0, 11, 22, 33 }\n"588"IMM FLT32 { 11, 33, 55, 99 }\n"589"IMM UINT32 { 4, 1, 0, 0 }\n"590"IMM UINT32 { 12, 0, 0, 0 }\n"591"\n"592"00: BGNSUB\n"593"01: UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"594"02: UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"595"03: USLT TEMP[0].x, TEMP[0], IMM[0]\n"596"04: RET\n"597"05: ENDSUB\n"598"06: BGNSUB\n"599"07: UMUL TEMP[0].x, TEMP[0], TEMP[0]\n"600"08: UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n"601"09: USLT TEMP[0].x, TEMP[0], IMM[0].yyyy\n"602"10: IF TEMP[0].xxxx\n"603"11: CAL :0\n"604"12: ENDIF\n"605"13: RET\n"606"14: ENDSUB\n"607"15: BGNSUB\n"608"16: UMUL TEMP[2], SV[0], SV[1]\n"609"17: UADD TEMP[2], TEMP[2], SV[3]\n"610"18: UMUL TEMP[2], TEMP[2], IMM[2]\n"611"00: MOV TEMP[1].x, IMM[2].wwww\n"612"19: LOAD TEMP[0].x, RES[0].xxxx, TEMP[2]\n"613"20: CAL :6\n"614"21: STORE RES[0].x, TEMP[2], TEMP[1].xxxx\n"615"22: RET\n"616"23: ENDSUB\n";617618printf("- %s\n", __func__);619620init_prog(ctx, 0, 0, 0, src, NULL);621init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,62215, 12, test_function_calls_init);623init_compute_resources(ctx, (int []) { 0, -1 });624launch_grid(ctx, (uint []){3, 3, 3}, (uint []){5, 4, 1}, 15, NULL);625check_tex(ctx, 0, test_function_calls_expect, NULL);626destroy_compute_resources(ctx);627destroy_tex(ctx);628destroy_prog(ctx);629}630631/* test_input_global */632static void test_input_global_expect(void *p, int s, int x, int y)633{634*(uint32_t *)p = 0xdeadbeef - (x == 0 ? 0x10001 + 2 * s : 0);635}636637static void test_input_global(struct context *ctx)638{639const char *src = "COMP\n"640"DCL SV[0], THREAD_ID[0]\n"641"DCL TEMP[0], LOCAL\n"642"DCL TEMP[1], LOCAL\n"643"IMM UINT32 { 8, 0, 0, 0 }\n"644"\n"645" BGNSUB\n"646" UMUL TEMP[0], SV[0], IMM[0]\n"647" LOAD TEMP[1].xy, RINPUT, TEMP[0]\n"648" LOAD TEMP[0].x, RGLOBAL, TEMP[1].yyyy\n"649" UADD TEMP[1].x, TEMP[0], -TEMP[1]\n"650" STORE RGLOBAL.x, TEMP[1].yyyy, TEMP[1]\n"651" RET\n"652" ENDSUB\n";653uint32_t input[8] = { 0x10001, 0x10002, 0x10003, 0x10004,6540x10005, 0x10006, 0x10007, 0x10008 };655656printf("- %s\n", __func__);657658init_prog(ctx, 0, 0, 32, src, NULL);659init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,660test_default_init);661init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,662test_default_init);663init_tex(ctx, 2, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,664test_default_init);665init_tex(ctx, 3, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0,666test_default_init);667init_globals(ctx, (int []){ 0, 1, 2, 3, -1 },668(uint32_t *[]){ &input[1], &input[3],669&input[5], &input[7] });670launch_grid(ctx, (uint []){4, 1, 1}, (uint []){1, 1, 1}, 0, input);671check_tex(ctx, 0, test_input_global_expect, NULL);672check_tex(ctx, 1, test_input_global_expect, NULL);673check_tex(ctx, 2, test_input_global_expect, NULL);674check_tex(ctx, 3, test_input_global_expect, NULL);675destroy_globals(ctx);676destroy_tex(ctx);677destroy_prog(ctx);678}679680/* test_private */681static void test_private_expect(void *p, int s, int x, int y)682{683*(uint32_t *)p = (x / 32) + x % 32;684}685686static void test_private(struct context *ctx)687{688const char *src = "COMP\n"689"DCL RES[0], BUFFER, RAW, WR\n"690"DCL SV[0], BLOCK_ID[0]\n"691"DCL SV[1], BLOCK_SIZE[0]\n"692"DCL SV[2], THREAD_ID[0]\n"693"DCL TEMP[0], LOCAL\n"694"DCL TEMP[1], LOCAL\n"695"DCL TEMP[2], LOCAL\n"696"IMM UINT32 { 128, 0, 0, 0 }\n"697"IMM UINT32 { 4, 0, 0, 0 }\n"698"\n"699" BGNSUB\n"700" UMUL TEMP[0].x, SV[0], SV[1]\n"701" UADD TEMP[0].x, TEMP[0], SV[2]\n"702" MOV TEMP[1].x, IMM[0].wwww\n"703" BGNLOOP\n"704" USEQ TEMP[2].x, TEMP[1], IMM[0]\n"705" IF TEMP[2]\n"706" BRK\n"707" ENDIF\n"708" UDIV TEMP[2].x, TEMP[1], IMM[1]\n"709" UADD TEMP[2].x, TEMP[2], TEMP[0]\n"710" STORE RPRIVATE.x, TEMP[1], TEMP[2]\n"711" UADD TEMP[1].x, TEMP[1], IMM[1]\n"712" ENDLOOP\n"713" MOV TEMP[1].x, IMM[0].wwww\n"714" UMUL TEMP[0].x, TEMP[0], IMM[0]\n"715" BGNLOOP\n"716" USEQ TEMP[2].x, TEMP[1], IMM[0]\n"717" IF TEMP[2]\n"718" BRK\n"719" ENDIF\n"720" LOAD TEMP[2].x, RPRIVATE, TEMP[1]\n"721" STORE RES[0].x, TEMP[0], TEMP[2]\n"722" UADD TEMP[0].x, TEMP[0], IMM[1]\n"723" UADD TEMP[1].x, TEMP[1], IMM[1]\n"724" ENDLOOP\n"725" RET\n"726" ENDSUB\n";727728printf("- %s\n", __func__);729730init_prog(ctx, 0, 128, 0, src, NULL);731init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,73232768, 0, test_default_init);733init_compute_resources(ctx, (int []) { 0, -1 });734launch_grid(ctx, (uint []){16, 1, 1}, (uint []){16, 1, 1}, 0, NULL);735check_tex(ctx, 0, test_private_expect, NULL);736destroy_compute_resources(ctx);737destroy_tex(ctx);738destroy_prog(ctx);739}740741/* test_local */742static void test_local_expect(void *p, int s, int x, int y)743{744*(uint32_t *)p = x & 0x20 ? 2 : 1;745}746747static void test_local(struct context *ctx)748{749const char *src = "COMP\n"750"DCL RES[0], BUFFER, RAW, WR\n"751"DCL SV[0], BLOCK_ID[0]\n"752"DCL SV[1], BLOCK_SIZE[0]\n"753"DCL SV[2], THREAD_ID[0]\n"754"DCL TEMP[0], LOCAL\n"755"DCL TEMP[1], LOCAL\n"756"DCL TEMP[2], LOCAL\n"757"IMM UINT32 { 1, 0, 0, 0 }\n"758"IMM UINT32 { 2, 0, 0, 0 }\n"759"IMM UINT32 { 4, 0, 0, 0 }\n"760"IMM UINT32 { 32, 0, 0, 0 }\n"761"IMM UINT32 { 128, 0, 0, 0 }\n"762"\n"763" BGNSUB\n"764" UMUL TEMP[0].x, SV[2], IMM[2]\n"765" STORE RLOCAL.x, TEMP[0], IMM[0].wwww\n"766" MFENCE RLOCAL\n"767" USLT TEMP[1].x, SV[2], IMM[3]\n"768" IF TEMP[1]\n"769" UADD TEMP[1].x, TEMP[0], IMM[4]\n"770" BGNLOOP\n"771" LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"772" USEQ TEMP[2].x, TEMP[2], IMM[0]\n"773" IF TEMP[2]\n"774" BRK\n"775" ENDIF\n"776" ENDLOOP\n"777" STORE RLOCAL.x, TEMP[0], IMM[0]\n"778" MFENCE RLOCAL\n"779" BGNLOOP\n"780" LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"781" USEQ TEMP[2].x, TEMP[2], IMM[1]\n"782" IF TEMP[2]\n"783" BRK\n"784" ENDIF\n"785" ENDLOOP\n"786" ELSE\n"787" UADD TEMP[1].x, TEMP[0], -IMM[4]\n"788" BGNLOOP\n"789" LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"790" USEQ TEMP[2].x, TEMP[2], IMM[0].wwww\n"791" IF TEMP[2]\n"792" BRK\n"793" ENDIF\n"794" ENDLOOP\n"795" STORE RLOCAL.x, TEMP[0], IMM[0]\n"796" MFENCE RLOCAL\n"797" BGNLOOP\n"798" LOAD TEMP[2].x, RLOCAL, TEMP[1]\n"799" USEQ TEMP[2].x, TEMP[2], IMM[0]\n"800" IF TEMP[2]\n"801" BRK\n"802" ENDIF\n"803" ENDLOOP\n"804" STORE RLOCAL.x, TEMP[0], IMM[1]\n"805" MFENCE RLOCAL\n"806" ENDIF\n"807" UMUL TEMP[1].x, SV[0], SV[1]\n"808" UMUL TEMP[1].x, TEMP[1], IMM[2]\n"809" UADD TEMP[1].x, TEMP[1], TEMP[0]\n"810" LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"811" STORE RES[0].x, TEMP[1], TEMP[0]\n"812" RET\n"813" ENDSUB\n";814815printf("- %s\n", __func__);816817init_prog(ctx, 256, 0, 0, src, NULL);818init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,8194096, 0, test_default_init);820init_compute_resources(ctx, (int []) { 0, -1 });821launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);822check_tex(ctx, 0, test_local_expect, NULL);823destroy_compute_resources(ctx);824destroy_tex(ctx);825destroy_prog(ctx);826}827828/* test_sample */829static void test_sample_init(void *p, int s, int x, int y)830{831*(float *)p = s ? 1 : x * y;832}833834static void test_sample_expect(void *p, int s, int x, int y)835{836switch (x % 4) {837case 0:838*(float *)p = x / 4 * y;839break;840case 1:841case 2:842*(float *)p = 0;843break;844case 3:845*(float *)p = 1;846break;847}848}849850static void test_sample(struct context *ctx)851{852const char *src = "COMP\n"853"DCL SVIEW[0], 2D, FLOAT\n"854"DCL RES[0], 2D, RAW, WR\n"855"DCL SAMP[0]\n"856"DCL SV[0], BLOCK_ID[0]\n"857"DCL TEMP[0], LOCAL\n"858"DCL TEMP[1], LOCAL\n"859"IMM UINT32 { 16, 1, 0, 0 }\n"860"IMM FLT32 { 128, 32, 0, 0 }\n"861"\n"862" BGNSUB\n"863" I2F TEMP[1], SV[0]\n"864" DIV TEMP[1], TEMP[1], IMM[1]\n"865" SAMPLE TEMP[1], TEMP[1], SVIEW[0], SAMP[0]\n"866" UMUL TEMP[0], SV[0], IMM[0]\n"867" STORE RES[0].xyzw, TEMP[0], TEMP[1]\n"868" RET\n"869" ENDSUB\n";870871printf("- %s\n", __func__);872873init_prog(ctx, 0, 0, 0, src, NULL);874init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,875128, 32, test_sample_init);876init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,877512, 32, test_sample_init);878init_compute_resources(ctx, (int []) { 1, -1 });879init_sampler_views(ctx, (int []) { 0, -1 });880init_sampler_states(ctx, 2);881launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, NULL);882check_tex(ctx, 1, test_sample_expect, NULL);883destroy_sampler_states(ctx);884destroy_sampler_views(ctx);885destroy_compute_resources(ctx);886destroy_tex(ctx);887destroy_prog(ctx);888}889890/* test_many_kern */891static void test_many_kern_expect(void *p, int s, int x, int y)892{893*(uint32_t *)p = x;894}895896static void test_many_kern(struct context *ctx)897{898const char *src = "COMP\n"899"DCL RES[0], BUFFER, RAW, WR\n"900"DCL TEMP[0], LOCAL\n"901"IMM UINT32 { 0, 1, 2, 3 }\n"902"IMM UINT32 { 4, 0, 0, 0 }\n"903"\n"904" BGNSUB\n"905" UMUL TEMP[0].x, IMM[0].xxxx, IMM[1].xxxx\n"906" STORE RES[0].x, TEMP[0], IMM[0].xxxx\n"907" RET\n"908" ENDSUB\n"909" BGNSUB\n"910" UMUL TEMP[0].x, IMM[0].yyyy, IMM[1].xxxx\n"911" STORE RES[0].x, TEMP[0], IMM[0].yyyy\n"912" RET\n"913" ENDSUB\n"914" BGNSUB\n"915" UMUL TEMP[0].x, IMM[0].zzzz, IMM[1].xxxx\n"916" STORE RES[0].x, TEMP[0], IMM[0].zzzz\n"917" RET\n"918" ENDSUB\n"919" BGNSUB\n"920" UMUL TEMP[0].x, IMM[0].wwww, IMM[1].xxxx\n"921" STORE RES[0].x, TEMP[0], IMM[0].wwww\n"922" RET\n"923" ENDSUB\n";924925printf("- %s\n", __func__);926927init_prog(ctx, 0, 0, 0, src, NULL);928init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,92916, 0, test_default_init);930init_compute_resources(ctx, (int []) { 0, -1 });931launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 0, NULL);932launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 5, NULL);933launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 10, NULL);934launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 15, NULL);935check_tex(ctx, 0, test_many_kern_expect, NULL);936destroy_compute_resources(ctx);937destroy_tex(ctx);938destroy_prog(ctx);939}940941/* test_constant */942static void test_constant_init(void *p, int s, int x, int y)943{944*(float *)p = s ? 0xdeadbeef : 8.0 - (float)x;945}946947static void test_constant_expect(void *p, int s, int x, int y)948{949*(float *)p = 8.0 - (float)x;950}951952static void test_constant(struct context *ctx)953{954const char *src = "COMP\n"955"DCL RES[0], BUFFER, RAW\n"956"DCL RES[1], BUFFER, RAW, WR\n"957"DCL SV[0], BLOCK_ID[0]\n"958"DCL TEMP[0], LOCAL\n"959"DCL TEMP[1], LOCAL\n"960"IMM UINT32 { 4, 0, 0, 0 }\n"961"\n"962" BGNSUB\n"963" UMUL TEMP[0].x, SV[0], IMM[0]\n"964" LOAD TEMP[1].x, RES[0], TEMP[0]\n"965" STORE RES[1].x, TEMP[0], TEMP[1]\n"966" RET\n"967" ENDSUB\n";968969printf("- %s\n", __func__);970971init_prog(ctx, 0, 0, 0, src, NULL);972init_tex(ctx, 0, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,973256, 0, test_constant_init);974init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,975256, 0, test_constant_init);976init_compute_resources(ctx, (int []) { 0, 1, -1 });977launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL);978check_tex(ctx, 1, test_constant_expect, NULL);979destroy_compute_resources(ctx);980destroy_tex(ctx);981destroy_prog(ctx);982}983984/* test_resource_indirect */985static void test_resource_indirect_init(void *p, int s, int x, int y)986{987*(uint32_t *)p = s == 0 ? 0xdeadbeef :988s == 1 ? x % 2 :989s == 2 ? 2 * x :9902 * x + 1;991}992993static void test_resource_indirect_expect(void *p, int s, int x, int y)994{995*(uint32_t *)p = 2 * x + (x % 2 ? 1 : 0);996}997998static void test_resource_indirect(struct context *ctx)999{1000const char *src = "COMP\n"1001"DCL RES[0], BUFFER, RAW, WR\n"1002"DCL RES[1..3], BUFFER, RAW\n"1003"DCL SV[0], BLOCK_ID[0]\n"1004"DCL TEMP[0], LOCAL\n"1005"DCL TEMP[1], LOCAL\n"1006"IMM UINT32 { 4, 0, 0, 0 }\n"1007"\n"1008" BGNSUB\n"1009" UMUL TEMP[0].x, SV[0], IMM[0]\n"1010" LOAD TEMP[1].x, RES[1], TEMP[0]\n"1011" LOAD TEMP[1].x, RES[TEMP[1].x+2], TEMP[0]\n"1012" STORE RES[0].x, TEMP[0], TEMP[1]\n"1013" RET\n"1014" ENDSUB\n";10151016printf("- %s\n", __func__);10171018init_prog(ctx, 0, 0, 0, src, NULL);1019init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,1020256, 0, test_resource_indirect_init);1021init_tex(ctx, 1, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,1022256, 0, test_resource_indirect_init);1023init_tex(ctx, 2, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,1024256, 0, test_resource_indirect_init);1025init_tex(ctx, 3, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT,1026256, 0, test_resource_indirect_init);1027init_compute_resources(ctx, (int []) { 0, 1, 2, 3, -1 });1028launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL);1029check_tex(ctx, 0, test_resource_indirect_expect, NULL);1030destroy_compute_resources(ctx);1031destroy_tex(ctx);1032destroy_prog(ctx);1033}10341035/* test_surface_ld */1036enum pipe_format surface_fmts[] = {1037PIPE_FORMAT_B8G8R8A8_UNORM,1038PIPE_FORMAT_B8G8R8X8_UNORM,1039PIPE_FORMAT_A8R8G8B8_UNORM,1040PIPE_FORMAT_X8R8G8B8_UNORM,1041PIPE_FORMAT_X8R8G8B8_UNORM,1042PIPE_FORMAT_L8_UNORM,1043PIPE_FORMAT_A8_UNORM,1044PIPE_FORMAT_I8_UNORM,1045PIPE_FORMAT_L8A8_UNORM,1046PIPE_FORMAT_R32_FLOAT,1047PIPE_FORMAT_R32G32_FLOAT,1048PIPE_FORMAT_R32G32B32A32_FLOAT,1049PIPE_FORMAT_R32_UNORM,1050PIPE_FORMAT_R32G32_UNORM,1051PIPE_FORMAT_R32G32B32A32_UNORM,1052PIPE_FORMAT_R32_SNORM,1053PIPE_FORMAT_R32G32_SNORM,1054PIPE_FORMAT_R32G32B32A32_SNORM,1055PIPE_FORMAT_R8_UINT,1056PIPE_FORMAT_R8G8_UINT,1057PIPE_FORMAT_R8G8B8A8_UINT,1058PIPE_FORMAT_R8_SINT,1059PIPE_FORMAT_R8G8_SINT,1060PIPE_FORMAT_R8G8B8A8_SINT,1061PIPE_FORMAT_R32_UINT,1062PIPE_FORMAT_R32G32_UINT,1063PIPE_FORMAT_R32G32B32A32_UINT,1064PIPE_FORMAT_R32_SINT,1065PIPE_FORMAT_R32G32_SINT,1066PIPE_FORMAT_R32G32B32A32_SINT1067};10681069static void test_surface_ld_init0f(void *p, int s, int x, int y)1070{1071float v[] = { 1.0, -.75, .50, -.25 };1072int i = 0;10731074util_format_pack_rgba(surface_fmts[i], p, v, 1);1075}10761077static void test_surface_ld_init0i(void *p, int s, int x, int y)1078{1079int v[] = { 0xffffffff, 0xffff, 0xff, 0xf };1080int i = 0;10811082util_format_pack_rgba(surface_fmts[i], p, v, 1);1083}10841085static void test_surface_ld_expectf(void *p, int s, int x, int y)1086{1087float v[4], w[4];1088int i = 0;10891090test_surface_ld_init0f(v, s, x / 4, y);1091util_format_unpack_rgba(surface_fmts[i], w, v, 1);1092*(float *)p = w[x % 4];1093}10941095static void test_surface_ld_expecti(void *p, int s, int x, int y)1096{1097int32_t v[4], w[4];1098int i = 0;10991100test_surface_ld_init0i(v, s, x / 4, y);1101util_format_unpack_rgba(surface_fmts[i], w, v, 1);1102*(uint32_t *)p = w[x % 4];1103}11041105static void test_surface_ld(struct context *ctx)1106{1107const char *src = "COMP\n"1108"DCL RES[0], 2D\n"1109"DCL RES[1], 2D, RAW, WR\n"1110"DCL SV[0], BLOCK_ID[0]\n"1111"DCL TEMP[0], LOCAL\n"1112"DCL TEMP[1], LOCAL\n"1113"IMM UINT32 { 16, 1, 0, 0 }\n"1114"\n"1115" BGNSUB\n"1116" LOAD TEMP[1], RES[0], SV[0]\n"1117" UMUL TEMP[0], SV[0], IMM[0]\n"1118" STORE RES[1].xyzw, TEMP[0], TEMP[1]\n"1119" RET\n"1120" ENDSUB\n";1121int i = 0;11221123printf("- %s\n", __func__);11241125init_prog(ctx, 0, 0, 0, src, NULL);11261127for (i = 0; i < ARRAY_SIZE(surface_fmts); i++) {1128bool is_int = util_format_is_pure_integer(surface_fmts[i]);11291130printf(" - %s\n", util_format_name(surface_fmts[i]));11311132if (!ctx->screen->is_format_supported(ctx->screen,1133surface_fmts[i], PIPE_TEXTURE_2D, 1, 1,1134PIPE_BIND_COMPUTE_RESOURCE)) {1135printf("(unsupported)\n");1136continue;1137}11381139init_tex(ctx, 0, PIPE_TEXTURE_2D, true, surface_fmts[i],1140128, 32, (is_int ? test_surface_ld_init0i : test_surface_ld_init0f));1141init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,1142512, 32, test_default_init);1143init_compute_resources(ctx, (int []) { 0, 1, -1 });1144init_sampler_states(ctx, 2);1145launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,1146NULL);1147check_tex(ctx, 1, (is_int ? test_surface_ld_expecti : test_surface_ld_expectf), NULL);1148destroy_sampler_states(ctx);1149destroy_compute_resources(ctx);1150destroy_tex(ctx);1151}11521153destroy_prog(ctx);1154}11551156/* test_surface_st */1157static void test_surface_st_init0f(void *p, int s, int x, int y)1158{1159float v[] = { 1.0, -.75, 0.5, -.25 };1160*(float *)p = v[x % 4];1161}11621163static void test_surface_st_init0i(void *p, int s, int x, int y)1164{1165int v[] = { 0xffffffff, 0xffff, 0xff, 0xf };1166*(int32_t *)p = v[x % 4];1167}11681169static void test_surface_st_init1(void *p, int s, int x, int y)1170{1171int i = 0;1172memset(p, 1, util_format_get_blocksize(surface_fmts[i]));1173}11741175static void test_surface_st_expectf(void *p, int s, int x, int y)1176{1177float vf[4];1178int i = 0, j;11791180for (j = 0; j < 4; j++)1181test_surface_st_init0f(&vf[j], s, 4 * x + j, y);1182util_format_pack_rgba(surface_fmts[i], p, vf, 1);1183}11841185static void test_surface_st_expects(void *p, int s, int x, int y)1186{1187int32_t v[4];1188int i = 0, j;11891190for (j = 0; j < 4; j++)1191test_surface_st_init0i(&v[j], s, 4 * x + j, y);1192util_format_pack_rgba(surface_fmts[i], p, v, 1);1193}11941195static void test_surface_st_expectu(void *p, int s, int x, int y)1196{1197uint32_t v[4];1198int i = 0, j;11991200for (j = 0; j < 4; j++)1201test_surface_st_init0i(&v[j], s, 4 * x + j, y);1202util_format_pack_rgba(surface_fmts[i], p, v, 1);1203}12041205static bool test_surface_st_check(void *x, void *y, int sz)1206{1207int i = 0, j;12081209if (util_format_is_float(surface_fmts[i])) {1210return fabs(*(float *)x - *(float *)y) < 3.92156863e-3;12111212} else if ((sz % 4) == 0) {1213for (j = 0; j < sz / 4; j++)1214if (abs(((uint32_t *)x)[j] -1215((uint32_t *)y)[j]) > 1)1216return false;1217return true;1218} else {1219return !memcmp(x, y, sz);1220}1221}12221223static void test_surface_st(struct context *ctx)1224{1225const char *src = "COMP\n"1226"DCL RES[0], 2D, RAW\n"1227"DCL RES[1], 2D, WR\n"1228"DCL SV[0], BLOCK_ID[0]\n"1229"DCL TEMP[0], LOCAL\n"1230"DCL TEMP[1], LOCAL\n"1231"IMM UINT32 { 16, 1, 0, 0 }\n"1232"\n"1233" BGNSUB\n"1234" UMUL TEMP[0], SV[0], IMM[0]\n"1235" LOAD TEMP[1], RES[0], TEMP[0]\n"1236" STORE RES[1], SV[0], TEMP[1]\n"1237" RET\n"1238" ENDSUB\n";1239int i = 0;12401241printf("- %s\n", __func__);12421243init_prog(ctx, 0, 0, 0, src, NULL);12441245for (i = 0; i < ARRAY_SIZE(surface_fmts); i++) {1246bool is_signed = (util_format_description(surface_fmts[i])1247->channel[0].type == UTIL_FORMAT_TYPE_SIGNED);1248bool is_int = util_format_is_pure_integer(surface_fmts[i]);12491250printf(" - %s\n", util_format_name(surface_fmts[i]));12511252if (!ctx->screen->is_format_supported(ctx->screen,1253surface_fmts[i], PIPE_TEXTURE_2D, 1, 1,1254PIPE_BIND_COMPUTE_RESOURCE)) {1255printf("(unsupported)\n");1256continue;1257}12581259init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT,1260512, 32, (is_int ? test_surface_st_init0i : test_surface_st_init0f));1261init_tex(ctx, 1, PIPE_TEXTURE_2D, true, surface_fmts[i],1262128, 32, test_surface_st_init1);1263init_compute_resources(ctx, (int []) { 0, 1, -1 });1264init_sampler_states(ctx, 2);1265launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0,1266NULL);1267check_tex(ctx, 1, (is_int && is_signed ? test_surface_st_expects :1268is_int && !is_signed ? test_surface_st_expectu :1269test_surface_st_expectf), test_surface_st_check);1270destroy_sampler_states(ctx);1271destroy_compute_resources(ctx);1272destroy_tex(ctx);1273}12741275destroy_prog(ctx);1276}12771278/* test_barrier */1279static void test_barrier_expect(void *p, int s, int x, int y)1280{1281*(uint32_t *)p = 31;1282}12831284static void test_barrier(struct context *ctx)1285{1286const char *src = "COMP\n"1287"DCL RES[0], BUFFER, RAW, WR\n"1288"DCL SV[0], BLOCK_ID[0]\n"1289"DCL SV[1], BLOCK_SIZE[0]\n"1290"DCL SV[2], THREAD_ID[0]\n"1291"DCL TEMP[0], LOCAL\n"1292"DCL TEMP[1], LOCAL\n"1293"DCL TEMP[2], LOCAL\n"1294"DCL TEMP[3], LOCAL\n"1295"IMM UINT32 { 1, 0, 0, 0 }\n"1296"IMM UINT32 { 4, 0, 0, 0 }\n"1297"IMM UINT32 { 32, 0, 0, 0 }\n"1298"\n"1299" BGNSUB\n"1300" UMUL TEMP[0].x, SV[2], IMM[1]\n"1301" MOV TEMP[1].x, IMM[0].wwww\n"1302" BGNLOOP\n"1303" BARRIER\n"1304" STORE RLOCAL.x, TEMP[0], TEMP[1]\n"1305" BARRIER\n"1306" MOV TEMP[2].x, IMM[0].wwww\n"1307" BGNLOOP\n"1308" UMUL TEMP[3].x, TEMP[2], IMM[1]\n"1309" LOAD TEMP[3].x, RLOCAL, TEMP[3]\n"1310" USNE TEMP[3].x, TEMP[3], TEMP[1]\n"1311" IF TEMP[3]\n"1312" END\n"1313" ENDIF\n"1314" UADD TEMP[2].x, TEMP[2], IMM[0]\n"1315" USEQ TEMP[3].x, TEMP[2], SV[1]\n"1316" IF TEMP[3]\n"1317" BRK\n"1318" ENDIF\n"1319" ENDLOOP\n"1320" UADD TEMP[1].x, TEMP[1], IMM[0]\n"1321" USEQ TEMP[2].x, TEMP[1], IMM[2]\n"1322" IF TEMP[2]\n"1323" BRK\n"1324" ENDIF\n"1325" ENDLOOP\n"1326" UMUL TEMP[1].x, SV[0], SV[1]\n"1327" UMUL TEMP[1].x, TEMP[1], IMM[1]\n"1328" UADD TEMP[1].x, TEMP[1], TEMP[0]\n"1329" LOAD TEMP[0].x, RLOCAL, TEMP[0]\n"1330" STORE RES[0].x, TEMP[1], TEMP[0]\n"1331" RET\n"1332" ENDSUB\n";13331334printf("- %s\n", __func__);13351336init_prog(ctx, 256, 0, 0, src, NULL);1337init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,13384096, 0, test_default_init);1339init_compute_resources(ctx, (int []) { 0, -1 });1340launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);1341check_tex(ctx, 0, test_barrier_expect, NULL);1342destroy_compute_resources(ctx);1343destroy_tex(ctx);1344destroy_prog(ctx);1345}13461347/* test_atom_ops */1348static void test_atom_ops_init(void *p, int s, int x, int y)1349{1350*(uint32_t *)p = 0xbad;1351}13521353static void test_atom_ops_expect(void *p, int s, int x, int y)1354{1355switch (x) {1356case 0:1357*(uint32_t *)p = 0xce6c8eef;1358break;1359case 1:1360*(uint32_t *)p = 0xdeadbeef;1361break;1362case 2:1363*(uint32_t *)p = 0x11111111;1364break;1365case 3:1366*(uint32_t *)p = 0x10011001;1367break;1368case 4:1369*(uint32_t *)p = 0xdfbdbfff;1370break;1371case 5:1372*(uint32_t *)p = 0x11111111;1373break;1374case 6:1375*(uint32_t *)p = 0x11111111;1376break;1377case 7:1378*(uint32_t *)p = 0xdeadbeef;1379break;1380case 8:1381*(uint32_t *)p = 0xdeadbeef;1382break;1383case 9:1384*(uint32_t *)p = 0x11111111;1385break;1386}1387}13881389static void test_atom_ops(struct context *ctx, bool global)1390{1391const char *src = "COMP\n"1392"#ifdef TARGET_GLOBAL\n"1393"#define target RES[0]\n"1394"#else\n"1395"#define target RLOCAL\n"1396"#endif\n"1397""1398"DCL RES[0], BUFFER, RAW, WR\n"1399"#define threadid SV[0]\n"1400"DCL threadid, THREAD_ID[0]\n"1401""1402"#define offset TEMP[0]\n"1403"DCL offset, LOCAL\n"1404"#define tmp TEMP[1]\n"1405"DCL tmp, LOCAL\n"1406""1407"#define k0 IMM[0]\n"1408"IMM UINT32 { 0, 0, 0, 0 }\n"1409"#define k1 IMM[1]\n"1410"IMM UINT32 { 1, 0, 0, 0 }\n"1411"#define k2 IMM[2]\n"1412"IMM UINT32 { 2, 0, 0, 0 }\n"1413"#define k3 IMM[3]\n"1414"IMM UINT32 { 3, 0, 0, 0 }\n"1415"#define k4 IMM[4]\n"1416"IMM UINT32 { 4, 0, 0, 0 }\n"1417"#define k5 IMM[5]\n"1418"IMM UINT32 { 5, 0, 0, 0 }\n"1419"#define k6 IMM[6]\n"1420"IMM UINT32 { 6, 0, 0, 0 }\n"1421"#define k7 IMM[7]\n"1422"IMM UINT32 { 7, 0, 0, 0 }\n"1423"#define k8 IMM[8]\n"1424"IMM UINT32 { 8, 0, 0, 0 }\n"1425"#define k9 IMM[9]\n"1426"IMM UINT32 { 9, 0, 0, 0 }\n"1427"#define korig IMM[10].xxxx\n"1428"#define karg IMM[10].yyyy\n"1429"IMM UINT32 { 3735928559, 286331153, 0, 0 }\n"1430"\n"1431" BGNSUB\n"1432" UMUL offset.x, threadid, k4\n"1433" STORE target.x, offset, korig\n"1434" USEQ tmp.x, threadid, k0\n"1435" IF tmp\n"1436" ATOMUADD tmp.x, target, offset, karg\n"1437" ATOMUADD tmp.x, target, offset, tmp\n"1438" ENDIF\n"1439" USEQ tmp.x, threadid, k1\n"1440" IF tmp\n"1441" ATOMXCHG tmp.x, target, offset, karg\n"1442" ATOMXCHG tmp.x, target, offset, tmp\n"1443" ENDIF\n"1444" USEQ tmp.x, threadid, k2\n"1445" IF tmp\n"1446" ATOMCAS tmp.x, target, offset, korig, karg\n"1447" ATOMCAS tmp.x, target, offset, tmp, k0\n"1448" ENDIF\n"1449" USEQ tmp.x, threadid, k3\n"1450" IF tmp\n"1451" ATOMAND tmp.x, target, offset, karg\n"1452" ATOMAND tmp.x, target, offset, tmp\n"1453" ENDIF\n"1454" USEQ tmp.x, threadid, k4\n"1455" IF tmp\n"1456" ATOMOR tmp.x, target, offset, karg\n"1457" ATOMOR tmp.x, target, offset, tmp\n"1458" ENDIF\n"1459" USEQ tmp.x, threadid, k5\n"1460" IF tmp\n"1461" ATOMXOR tmp.x, target, offset, karg\n"1462" ATOMXOR tmp.x, target, offset, tmp\n"1463" ENDIF\n"1464" USEQ tmp.x, threadid, k6\n"1465" IF tmp\n"1466" ATOMUMIN tmp.x, target, offset, karg\n"1467" ATOMUMIN tmp.x, target, offset, tmp\n"1468" ENDIF\n"1469" USEQ tmp.x, threadid, k7\n"1470" IF tmp\n"1471" ATOMUMAX tmp.x, target, offset, karg\n"1472" ATOMUMAX tmp.x, target, offset, tmp\n"1473" ENDIF\n"1474" USEQ tmp.x, threadid, k8\n"1475" IF tmp\n"1476" ATOMIMIN tmp.x, target, offset, karg\n"1477" ATOMIMIN tmp.x, target, offset, tmp\n"1478" ENDIF\n"1479" USEQ tmp.x, threadid, k9\n"1480" IF tmp\n"1481" ATOMIMAX tmp.x, target, offset, karg\n"1482" ATOMIMAX tmp.x, target, offset, tmp\n"1483" ENDIF\n"1484"#ifdef TARGET_LOCAL\n"1485" LOAD tmp.x, RLOCAL, offset\n"1486" STORE RES[0].x, offset, tmp\n"1487"#endif\n"1488" RET\n"1489" ENDSUB\n";14901491printf("- %s (%s)\n", __func__, global ? "global" : "local");14921493init_prog(ctx, 40, 0, 0, src,1494(global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL"));1495init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,149640, 0, test_atom_ops_init);1497init_compute_resources(ctx, (int []) { 0, -1 });1498launch_grid(ctx, (uint []){10, 1, 1}, (uint []){1, 1, 1}, 0, NULL);1499check_tex(ctx, 0, test_atom_ops_expect, NULL);1500destroy_compute_resources(ctx);1501destroy_tex(ctx);1502destroy_prog(ctx);1503}15041505/* test_atom_race */1506static void test_atom_race_expect(void *p, int s, int x, int y)1507{1508*(uint32_t *)p = x & 0x20 ? 0x11111111 : 0xffffffff;1509}15101511static void test_atom_race(struct context *ctx, bool global)1512{1513const char *src = "COMP\n"1514"#ifdef TARGET_GLOBAL\n"1515"#define target RES[0]\n"1516"#else\n"1517"#define target RLOCAL\n"1518"#endif\n"1519""1520"DCL RES[0], BUFFER, RAW, WR\n"1521""1522"#define blockid SV[0]\n"1523"DCL blockid, BLOCK_ID[0]\n"1524"#define blocksz SV[1]\n"1525"DCL blocksz, BLOCK_SIZE[0]\n"1526"#define threadid SV[2]\n"1527"DCL threadid, THREAD_ID[0]\n"1528""1529"#define offset TEMP[0]\n"1530"DCL offset, LOCAL\n"1531"#define arg TEMP[1]\n"1532"DCL arg, LOCAL\n"1533"#define count TEMP[2]\n"1534"DCL count, LOCAL\n"1535"#define vlocal TEMP[3]\n"1536"DCL vlocal, LOCAL\n"1537"#define vshared TEMP[4]\n"1538"DCL vshared, LOCAL\n"1539"#define last TEMP[5]\n"1540"DCL last, LOCAL\n"1541"#define tmp0 TEMP[6]\n"1542"DCL tmp0, LOCAL\n"1543"#define tmp1 TEMP[7]\n"1544"DCL tmp1, LOCAL\n"1545""1546"#define k0 IMM[0]\n"1547"IMM UINT32 { 0, 0, 0, 0 }\n"1548"#define k1 IMM[1]\n"1549"IMM UINT32 { 1, 0, 0, 0 }\n"1550"#define k4 IMM[2]\n"1551"IMM UINT32 { 4, 0, 0, 0 }\n"1552"#define k32 IMM[3]\n"1553"IMM UINT32 { 32, 0, 0, 0 }\n"1554"#define k128 IMM[4]\n"1555"IMM UINT32 { 128, 0, 0, 0 }\n"1556"#define kdeadcafe IMM[5]\n"1557"IMM UINT32 { 3735931646, 0, 0, 0 }\n"1558"#define kallowed_set IMM[6]\n"1559"IMM UINT32 { 559035650, 0, 0, 0 }\n"1560"#define k11111111 IMM[7]\n"1561"IMM UINT32 { 286331153, 0, 0, 0 }\n"1562"\n"1563" BGNSUB\n"1564" MOV offset.x, threadid\n"1565"#ifdef TARGET_GLOBAL\n"1566" UMUL tmp0.x, blockid, blocksz\n"1567" UADD offset.x, offset, tmp0\n"1568"#endif\n"1569" UMUL offset.x, offset, k4\n"1570" USLT tmp0.x, threadid, k32\n"1571" STORE target.x, offset, k0\n"1572" BARRIER\n"1573" IF tmp0\n"1574" MOV vlocal.x, k0\n"1575" MOV arg.x, kdeadcafe\n"1576" BGNLOOP\n"1577" INEG arg.x, arg\n"1578" ATOMUADD vshared.x, target, offset, arg\n"1579" SFENCE target\n"1580" USNE tmp0.x, vshared, vlocal\n"1581" IF tmp0\n"1582" BRK\n"1583" ENDIF\n"1584" UADD vlocal.x, vlocal, arg\n"1585" ENDLOOP\n"1586" UADD vlocal.x, vshared, arg\n"1587" LOAD vshared.x, target, offset\n"1588" USEQ tmp0.x, vshared, vlocal\n"1589" STORE target.x, offset, tmp0\n"1590" ELSE\n"1591" UADD offset.x, offset, -k128\n"1592" MOV count.x, k0\n"1593" MOV last.x, k0\n"1594" BGNLOOP\n"1595" LOAD vshared.x, target, offset\n"1596" USEQ tmp0.x, vshared, kallowed_set.xxxx\n"1597" USEQ tmp1.x, vshared, kallowed_set.yyyy\n"1598" OR tmp0.x, tmp0, tmp1\n"1599" IF tmp0\n"1600" USEQ tmp0.x, vshared, last\n"1601" IF tmp0\n"1602" CONT\n"1603" ENDIF\n"1604" MOV last.x, vshared\n"1605" ELSE\n"1606" END\n"1607" ENDIF\n"1608" UADD count.x, count, k1\n"1609" USEQ tmp0.x, count, k128\n"1610" IF tmp0\n"1611" BRK\n"1612" ENDIF\n"1613" ENDLOOP\n"1614" ATOMXCHG tmp0.x, target, offset, k11111111\n"1615" UADD offset.x, offset, k128\n"1616" ATOMXCHG tmp0.x, target, offset, k11111111\n"1617" SFENCE target\n"1618" ENDIF\n"1619"#ifdef TARGET_LOCAL\n"1620" LOAD tmp0.x, RLOCAL, offset\n"1621" UMUL tmp1.x, blockid, blocksz\n"1622" UMUL tmp1.x, tmp1, k4\n"1623" UADD offset.x, offset, tmp1\n"1624" STORE RES[0].x, offset, tmp0\n"1625"#endif\n"1626" RET\n"1627" ENDSUB\n";16281629printf("- %s (%s)\n", __func__, global ? "global" : "local");16301631init_prog(ctx, 256, 0, 0, src,1632(global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL"));1633init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT,16344096, 0, test_default_init);1635init_compute_resources(ctx, (int []) { 0, -1 });1636launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL);1637check_tex(ctx, 0, test_atom_race_expect, NULL);1638destroy_compute_resources(ctx);1639destroy_tex(ctx);1640destroy_prog(ctx);1641}16421643int main(int argc, char *argv[])1644{1645struct context *ctx = CALLOC_STRUCT(context);16461647unsigned tests = (argc > 1) ? strtoul(argv[1], NULL, 0) : ~0;16481649init_ctx(ctx);16501651if (tests & (1 << 0))1652test_system_values(ctx);1653if (tests & (1 << 1))1654test_resource_access(ctx);1655if (tests & (1 << 2))1656test_function_calls(ctx);1657if (tests & (1 << 3))1658test_input_global(ctx);1659if (tests & (1 << 4))1660test_private(ctx);1661if (tests & (1 << 5))1662test_local(ctx);1663if (tests & (1 << 6))1664test_sample(ctx);1665if (tests & (1 << 7))1666test_many_kern(ctx);1667if (tests & (1 << 8))1668test_constant(ctx);1669if (tests & (1 << 9))1670test_resource_indirect(ctx);1671if (tests & (1 << 10))1672test_surface_ld(ctx);1673if (tests & (1 << 11))1674test_surface_st(ctx);1675if (tests & (1 << 12))1676test_barrier(ctx);1677if (tests & (1 << 13))1678test_atom_ops(ctx, true);1679if (tests & (1 << 14))1680test_atom_race(ctx, true);1681if (tests & (1 << 15))1682test_atom_ops(ctx, false);1683if (tests & (1 << 16))1684test_atom_race(ctx, false);16851686destroy_ctx(ctx);16871688return 0;1689}169016911692