Path: blob/21.2-virgl/src/microsoft/clc/clc_compiler_test.cpp
4560 views
/*1* Copyright © Microsoft Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include <stdio.h>24#include <stdint.h>25#include <stdexcept>26#include <vector>2728#include <directx/d3d12.h>29#include <dxgi1_4.h>30#include <gtest/gtest.h>31#include <wrl.h>3233#include "compute_test.h"3435using std::vector;3637TEST_F(ComputeTest, runtime_memcpy)38{39struct shift { uint8_t val; uint8_t shift; uint16_t ret; };40const char *kernel_source =41"struct shift { uchar val; uchar shift; ushort ret; };\n\42__kernel void main_test(__global struct shift *inout)\n\43{\n\44uint id = get_global_id(0);\n\45uint id2 = id + get_global_id(1);\n\46struct shift lc[4] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }};\n\47lc[id] = inout[id];\n\48inout[id2].ret = (ushort) lc[id2].val << (ushort) lc[id2].shift;\n\49}\n";5051auto inout = ShaderArg<struct shift>({52{ 0x10, 1, 0xffff },53{ 0x20, 2, 0xffff },54{ 0x30, 3, 0xffff },55{ 0x40, 4, 0xffff },56},57SHADER_ARG_INOUT);58const uint16_t expected[] = { 0x20, 0x80, 0x180, 0x400 };59run_shader(kernel_source, inout.size(), 1, 1, inout);60for (int i = 0; i < inout.size(); ++i)61EXPECT_EQ(inout[i].ret, expected[i]);62}6364TEST_F(ComputeTest, two_global_arrays)65{66const char *kernel_source =67"__kernel void main_test(__global uint *g1, __global uint *g2)\n\68{\n\69uint idx = get_global_id(0);\n\70g1[idx] -= g2[idx];\n\71}\n";72auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);73auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);74const uint32_t expected[] = {759, 18, 27, 3676};7778run_shader(kernel_source, g1.size(), 1, 1, g1, g2);79for (int i = 0; i < g1.size(); ++i)80EXPECT_EQ(g1[i], expected[i]);81}8283/* Disabled until saturated conversions from f32->i64 fixed (mesa/mesa#3824) */84TEST_F(ComputeTest, DISABLED_i64tof32)85{86const char *kernel_source =87"__kernel void main_test(__global long *out, __constant long *in)\n\88{\n\89__local float tmp[12];\n\90uint idx = get_global_id(0);\n\91tmp[idx] = in[idx];\n\92barrier(CLK_LOCAL_MEM_FENCE);\n\93out[idx] = tmp[idx + get_global_id(1)];\n\94}\n";95auto in = ShaderArg<int64_t>({ 0x100000000LL,96-0x100000000LL,970x7fffffffffffffffLL,980x4000004000000000LL,990x4000003fffffffffLL,1000x4000004000000001LL,101-1,102-0x4000004000000000LL,103-0x4000003fffffffffLL,104-0x4000004000000001LL,1050,106INT64_MIN },107SHADER_ARG_INPUT);108auto out = ShaderArg<int64_t>(std::vector<int64_t>(12, 0xdeadbeed), SHADER_ARG_OUTPUT);109const int64_t expected[] = {1100x100000000LL,111-0x100000000LL,1120x7fffffffffffffffLL,1130x4000000000000000LL,1140x4000000000000000LL,1150x4000008000000000LL,116-1,117-0x4000000000000000LL,118-0x4000000000000000LL,119-0x4000008000000000LL,1200,121INT64_MIN,122};123124run_shader(kernel_source, out.size(), 1, 1, out, in);125for (int i = 0; i < out.size(); ++i) {126EXPECT_EQ((int64_t)out[i], expected[i]);127}128}129TEST_F(ComputeTest, two_constant_arrays)130{131const char *kernel_source =132"__kernel void main_test(__constant uint *c1, __global uint *g1, __constant uint *c2)\n\133{\n\134uint idx = get_global_id(0);\n\135g1[idx] -= c1[idx] + c2[idx];\n\136}\n";137auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);138auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);139auto c2 = ShaderArg<uint32_t>(std::vector<uint32_t>(16384, 5), SHADER_ARG_INPUT);140const uint32_t expected[] = {1414, 13, 22, 31142};143144run_shader(kernel_source, g1.size(), 1, 1, c1, g1, c2);145for (int i = 0; i < g1.size(); ++i)146EXPECT_EQ(g1[i], expected[i]);147}148149TEST_F(ComputeTest, null_constant_ptr)150{151const char *kernel_source =152"__kernel void main_test(__global uint *g1, __constant uint *c1)\n\153{\n\154__constant uint fallback[] = {2, 3, 4, 5};\n\155__constant uint *c = c1 ? c1 : fallback;\n\156uint idx = get_global_id(0);\n\157g1[idx] -= c[idx];\n\158}\n";159auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);160auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);161const uint32_t expected1[] = {1629, 18, 27, 36163};164165run_shader(kernel_source, g1.size(), 1, 1, g1, c1);166for (int i = 0; i < g1.size(); ++i)167EXPECT_EQ(g1[i], expected1[i]);168169const uint32_t expected2[] = {1708, 17, 26, 35171};172173g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);174auto c2 = NullShaderArg();175run_shader(kernel_source, g1.size(), 1, 1, g1, c2);176for (int i = 0; i < g1.size(); ++i)177EXPECT_EQ(g1[i], expected2[i]);178}179180/* This test seems to fail on older versions of WARP. */181TEST_F(ComputeTest, DISABLED_null_global_ptr)182{183const char *kernel_source =184"__kernel void main_test(__global uint *g1, __global uint *g2)\n\185{\n\186__constant uint fallback[] = {2, 3, 4, 5};\n\187uint idx = get_global_id(0);\n\188g1[idx] -= g2 ? g2[idx] : fallback[idx];\n\189}\n";190auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);191auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);192const uint32_t expected1[] = {1939, 18, 27, 36194};195196run_shader(kernel_source, g1.size(), 1, 1, g1, g2);197for (int i = 0; i < g1.size(); ++i)198EXPECT_EQ(g1[i], expected1[i]);199200const uint32_t expected2[] = {2018, 17, 26, 35202};203204g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);205auto g2null = NullShaderArg();206run_shader(kernel_source, g1.size(), 1, 1, g1, g2null);207for (int i = 0; i < g1.size(); ++i)208EXPECT_EQ(g1[i], expected2[i]);209}210211TEST_F(ComputeTest, ret_constant_ptr)212{213struct s { uint64_t ptr; uint32_t val; };214const char *kernel_source =215"struct s { __constant uint *ptr; uint val; };\n\216__kernel void main_test(__global struct s *out, __constant uint *in)\n\217{\n\218__constant uint foo[] = { 1, 2 };\n\219uint idx = get_global_id(0);\n\220if (idx == 0)\n\221out[idx].ptr = foo;\n\222else\n\223out[idx].ptr = in;\n\224out[idx].val = out[idx].ptr[idx];\n\225}\n";226auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT);227auto in = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT);228const uint32_t expected_val[] = {2291, 4230};231const uint64_t expected_ptr[] = {2322ull << 32, 1ull << 32233};234235run_shader(kernel_source, out.size(), 1, 1, out, in);236for (int i = 0; i < out.size(); ++i) {237EXPECT_EQ(out[i].val, expected_val[i]);238EXPECT_EQ(out[i].ptr, expected_ptr[i]);239}240}241242TEST_F(ComputeTest, ret_global_ptr)243{244struct s { uint64_t ptr; uint32_t val; };245const char *kernel_source =246"struct s { __global uint *ptr; uint val; };\n\247__kernel void main_test(__global struct s *out, __global uint *in1, __global uint *in2)\n\248{\n\249uint idx = get_global_id(0);\n\250out[idx].ptr = idx ? in2 : in1;\n\251out[idx].val = out[idx].ptr[idx];\n\252}\n";253auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT);254auto in1 = ShaderArg<uint32_t>({ 1, 2 }, SHADER_ARG_INPUT);255auto in2 = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT);256const uint32_t expected_val[] = {2571, 4258};259const uint64_t expected_ptr[] = {2601ull << 32, 2ull << 32261};262263run_shader(kernel_source, out.size(), 1, 1, out, in1, in2);264for (int i = 0; i < out.size(); ++i) {265EXPECT_EQ(out[i].val, expected_val[i]);266EXPECT_EQ(out[i].ptr, expected_ptr[i]);267}268}269270TEST_F(ComputeTest, ret_local_ptr)271{272struct s { uint64_t ptr; };273const char *kernel_source =274"struct s { __local uint *ptr; };\n\275__kernel void main_test(__global struct s *out)\n\276{\n\277__local uint tmp[2];\n\278uint idx = get_global_id(0);\n\279tmp[idx] = idx;\n\280out[idx].ptr = &tmp[idx];\n\281}\n";282auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT);283const uint64_t expected_ptr[] = {2840, 4,285};286287run_shader(kernel_source, out.size(), 1, 1, out);288for (int i = 0; i < out.size(); ++i) {289EXPECT_EQ(out[i].ptr, expected_ptr[i]);290}291}292293TEST_F(ComputeTest, ret_private_ptr)294{295struct s { uint64_t ptr; uint32_t value; };296const char *kernel_source =297"struct s { __private uint *ptr; uint value; };\n\298__kernel void main_test(__global struct s *out)\n\299{\n\300uint tmp[2] = {1, 2};\n\301uint idx = get_global_id(0);\n\302out[idx].ptr = &tmp[idx];\n\303out[idx].value = *out[idx].ptr;\n\304}\n";305auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT);306const uint64_t expected_ptr[] = {3070, 4,308};309const uint32_t expected_value[] = {3101, 2311};312313run_shader(kernel_source, out.size(), 1, 1, out);314for (int i = 0; i < out.size(); ++i) {315EXPECT_EQ(out[i].ptr, expected_ptr[i]);316}317}318319TEST_F(ComputeTest, globals_8bit)320{321const char *kernel_source =322"__kernel void main_test(__global unsigned char *inout)\n\323{\n\324uint idx = get_global_id(0);\n\325inout[idx] = inout[idx] + 1;\n\326}\n";327auto inout = ShaderArg<uint8_t> ({ 100, 110, 120, 130 }, SHADER_ARG_INOUT);328const uint8_t expected[] = {329101, 111, 121, 131330};331run_shader(kernel_source, inout.size(), 1, 1, inout);332for (int i = 0; i < inout.size(); ++i)333EXPECT_EQ(inout[i], expected[i]);334}335336TEST_F(ComputeTest, globals_16bit)337{338const char *kernel_source =339"__kernel void main_test(__global unsigned short *inout)\n\340{\n\341uint idx = get_global_id(0);\n\342inout[idx] = inout[idx] + 1;\n\343}\n";344auto inout = ShaderArg<uint16_t> ({ 10000, 10010, 10020, 10030 }, SHADER_ARG_INOUT);345const uint16_t expected[] = {34610001, 10011, 10021, 10031347};348run_shader(kernel_source, inout.size(), 1, 1, inout);349for (int i = 0; i < inout.size(); ++i)350EXPECT_EQ(inout[i], expected[i]);351}352353TEST_F(ComputeTest, DISABLED_globals_64bit)354{355/* Test disabled, because we need a fixed version of WARP that hasn't356been officially shipped yet */357358const char *kernel_source =359"__kernel void main_test(__global unsigned long *inout)\n\360{\n\361uint idx = get_global_id(0);\n\362inout[idx] = inout[idx] + 1;\n\363}\n";364uint64_t base = 1ull << 50;365auto inout = ShaderArg<uint64_t>({ base, base + 10, base + 20, base + 30 },366SHADER_ARG_INOUT);367const uint64_t expected[] = {368base + 1, base + 11, base + 21, base + 31369};370run_shader(kernel_source, inout.size(), 1, 1, inout);371for (int i = 0; i < inout.size(); ++i)372EXPECT_EQ(inout[i], expected[i]);373}374375TEST_F(ComputeTest, built_ins_global_id)376{377const char *kernel_source =378"__kernel void main_test(__global uint *output)\n\379{\n\380output[get_global_id(0)] = get_global_id(0);\n\381}\n";382auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),383SHADER_ARG_OUTPUT);384const uint32_t expected[] = {3850, 1, 2, 3386};387388run_shader(kernel_source, output.size(), 1, 1, output);389for (int i = 0; i < output.size(); ++i)390EXPECT_EQ(output[i], expected[i]);391}392393TEST_F(ComputeTest, built_ins_global_id_rmw)394{395const char *kernel_source =396"__kernel void main_test(__global uint *output)\n\397{\n\398uint id = get_global_id(0);\n\399output[id] = output[id] * (id + 1);\n\400}\n";401auto inout = ShaderArg<uint32_t>({0x00000001, 0x10000001, 0x00020002, 0x04010203},402SHADER_ARG_INOUT);403const uint32_t expected[] = {4040x00000001, 0x20000002, 0x00060006, 0x1004080c405};406run_shader(kernel_source, inout.size(), 1, 1, inout);407for (int i = 0; i < inout.size(); ++i)408EXPECT_EQ(inout[i], expected[i]);409}410411TEST_F(ComputeTest, types_float_basics)412{413const char *kernel_source =414"__kernel void main_test(__global uint *output)\n\415{\n\416output[get_global_id(0)] = (uint)((float)get_global_id(0) + 1.5f);\n\417}\n";418auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),419SHADER_ARG_OUTPUT);420const uint32_t expected[] = {4211, 2, 3, 4422};423run_shader(kernel_source, output.size(), 1, 1, output);424for (int i = 0; i < output.size(); ++i)425EXPECT_EQ(output[i], expected[i]);426}427428TEST_F(ComputeTest, DISABLED_types_double_basics)429{430const char *kernel_source =431"__kernel void main_test(__global uint *output)\n\432{\n\433output[get_global_id(0)] = (uint)((double)get_global_id(0) + 1.5);\n\434}\n";435auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),436SHADER_ARG_OUTPUT);437const uint32_t expected[] = {4381, 2, 3, 4439};440run_shader(kernel_source, output.size(), 1, 1, output);441for (int i = 0; i < output.size(); ++i)442EXPECT_EQ(output[i], expected[i]);443}444445TEST_F(ComputeTest, types_short_basics)446{447const char *kernel_source =448"__kernel void main_test(__global uint *output)\n\449{\n\450output[get_global_id(0)] = (uint)((short)get_global_id(0) + (short)1);\n\451}\n";452auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),453SHADER_ARG_OUTPUT);454const uint32_t expected[] = {4551, 2, 3, 4456};457run_shader(kernel_source, output.size(), 1, 1, output);458for (int i = 0; i < output.size(); ++i)459EXPECT_EQ(output[i], expected[i]);460}461462TEST_F(ComputeTest, types_char_basics)463{464const char *kernel_source =465"__kernel void main_test(__global uint *output)\n\466{\n\467output[get_global_id(0)] = (uint)((char)get_global_id(0) + (char)1);\n\468}\n";469auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),470SHADER_ARG_OUTPUT);471const uint32_t expected[] = {4721, 2, 3, 4473};474run_shader(kernel_source, output.size(), 1, 1, output);475for (int i = 0; i < output.size(); ++i)476EXPECT_EQ(output[i], expected[i]);477}478479TEST_F(ComputeTest, types_if_statement)480{481const char *kernel_source =482"__kernel void main_test(__global uint *output)\n\483{\n\484int idx = get_global_id(0);\n\485if (idx > 0)\n\486output[idx] = ~idx;\n\487else\n\488output[0] = 0xff;\n\489}\n";490auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),491SHADER_ARG_OUTPUT);492const uint32_t expected[] = {4930xff, ~1u, ~2u, ~3u494};495run_shader(kernel_source, output.size(), 1, 1, output);496for (int i = 0; i < output.size(); ++i)497EXPECT_EQ(output[i], expected[i]);498}499500TEST_F(ComputeTest, types_do_while_loop)501{502const char *kernel_source =503"__kernel void main_test(__global uint *output)\n\504{\n\505int value = 1;\n\506int i = 1, n = get_global_id(0);\n\507do {\n\508value *= i++;\n\509} while (i <= n);\n\510output[n] = value;\n\511}\n";512auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef),513SHADER_ARG_OUTPUT);514const uint32_t expected[] = {5151, 1, 1*2, 1*2*3, 1*2*3*4516};517run_shader(kernel_source, output.size(), 1, 1, output);518for (int i = 0; i < output.size(); ++i)519EXPECT_EQ(output[i], expected[i]);520}521522TEST_F(ComputeTest, types_for_loop)523{524const char *kernel_source =525"__kernel void main_test(__global uint *output)\n\526{\n\527int value = 1;\n\528int n = get_global_id(0);\n\529for (int i = 1; i <= n; ++i)\n\530value *= i;\n\531output[n] = value;\n\532}\n";533auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef),534SHADER_ARG_OUTPUT);535const uint32_t expected[] = {5361, 1, 1*2, 1*2*3, 1*2*3*4537};538run_shader(kernel_source, output.size(), 1, 1, output);539for (int i = 0; i < output.size(); ++i)540EXPECT_EQ(output[i], expected[i]);541}542543TEST_F(ComputeTest, DISABLED_complex_types_local_array_long)544{545const char *kernel_source =546"__kernel void main_test(__global ulong *inout)\n\547{\n\548ushort tmp[] = {\n\549get_global_id(1) + 0x00000000,\n\550get_global_id(1) + 0x10000001,\n\551get_global_id(1) + 0x20000020,\n\552get_global_id(1) + 0x30000300,\n\553};\n\554uint idx = get_global_id(0);\n\555inout[idx] = tmp[idx];\n\556}\n";557auto inout = ShaderArg<uint64_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);558const uint16_t expected[] = {5590x00000000, 0x10000001, 0x20000020, 0x30000300,560};561run_shader(kernel_source, inout.size(), 1, 1, inout);562for (int i = 0; i < inout.size(); ++i)563EXPECT_EQ(inout[i], expected[i]);564}565566TEST_F(ComputeTest, complex_types_local_array_short)567{568const char *kernel_source =569"__kernel void main_test(__global ushort *inout)\n\570{\n\571ushort tmp[] = {\n\572get_global_id(1) + 0x00,\n\573get_global_id(1) + 0x10,\n\574get_global_id(1) + 0x20,\n\575get_global_id(1) + 0x30,\n\576};\n\577uint idx = get_global_id(0);\n\578inout[idx] = tmp[idx];\n\579}\n";580auto inout = ShaderArg<uint16_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);581const uint16_t expected[] = {5820x00, 0x10, 0x20, 0x30,583};584run_shader(kernel_source, inout.size(), 1, 1, inout);585for (int i = 0; i < inout.size(); ++i)586EXPECT_EQ(inout[i], expected[i]);587}588589TEST_F(ComputeTest, complex_types_local_array_struct_vec_float_misaligned)590{591const char *kernel_source =592"struct has_vecs { uchar c; ushort s; float2 f; };\n\593__kernel void main_test(__global uint *inout)\n\594{\n\595struct has_vecs tmp[] = {\n\596{ 10 + get_global_id(0), get_global_id(1), { 10.0f, 1.0f } },\n\597{ 19 + get_global_id(0), get_global_id(1), { 20.0f, 4.0f } },\n\598{ 28 + get_global_id(0), get_global_id(1), { 30.0f, 9.0f } },\n\599{ 37 + get_global_id(0), get_global_id(1), { 40.0f, 16.0f } },\n\600};\n\601uint idx = get_global_id(0);\n\602uint mul = (tmp[idx].c + tmp[idx].s) * trunc(tmp[idx].f[0]);\n\603inout[idx] = mul + trunc(tmp[idx].f[1]);\n\604}\n";605auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);606const uint16_t expected[] = { 101, 404, 909, 1616 };607run_shader(kernel_source, inout.size(), 1, 1, inout);608for (int i = 0; i < inout.size(); ++i)609EXPECT_EQ(inout[i], expected[i]);610}611612TEST_F(ComputeTest, complex_types_local_array)613{614const char *kernel_source =615"__kernel void main_test(__global uint *inout)\n\616{\n\617uint tmp[] = {\n\618get_global_id(1) + 0x00,\n\619get_global_id(1) + 0x10,\n\620get_global_id(1) + 0x20,\n\621get_global_id(1) + 0x30,\n\622};\n\623uint idx = get_global_id(0);\n\624inout[idx] = tmp[idx];\n\625}\n";626auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);627const uint32_t expected[] = {6280x00, 0x10, 0x20, 0x30,629};630run_shader(kernel_source, inout.size(), 1, 1, inout);631for (int i = 0; i < inout.size(); ++i)632EXPECT_EQ(inout[i], expected[i]);633}634635TEST_F(ComputeTest, complex_types_global_struct_array)636{637struct two_vals { uint32_t add; uint32_t mul; };638const char *kernel_source =639"struct two_vals { uint add; uint mul; };\n\640__kernel void main_test(__global struct two_vals *in_out)\n\641{\n\642uint id = get_global_id(0);\n\643in_out[id].add = in_out[id].add + id;\n\644in_out[id].mul = in_out[id].mul * id;\n\645}\n";646auto inout = ShaderArg<struct two_vals>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } },647SHADER_ARG_INOUT);648const struct two_vals expected[] = {649{ 8 + 0, 8 * 0 },650{ 16 + 1, 16 * 1 },651{ 64 + 2, 64 * 2 },652{ 65536 + 3, 65536 * 3 }653};654run_shader(kernel_source, inout.size(), 1, 1, inout);655for (int i = 0; i < inout.size(); ++i) {656EXPECT_EQ(inout[i].add, expected[i].add);657EXPECT_EQ(inout[i].mul, expected[i].mul);658}659}660661TEST_F(ComputeTest, complex_types_global_uint2)662{663struct uint2 { uint32_t x; uint32_t y; };664const char *kernel_source =665"__kernel void main_test(__global uint2 *inout)\n\666{\n\667uint id = get_global_id(0);\n\668inout[id].x = inout[id].x + id;\n\669inout[id].y = inout[id].y * id;\n\670}\n";671auto inout = ShaderArg<struct uint2>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } },672SHADER_ARG_INOUT);673const struct uint2 expected[] = {674{ 8 + 0, 8 * 0 },675{ 16 + 1, 16 * 1 },676{ 64 + 2, 64 * 2 },677{ 65536 + 3, 65536 * 3 }678};679run_shader(kernel_source, inout.size(), 1, 1, inout);680for (int i = 0; i < inout.size(); ++i) {681EXPECT_EQ(inout[i].x, expected[i].x);682EXPECT_EQ(inout[i].y, expected[i].y);683}684}685686TEST_F(ComputeTest, complex_types_global_ushort2)687{688struct ushort2 { uint16_t x; uint16_t y; };689const char *kernel_source =690"__kernel void main_test(__global ushort2 *inout)\n\691{\n\692uint id = get_global_id(0);\n\693inout[id].x = inout[id].x + id;\n\694inout[id].y = inout[id].y * id;\n\695}\n";696auto inout = ShaderArg<struct ushort2>({ { 8, 8 }, { 16, 16 }, { 64, 64 },697{ (uint16_t)65536, (uint16_t)65536 } },698SHADER_ARG_INOUT);699const struct ushort2 expected[] = {700{ 8 + 0, 8 * 0 },701{ 16 + 1, 16 * 1 },702{ 64 + 2, 64 * 2 },703{ (uint16_t)(65536 + 3), (uint16_t)(65536 * 3) }704};705run_shader(kernel_source, inout.size(), 1, 1, inout);706for (int i = 0; i < inout.size(); ++i) {707EXPECT_EQ(inout[i].x, expected[i].x);708EXPECT_EQ(inout[i].y, expected[i].y);709}710}711712TEST_F(ComputeTest, complex_types_global_uchar3)713{714struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; };715const char *kernel_source =716"__kernel void main_test(__global uchar3 *inout)\n\717{\n\718uint id = get_global_id(0);\n\719inout[id].x = inout[id].x + id;\n\720inout[id].y = inout[id].y * id;\n\721inout[id].z = inout[id].y + inout[id].x;\n\722}\n";723auto inout = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } },724SHADER_ARG_INOUT);725const struct uchar3 expected[] = {726{ 8 + 0, 8 * 0, (8 + 0) + (8 * 0) },727{ 16 + 1, 16 * 1, (16 + 1) + (16 * 1) },728{ 64 + 2, 64 * 2, (64 + 2) + (64 * 2) },729{ (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) }730};731run_shader(kernel_source, inout.size(), 1, 1, inout);732for (int i = 0; i < inout.size(); ++i) {733EXPECT_EQ(inout[i].x, expected[i].x);734EXPECT_EQ(inout[i].y, expected[i].y);735EXPECT_EQ(inout[i].z, expected[i].z);736}737}738739TEST_F(ComputeTest, complex_types_constant_uchar3)740{741struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; };742const char *kernel_source =743"__kernel void main_test(__global uchar3 *out, __constant uchar3 *in)\n\744{\n\745uint id = get_global_id(0);\n\746out[id].x = in[id].x + id;\n\747out[id].y = in[id].y * id;\n\748out[id].z = out[id].y + out[id].x;\n\749}\n";750auto in = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } },751SHADER_ARG_INPUT);752auto out = ShaderArg<struct uchar3>(std::vector<struct uchar3>(4, { 0xff, 0xff, 0xff }),753SHADER_ARG_OUTPUT);754const struct uchar3 expected[] = {755{ 8 + 0, 8 * 0, (8 + 0) + (8 * 0) },756{ 16 + 1, 16 * 1, (16 + 1) + (16 * 1) },757{ 64 + 2, 64 * 2, (64 + 2) + (64 * 2) },758{ (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) }759};760run_shader(kernel_source, out.size(), 1, 1, out, in);761for (int i = 0; i < out.size(); ++i) {762EXPECT_EQ(out[i].x, expected[i].x);763EXPECT_EQ(out[i].y, expected[i].y);764EXPECT_EQ(out[i].z, expected[i].z);765}766}767768TEST_F(ComputeTest, complex_types_global_uint8)769{770struct uint8 {771uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;772uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;773};774const char *kernel_source =775"__kernel void main_test(__global uint8 *inout)\n\776{\n\777uint id = get_global_id(0);\n\778inout[id].s01234567 = inout[id].s01234567 * 2;\n\779}\n";780auto inout = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } },781SHADER_ARG_INOUT);782const struct uint8 expected[] = {783{ 2, 4, 6, 8, 10, 12, 14, 16 }784};785run_shader(kernel_source, inout.size(), 1, 1, inout);786for (int i = 0; i < inout.size(); ++i) {787EXPECT_EQ(inout[i].s0, expected[i].s0);788EXPECT_EQ(inout[i].s1, expected[i].s1);789EXPECT_EQ(inout[i].s2, expected[i].s2);790EXPECT_EQ(inout[i].s3, expected[i].s3);791EXPECT_EQ(inout[i].s4, expected[i].s4);792EXPECT_EQ(inout[i].s5, expected[i].s5);793EXPECT_EQ(inout[i].s6, expected[i].s6);794EXPECT_EQ(inout[i].s7, expected[i].s7);795}796}797798TEST_F(ComputeTest, complex_types_local_ulong16)799{800struct ulong16 {801uint64_t values[16];802};803const char *kernel_source =804R"(__kernel void main_test(__global ulong16 *inout)805{806__local ulong16 local_array[2];807uint id = get_global_id(0);808local_array[id] = inout[id];809barrier(CLK_LOCAL_MEM_FENCE);810inout[id] = local_array[0] * 2;811})";812auto inout = ShaderArg<struct ulong16>({ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } },813SHADER_ARG_INOUT);814const struct ulong16 expected[] = {815{ 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }816};817run_shader(kernel_source, inout.size(), 1, 1, inout);818for (int i = 0; i < inout.size(); ++i) {819for (int j = 0; j < 16; ++j) {820EXPECT_EQ(inout[i].values[j], expected[i].values[j]);821}822}823}824825TEST_F(ComputeTest, complex_types_constant_uint8)826{827struct uint8 {828uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;829uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;830};831const char *kernel_source =832"__kernel void main_test(__global uint8 *out, __constant uint8 *in)\n\833{\n\834uint id = get_global_id(0);\n\835out[id].s01234567 = in[id].s01234567 * 2;\n\836}\n";837auto in = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } },838SHADER_ARG_INPUT);839auto out = ShaderArg<struct uint8>({ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } },840SHADER_ARG_INOUT);841const struct uint8 expected[] = {842{ 2, 4, 6, 8, 10, 12, 14, 16 }843};844run_shader(kernel_source, out.size(), 1, 1, out, in);845for (int i = 0; i < out.size(); ++i) {846EXPECT_EQ(out[i].s0, expected[i].s0);847EXPECT_EQ(out[i].s1, expected[i].s1);848EXPECT_EQ(out[i].s2, expected[i].s2);849EXPECT_EQ(out[i].s3, expected[i].s3);850EXPECT_EQ(out[i].s4, expected[i].s4);851EXPECT_EQ(out[i].s5, expected[i].s5);852EXPECT_EQ(out[i].s6, expected[i].s6);853EXPECT_EQ(out[i].s7, expected[i].s7);854}855}856857TEST_F(ComputeTest, DISABLED_complex_types_const_array)858{859/* DISABLED because current release versions of WARP either return860* rubbish from reads or crash: they are not prepared to handle861* non-float global constants */862const char *kernel_source =863"__kernel void main_test(__global uint *output)\n\864{\n\865const uint foo[] = { 100, 101, 102, 103 };\n\866output[get_global_id(0)] = foo[get_global_id(0) % 4];\n\867}\n";868auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),869SHADER_ARG_OUTPUT);870const uint32_t expected[] = {871100, 101, 102, 103872};873run_shader(kernel_source, output.size(), 1, 1, output);874for (int i = 0; i < output.size(); ++i)875EXPECT_EQ(output[i], expected[i]);876}877878TEST_F(ComputeTest, mem_access_load_store_ordering)879{880const char *kernel_source =881"__kernel void main_test(__global uint *output)\n\882{\n\883uint foo[4];\n\884foo[0] = 0x11111111;\n\885foo[1] = 0x22222222;\n\886foo[2] = 0x44444444;\n\887foo[3] = 0x88888888;\n\888foo[get_global_id(1)] -= 0x11111111; // foo[0] = 0 \n\889foo[0] += get_global_id(0); // foo[0] = tid\n\890foo[foo[get_global_id(1)]] = get_global_id(0); // foo[tid] = tid\n\891output[get_global_id(0)] = foo[get_global_id(0)]; // output[tid] = tid\n\892}\n";893auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),894SHADER_ARG_OUTPUT);895const uint16_t expected[] = {8960, 1, 2, 3897};898run_shader(kernel_source, output.size(), 1, 1, output);899for (int i = 0; i < output.size(); ++i)900EXPECT_EQ(output[i], expected[i]);901}902903TEST_F(ComputeTest, DISABLED_two_const_arrays)904{905/* DISABLED because current release versions of WARP either return906* rubbish from reads or crash: they are not prepared to handle907* non-float global constants */908const char *kernel_source =909"__kernel void main_test(__global uint *output)\n\910{\n\911uint id = get_global_id(0);\n\912uint foo[4] = {100, 101, 102, 103};\n\913uint bar[4] = {1, 2, 3, 4};\n\914output[id] = foo[id] * bar[id];\n\915}\n";916auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),917SHADER_ARG_OUTPUT);918const uint32_t expected[] = {919100, 202, 306, 412920};921run_shader(kernel_source, output.size(), 1, 1, output);922for (int i = 0; i < output.size(); ++i)923EXPECT_EQ(output[i], expected[i]);924}925926TEST_F(ComputeTest, imod_pos)927{928const char *kernel_source =929"__kernel void main_test(__global int *inout)\n\930{\n\931inout[get_global_id(0)] = inout[get_global_id(0)] % 3;\n\932}\n";933auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 },934SHADER_ARG_INOUT);935const int32_t expected[] = {936-1, 0, -2, -1, 0, 1, 2, 0, 1937};938run_shader(kernel_source, inout.size(), 1, 1, inout);939for (int i = 0; i < inout.size(); ++i)940EXPECT_EQ(inout[i], expected[i]);941}942943TEST_F(ComputeTest, imod_neg)944{945const char *kernel_source =946"__kernel void main_test(__global int *inout)\n\947{\n\948inout[get_global_id(0)] = inout[get_global_id(0)] % -3;\n\949}\n";950auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 },951SHADER_ARG_INOUT);952const int32_t expected[] = {953-1, 0, -2, -1, 0, 1, 2, 0, 1954};955run_shader(kernel_source, inout.size(), 1, 1, inout);956for (int i = 0; i < inout.size(); ++i)957EXPECT_EQ(inout[i], expected[i]);958}959960TEST_F(ComputeTest, umod)961{962const char *kernel_source =963"__kernel void main_test(__global uint *inout)\n\964{\n\965inout[get_global_id(0)] = inout[get_global_id(0)] % 0xfffffffc;\n\966}\n";967auto inout = ShaderArg<uint32_t>({ 0xfffffffa, 0xfffffffb, 0xfffffffc, 0xfffffffd, 0xfffffffe },968SHADER_ARG_INOUT);969const uint32_t expected[] = {9700xfffffffa, 0xfffffffb, 0, 1, 2971};972run_shader(kernel_source, inout.size(), 1, 1, inout);973for (int i = 0; i < inout.size(); ++i)974EXPECT_EQ(inout[i], expected[i]);975}976977TEST_F(ComputeTest, rotate)978{979const char *kernel_source =980"__kernel void main_test(__global uint *inout)\n\981{\n\982inout[get_global_id(0)] = rotate(inout[get_global_id(0)], (uint)get_global_id(0) * 4);\n\983}\n";984auto inout = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),985SHADER_ARG_INOUT);986const uint32_t expected[] = {9870xdeadbeef, 0xeadbeefd, 0xadbeefde, 0xdbeefdea988};989run_shader(kernel_source, inout.size(), 1, 1, inout);990for (int i = 0; i < inout.size(); ++i)991EXPECT_EQ(inout[i], expected[i]);992}993994TEST_F(ComputeTest, popcount)995{996const char *kernel_source =997"__kernel void main_test(__global uint *inout)\n\998{\n\999inout[get_global_id(0)] = popcount(inout[get_global_id(0)]);\n\1000}\n";1001auto inout = ShaderArg<uint32_t>({ 0, 0x1, 0x3, 0x101, 0x110011, ~0u },1002SHADER_ARG_INOUT);1003const uint32_t expected[] = {10040, 1, 2, 2, 4, 321005};1006run_shader(kernel_source, inout.size(), 1, 1, inout);1007for (int i = 0; i < inout.size(); ++i)1008EXPECT_EQ(inout[i], expected[i]);1009}10101011TEST_F(ComputeTest, hadd)1012{1013const char *kernel_source =1014"__kernel void main_test(__global uint *inout)\n\1015{\n\1016inout[get_global_id(0)] = hadd(inout[get_global_id(0)], 1u << 31);\n\1017}\n";1018auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff },1019SHADER_ARG_INOUT);1020const uint32_t expected[] = {1021(1u << 31) >> 1,1022((1u << 31) + 1) >> 1,1023((1u << 31) + 2) >> 1,1024((1u << 31) + 3) >> 1,1025((1ull << 31) + 0xfffffffc) >> 1,1026((1ull << 31) + 0xfffffffd) >> 1,1027((1ull << 31) + 0xfffffffe) >> 1,1028((1ull << 31) + 0xffffffff) >> 1,1029};1030run_shader(kernel_source, inout.size(), 1, 1, inout);1031for (int i = 0; i < inout.size(); ++i)1032EXPECT_EQ(inout[i], expected[i]);1033}10341035TEST_F(ComputeTest, rhadd)1036{1037const char *kernel_source =1038"__kernel void main_test(__global uint *inout)\n\1039{\n\1040inout[get_global_id(0)] = rhadd(inout[get_global_id(0)], 1u << 31);\n\1041}\n";1042auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff },1043SHADER_ARG_INOUT);1044const uint32_t expected[] = {1045((1u << 31) + 1) >> 1,1046((1u << 31) + 2) >> 1,1047((1u << 31) + 3) >> 1,1048((1u << 31) + 4) >> 1,1049((1ull << 31) + 0xfffffffd) >> 1,1050((1ull << 31) + 0xfffffffe) >> 1,1051((1ull << 31) + 0xffffffff) >> 1,1052((1ull << 31) + (1ull << 32)) >> 1,1053};1054run_shader(kernel_source, inout.size(), 1, 1, inout);1055for (int i = 0; i < inout.size(); ++i)1056EXPECT_EQ(inout[i], expected[i]);1057}10581059TEST_F(ComputeTest, add_sat)1060{1061const char *kernel_source =1062"__kernel void main_test(__global uint *inout)\n\1063{\n\1064inout[get_global_id(0)] = add_sat(inout[get_global_id(0)], 2u);\n\1065}\n";1066auto inout = ShaderArg<uint32_t>({ 0xffffffff - 3, 0xffffffff - 2, 0xffffffff - 1, 0xffffffff },1067SHADER_ARG_INOUT);1068const uint32_t expected[] = {10690xffffffff - 1, 0xffffffff, 0xffffffff, 0xffffffff1070};1071run_shader(kernel_source, inout.size(), 1, 1, inout);1072for (int i = 0; i < inout.size(); ++i)1073EXPECT_EQ(inout[i], expected[i]);1074}10751076TEST_F(ComputeTest, sub_sat)1077{1078const char *kernel_source =1079"__kernel void main_test(__global uint *inout)\n\1080{\n\1081inout[get_global_id(0)] = sub_sat(inout[get_global_id(0)], 2u);\n\1082}\n";1083auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3 }, SHADER_ARG_INOUT);1084const uint32_t expected[] = {10850, 0, 0, 11086};1087run_shader(kernel_source, inout.size(), 1, 1, inout);1088for (int i = 0; i < inout.size(); ++i)1089EXPECT_EQ(inout[i], expected[i]);1090}10911092TEST_F(ComputeTest, mul_hi)1093{1094const char *kernel_source =1095"__kernel void main_test(__global uint *inout)\n\1096{\n\1097inout[get_global_id(0)] = mul_hi(inout[get_global_id(0)], 1u << 31);\n\1098}\n";1099auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, (1u << 31) }, SHADER_ARG_INOUT);1100const uint32_t expected[] = {11010, 0, 1, 1, (1u << 30)1102};1103run_shader(kernel_source, inout.size(), 1, 1, inout);1104for (int i = 0; i < inout.size(); ++i)1105EXPECT_EQ(inout[i], expected[i]);1106}11071108TEST_F(ComputeTest, ldexp_x)1109{1110const char *kernel_source =1111"__kernel void main_test(__global float *inout)\n\1112{\n\1113inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], 5);\n\1114}\n";1115auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 2.0f }, SHADER_ARG_INOUT);1116const float expected[] = {1117ldexp(0.0f, 5), ldexp(0.5f, 5), ldexp(1.0f, 5), ldexp(2.0f, 5)1118};1119run_shader(kernel_source, inout.size(), 1, 1, inout);1120for (int i = 0; i < inout.size(); ++i)1121EXPECT_FLOAT_EQ(inout[i], expected[i]);1122}11231124TEST_F(ComputeTest, ldexp_y)1125{1126const char *kernel_source =1127"__kernel void main_test(__global float *inout)\n\1128{\n\1129inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], get_global_id(0));\n\1130}\n";1131auto inout = ShaderArg<float>({ 0.25f, 0.5f, 0.75f, 1.0f }, SHADER_ARG_INOUT);1132const float expected[] = {1133ldexp(0.25f, 0), ldexp(0.5f, 1), ldexp(0.75f, 2), ldexp(1.0f, 3)1134};1135run_shader(kernel_source, inout.size(), 1, 1, inout);1136for (int i = 0; i < inout.size(); ++i)1137EXPECT_FLOAT_EQ(inout[i], expected[i]);1138}11391140TEST_F(ComputeTest, frexp_ret)1141{1142const char *kernel_source =1143"__kernel void main_test(__global float *inout)\n\1144{\n\1145int exp;\n\1146inout[get_global_id(0)] = frexp(inout[get_global_id(0)], &exp);\n\1147}\n";1148auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT);1149const float expected[] = {11500.0f, 0.5f, 0.5f, 0.75f1151};1152run_shader(kernel_source, inout.size(), 1, 1, inout);1153for (int i = 0; i < inout.size(); ++i)1154EXPECT_FLOAT_EQ(inout[i], expected[i]);1155}11561157TEST_F(ComputeTest, frexp_exp)1158{1159const char *kernel_source =1160"__kernel void main_test(__global float *inout)\n\1161{\n\1162int exp;\n\1163frexp(inout[get_global_id(0)], &exp);\n\1164inout[get_global_id(0)] = (float)exp;\n\1165}\n";1166auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT);1167const float expected[] = {11680.0f, 0.0f, 1.0f, 2.0f1169};1170run_shader(kernel_source, inout.size(), 1, 1, inout);1171for (int i = 0; i < inout.size(); ++i)1172EXPECT_FLOAT_EQ(inout[i], expected[i]);1173}11741175TEST_F(ComputeTest, clz)1176{1177const char *kernel_source =1178"__kernel void main_test(__global uint *inout)\n\1179{\n\1180inout[get_global_id(0)] = clz(inout[get_global_id(0)]);\n\1181}\n";1182auto inout = ShaderArg<uint32_t>({ 0, 1, 0xffff, (1u << 30), (1u << 31) }, SHADER_ARG_INOUT);1183const uint32_t expected[] = {118432, 31, 16, 1, 01185};1186run_shader(kernel_source, inout.size(), 1, 1, inout);1187for (int i = 0; i < inout.size(); ++i)1188EXPECT_FLOAT_EQ(inout[i], expected[i]);1189}11901191TEST_F(ComputeTest, sin)1192{1193struct sin_vals { float in; float clc; float native; };1194const char *kernel_source =1195"struct sin_vals { float in; float clc; float native; };\n\1196__kernel void main_test(__global struct sin_vals *inout)\n\1197{\n\1198inout[get_global_id(0)].clc = sin(inout[get_global_id(0)].in);\n\1199inout[get_global_id(0)].native = native_sin(inout[get_global_id(0)].in);\n\1200}\n";1201const vector<sin_vals> input = {1202{ 0.0f, 0.0f, 0.0f },1203{ 1.0f, 0.0f, 0.0f },1204{ 2.0f, 0.0f, 0.0f },1205{ 3.0f, 0.0f, 0.0f },1206};1207auto inout = ShaderArg<sin_vals>(input, SHADER_ARG_INOUT);1208const struct sin_vals expected[] = {1209{ 0.0f, 0.0f, 0.0f },1210{ 1.0f, sin(1.0f), sin(1.0f) },1211{ 2.0f, sin(2.0f), sin(2.0f) },1212{ 3.0f, sin(3.0f), sin(3.0f) },1213};1214run_shader(kernel_source, inout.size(), 1, 1, inout);1215for (int i = 0; i < inout.size(); ++i) {1216EXPECT_FLOAT_EQ(inout[i].in, inout[i].in);1217EXPECT_FLOAT_EQ(inout[i].clc, inout[i].clc);1218EXPECT_NEAR(inout[i].clc, inout[i].native, 0.008f); // range from DXIL spec1219}1220}12211222TEST_F(ComputeTest, DISABLED_cosh)1223{1224/* Disabled because of WARP failures, where we fetch incorrect results when1225* sourcing from non-float ICBs */1226const char *kernel_source =1227"__kernel void main_test(__global float *inout)\n\1228{\n\1229inout[get_global_id(0)] = cosh(inout[get_global_id(0)]);\n\1230}\n";1231auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);1232const float expected[] = {1233cosh(0.0f), cosh(1.0f), cosh(2.0f), cosh(3.0f)1234};1235run_shader(kernel_source, inout.size(), 1, 1, inout);1236for (int i = 0; i < inout.size(); ++i)1237EXPECT_FLOAT_EQ(inout[i], expected[i]);1238}12391240TEST_F(ComputeTest, exp)1241{1242const char *kernel_source =1243"__kernel void main_test(__global float *inout)\n\1244{\n\1245inout[get_global_id(0)] = native_exp(inout[get_global_id(0)]);\n\1246}\n";1247auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);1248const float expected[] = {1249exp(0.0f), exp(1.0f), exp(2.0f), exp(3.0f)1250};1251run_shader(kernel_source, inout.size(), 1, 1, inout);1252for (int i = 0; i < inout.size(); ++i)1253EXPECT_FLOAT_EQ(inout[i], expected[i]);1254}12551256TEST_F(ComputeTest, exp10)1257{1258const char *kernel_source =1259"__kernel void main_test(__global float *inout)\n\1260{\n\1261inout[get_global_id(0)] = native_exp10(inout[get_global_id(0)]);\n\1262}\n";1263auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);1264const float expected[] = {1265pow(10.0f, 0.0f), pow(10.0f, 1.0f), pow(10.0f, 2.0f), pow(10.0f, 3.0f)1266};1267run_shader(kernel_source, inout.size(), 1, 1, inout);1268for (int i = 0; i < inout.size(); ++i)1269EXPECT_FLOAT_EQ(inout[i], expected[i]);1270}12711272TEST_F(ComputeTest, exp2)1273{1274const char *kernel_source =1275"__kernel void main_test(__global float *inout)\n\1276{\n\1277inout[get_global_id(0)] = native_exp2(inout[get_global_id(0)]);\n\1278}\n";1279auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);1280const float expected[] = {1281pow(2.0f, 0.0f), pow(2.0f, 1.0f), pow(2.0f, 2.0f), pow(2.0f, 3.0f)1282};1283run_shader(kernel_source, inout.size(), 1, 1, inout);1284for (int i = 0; i < inout.size(); ++i)1285EXPECT_FLOAT_EQ(inout[i], expected[i]);1286}12871288TEST_F(ComputeTest, log)1289{1290const char *kernel_source =1291"__kernel void main_test(__global float *inout)\n\1292{\n\1293inout[get_global_id(0)] = native_log(inout[get_global_id(0)]);\n\1294}\n";1295auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);1296const float expected[] = {1297log(0.0f), log(1.0f), log(2.0f), log(3.0f)1298};1299run_shader(kernel_source, inout.size(), 1, 1, inout);1300for (int i = 0; i < inout.size(); ++i)1301EXPECT_FLOAT_EQ(inout[i], expected[i]);1302}13031304TEST_F(ComputeTest, log10)1305{1306const char *kernel_source =1307"__kernel void main_test(__global float *inout)\n\1308{\n\1309inout[get_global_id(0)] = native_log10(inout[get_global_id(0)]);\n\1310}\n";1311auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);1312const float expected[] = {1313log10(0.0f), log10(1.0f), log10(2.0f), log10(3.0f)1314};1315run_shader(kernel_source, inout.size(), 1, 1, inout);1316for (int i = 0; i < inout.size(); ++i)1317EXPECT_FLOAT_EQ(inout[i], expected[i]);1318}13191320TEST_F(ComputeTest, log2)1321{1322const char *kernel_source =1323"__kernel void main_test(__global float *inout)\n\1324{\n\1325inout[get_global_id(0)] = native_log2(inout[get_global_id(0)]);\n\1326}\n";1327auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);1328const float expected[] = {1329log(0.0f) / log(2), log(1.0f) / log(2), log(2.0f) / log(2), log(3.0f) / log(2)1330};1331run_shader(kernel_source, inout.size(), 1, 1, inout);1332for (int i = 0; i < inout.size(); ++i)1333EXPECT_FLOAT_EQ(inout[i], expected[i]);1334}13351336TEST_F(ComputeTest, rint)1337{1338const char *kernel_source =1339"__kernel void main_test(__global float *inout)\n\1340{\n\1341inout[get_global_id(0)] = rint(inout[get_global_id(0)]);\n\1342}\n";13431344auto inout = ShaderArg<float>({ 0.5f, 1.5f, -0.5f, -1.5f, 1.4f }, SHADER_ARG_INOUT);1345const float expected[] = {13460.0f, 2.0f, 0.0f, -2.0f, 1.0f,1347};1348run_shader(kernel_source, inout.size(), 1, 1, inout);1349for (int i = 0; i < inout.size(); ++i)1350EXPECT_FLOAT_EQ(inout[i], expected[i]);1351}13521353TEST_F(ComputeTest, round)1354{1355const char *kernel_source =1356"__kernel void main_test(__global float *inout)\n\1357{\n\1358inout[get_global_id(0)] = round(inout[get_global_id(0)]);\n\1359}\n";1360auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },1361SHADER_ARG_INOUT);1362const float expected[] = {13630.0f, 0.0f, -0.0f, 1.0f, -1.0f, 1.0f, -1.0f1364};1365run_shader(kernel_source, inout.size(), 1, 1, inout);1366for (int i = 0; i < inout.size(); ++i)1367EXPECT_FLOAT_EQ(inout[i], expected[i]);1368}13691370TEST_F(ComputeTest, arg_by_val)1371{1372const char *kernel_source =1373"__kernel void main_test(__global float *inout, float mul)\n\1374{\n\1375inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\1376}\n";1377auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },1378SHADER_ARG_INOUT);1379auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT);1380const float expected[] = {13810.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f1382};1383run_shader(kernel_source, inout.size(), 1, 1, inout, mul);1384for (int i = 0; i < inout.size(); ++i)1385EXPECT_FLOAT_EQ(inout[i], expected[i]);1386}13871388TEST_F(ComputeTest, uint8_by_val)1389{1390struct uint8 {1391uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;1392uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;1393};1394const char *kernel_source =1395"__kernel void main_test(__global uint *out, uint8 val)\n\1396{\n\1397out[get_global_id(0)] = val.s0 + val.s1 + val.s2 + val.s3 +\n\1398val.s4 + val.s5 + val.s6 + val.s7;\n\1399}\n";1400auto out = ShaderArg<uint32_t>({ 0 }, SHADER_ARG_OUTPUT);1401auto val = ShaderArg<struct uint8>({ {0, 1, 2, 3, 4, 5, 6, 7 }}, SHADER_ARG_INPUT);1402const uint32_t expected[] = { 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 };1403run_shader(kernel_source, out.size(), 1, 1, out, val);1404for (int i = 0; i < out.size(); ++i)1405EXPECT_EQ(out[i], expected[i]);1406}14071408TEST_F(ComputeTest, link)1409{1410const char *foo_src =1411"float foo(float in)\n\1412{\n\1413return in * in;\n\1414}\n";1415const char *kernel_source =1416"float foo(float in);\n\1417__kernel void main_test(__global float *inout)\n\1418{\n\1419inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\1420}\n";1421std::vector<const char *> srcs = { foo_src, kernel_source };1422auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT);1423const float expected[] = {14244.0f,1425};1426run_shader(srcs, inout.size(), 1, 1, inout);1427for (int i = 0; i < inout.size(); ++i)1428EXPECT_EQ(inout[i], expected[i]);1429}14301431TEST_F(ComputeTest, link_library)1432{1433const char *bar_src =1434"float bar(float in)\n\1435{\n\1436return in * 5;\n\1437}\n";1438const char *foo_src =1439"float bar(float in);\n\1440float foo(float in)\n\1441{\n\1442return in * bar(in);\n\1443}\n";1444const char *kernel_source =1445"float foo(float in);\n\1446__kernel void main_test(__global float *inout)\n\1447{\n\1448inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\1449}\n";1450std::vector<Shader> libraries = {1451compile({ bar_src, kernel_source }, {}, true),1452compile({ foo_src }, {}, true)1453};1454Shader exe = link(libraries);1455auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT);1456const float expected[] = {145720.0f,1458};1459run_shader(exe, { (unsigned)inout.size(), 1, 1 }, inout);1460for (int i = 0; i < inout.size(); ++i)1461EXPECT_EQ(inout[i], expected[i]);1462}14631464TEST_F(ComputeTest, localvar)1465{1466const char *kernel_source =1467"__kernel __attribute__((reqd_work_group_size(2, 1, 1)))\n\1468void main_test(__global float *inout)\n\1469{\n\1470__local float2 tmp[2];\n\1471tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\1472tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\1473barrier(CLK_LOCAL_MEM_FENCE);\n\1474inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\1475}\n";14761477auto inout = ShaderArg<float>({ 2.0f, 4.0f }, SHADER_ARG_INOUT);1478const float expected[] = {14799.0f, 5.0f1480};1481run_shader(kernel_source, inout.size(), 1, 1, inout);1482for (int i = 0; i < inout.size(); ++i)1483EXPECT_EQ(inout[i], expected[i]);1484}14851486TEST_F(ComputeTest, localvar_uchar2)1487{1488const char *kernel_source =1489"__attribute__((reqd_work_group_size(2, 1, 1)))\n\1490__kernel void main_test(__global uchar *inout)\n\1491{\n\1492__local uchar2 tmp[2];\n\1493tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\1494tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\1495barrier(CLK_LOCAL_MEM_FENCE);\n\1496inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\1497}\n";14981499auto inout = ShaderArg<uint8_t>({ 2, 4 }, SHADER_ARG_INOUT);1500const uint8_t expected[] = { 9, 5 };1501run_shader(kernel_source, inout.size(), 1, 1, inout);1502for (int i = 0; i < inout.size(); ++i)1503EXPECT_EQ(inout[i], expected[i]);1504}15051506TEST_F(ComputeTest, work_group_size_hint)1507{1508const char *kernel_source =1509"__attribute__((work_group_size_hint(2, 1, 1)))\n\1510__kernel void main_test(__global uint *output)\n\1511{\n\1512output[get_global_id(0)] = get_local_id(0);\n\1513}\n";1514auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),1515SHADER_ARG_OUTPUT);1516const uint32_t expected[] = {15170, 1, 2, 31518};1519run_shader(kernel_source, output.size(), 1, 1, output);1520for (int i = 0; i < output.size(); ++i)1521EXPECT_EQ(output[i], expected[i]);1522}15231524TEST_F(ComputeTest, reqd_work_group_size)1525{1526const char *kernel_source =1527"__attribute__((reqd_work_group_size(2, 1, 1)))\n\1528__kernel void main_test(__global uint *output)\n\1529{\n\1530output[get_global_id(0)] = get_local_id(0);\n\1531}\n";1532auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),1533SHADER_ARG_OUTPUT);1534const uint32_t expected[] = {15350, 1, 0, 11536};1537run_shader(kernel_source, output.size(), 1, 1, output);1538for (int i = 0; i < output.size(); ++i)1539EXPECT_EQ(output[i], expected[i]);1540}15411542TEST_F(ComputeTest, image)1543{1544const char* kernel_source =1545"__kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\1546{\n\1547int2 coords = (int2)(get_global_id(0), get_global_id(1));\n\1548write_imagef(output, coords, read_imagef(input, coords));\n\1549}\n";1550Shader shader = compile(std::vector<const char*>({ kernel_source }));1551validate(shader);1552}15531554TEST_F(ComputeTest, image_two_reads)1555{1556const char* kernel_source =1557"__kernel void main_test(image2d_t image, int is_float, __global float* output)\n\1558{\n\1559if (is_float)\n\1560output[get_global_id(0)] = read_imagef(image, (int2)(0, 0)).x;\n\1561else \n\1562output[get_global_id(0)] = (float)read_imagei(image, (int2)(0, 0)).x;\n\1563}\n";1564Shader shader = compile(std::vector<const char*>({ kernel_source }));1565validate(shader);1566}15671568TEST_F(ComputeTest, sampler)1569{1570const char* kernel_source =1571"__kernel void main_test(image2d_t image, sampler_t sampler, __global float* output)\n\1572{\n\1573output[get_global_id(0)] = read_imagef(image, sampler, (int2)(0, 0)).x;\n\1574}\n";1575Shader shader = compile(std::vector<const char*>({ kernel_source }));1576validate(shader);1577}15781579TEST_F(ComputeTest, image_dims)1580{1581const char* kernel_source =1582"__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\1583{\n\1584output[get_global_id(0)] = get_image_width(roimage);\n\1585output[get_global_id(0) + 1] = get_image_width(woimage);\n\1586}\n";1587Shader shader = compile(std::vector<const char*>({ kernel_source }));1588validate(shader);1589}15901591TEST_F(ComputeTest, image_format)1592{1593const char* kernel_source =1594"__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\1595{\n\1596output[get_global_id(0)] = get_image_channel_data_type(roimage);\n\1597output[get_global_id(0) + 1] = get_image_channel_order(woimage);\n\1598}\n";1599Shader shader = compile(std::vector<const char*>({ kernel_source }));1600validate(shader);1601}16021603TEST_F(ComputeTest, image1d_buffer_t)1604{1605const char* kernel_source =1606"__kernel void main_test(read_only image1d_buffer_t input, write_only image1d_buffer_t output)\n\1607{\n\1608write_imageui(output, get_global_id(0), read_imageui(input, get_global_id(0)));\n\1609}\n";1610Shader shader = compile(std::vector<const char*>({ kernel_source }));1611validate(shader);1612}16131614TEST_F(ComputeTest, local_ptr)1615{1616struct uint2 { uint32_t x, y; };1617const char *kernel_source =1618"__kernel void main_test(__global uint *inout, __local uint2 *tmp)\n\1619{\n\1620tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\1621tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\1622barrier(CLK_LOCAL_MEM_FENCE);\n\1623inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\1624}\n";1625auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT);1626auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(4096), SHADER_ARG_INPUT);1627const uint8_t expected[] = { 9, 5 };1628run_shader(kernel_source, inout.size(), 1, 1, inout, tmp);1629for (int i = 0; i < inout.size(); ++i)1630EXPECT_EQ(inout[i], expected[i]);1631}16321633TEST_F(ComputeTest, two_local_ptrs)1634{1635struct uint2 { uint32_t x, y; };1636const char *kernel_source =1637"__kernel void main_test(__global uint *inout, __local uint2 *tmp, __local uint *tmp2)\n\1638{\n\1639tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\1640tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\1641tmp2[get_local_id(0)] = get_global_id(0);\n\1642barrier(CLK_LOCAL_MEM_FENCE);\n\1643inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y + tmp2[get_local_id(0) % 2];\n\1644}\n";1645auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT);1646auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(1024), SHADER_ARG_INPUT);1647auto tmp2 = ShaderArg<uint32_t>(std::vector<uint32_t>(1024), SHADER_ARG_INPUT);1648const uint8_t expected[] = { 9, 6 };1649run_shader(kernel_source, inout.size(), 1, 1, inout, tmp, tmp2);1650for (int i = 0; i < inout.size(); ++i)1651EXPECT_EQ(inout[i], expected[i]);1652}16531654TEST_F(ComputeTest, int8_to_float)1655{1656const char *kernel_source =1657"__kernel void main_test(__global char* in, __global float* out)\n\1658{\n\1659uint pos = get_global_id(0);\n\1660out[pos] = in[pos] / 100.0f;\n\1661}";1662auto in = ShaderArg<char>({ 10, 20, 30, 40 }, SHADER_ARG_INPUT);1663auto out = ShaderArg<float>(std::vector<float>(4, std::numeric_limits<float>::infinity()), SHADER_ARG_OUTPUT);1664const float expected[] = { 0.1f, 0.2f, 0.3f, 0.4f };1665run_shader(kernel_source, in.size(), 1, 1, in, out);1666for (int i = 0; i < in.size(); ++i)1667EXPECT_FLOAT_EQ(out[i], expected[i]);1668}16691670TEST_F(ComputeTest, vec_hint_float4)1671{1672const char *kernel_source =1673"__kernel __attribute__((vec_type_hint(float4))) void main_test(__global float *inout)\n\1674{\n\1675inout[get_global_id(0)] *= inout[get_global_id(1)];\n\1676}";1677Shader shader = compile({ kernel_source });1678EXPECT_EQ(shader.obj->kernels[0].vec_hint_size, 4);1679EXPECT_EQ(shader.obj->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_FLOAT);1680}16811682TEST_F(ComputeTest, vec_hint_uchar2)1683{1684const char *kernel_source =1685"__kernel __attribute__((vec_type_hint(uchar2))) void main_test(__global float *inout)\n\1686{\n\1687inout[get_global_id(0)] *= inout[get_global_id(1)];\n\1688}";1689Shader shader = compile({ kernel_source });1690EXPECT_EQ(shader.obj->kernels[0].vec_hint_size, 2);1691EXPECT_EQ(shader.obj->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_CHAR);1692}16931694TEST_F(ComputeTest, vec_hint_none)1695{1696const char *kernel_source =1697"__kernel void main_test(__global float *inout)\n\1698{\n\1699inout[get_global_id(0)] *= inout[get_global_id(1)];\n\1700}";1701Shader shader = compile({ kernel_source });1702EXPECT_EQ(shader.obj->kernels[0].vec_hint_size, 0);1703}17041705TEST_F(ComputeTest, DISABLED_debug_layer_failure)1706{1707const char *kernel_source =1708"__kernel void main_test(__global float *inout, float mul)\n\1709{\n\1710inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\1711}\n";1712auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },1713SHADER_ARG_INOUT);1714auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT);1715const float expected[] = {17160.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f1717};1718ComPtr<ID3D12InfoQueue> info_queue;1719dev->QueryInterface(info_queue.ReleaseAndGetAddressOf());1720if (!info_queue) {1721GTEST_SKIP() << "No info queue";1722return;1723}17241725info_queue->AddApplicationMessage(D3D12_MESSAGE_SEVERITY_ERROR, "This should cause the test to fail");1726run_shader(kernel_source, inout.size(), 1, 1, inout, mul);1727for (int i = 0; i < inout.size(); ++i)1728EXPECT_FLOAT_EQ(inout[i], expected[i]);1729}17301731TEST_F(ComputeTest, compiler_defines)1732{1733const char *kernel_source =1734"__kernel void main_test(__global int* out)\n\1735{\n\1736out[0] = OUT_VAL0;\n\1737out[1] = __OPENCL_C_VERSION__;\n\1738}";1739auto out = ShaderArg<int>(std::vector<int>(2, 0), SHADER_ARG_OUTPUT);1740CompileArgs compile_args = { 1, 1, 1 };1741compile_args.compiler_command_line = { "-DOUT_VAL0=5", "-cl-std=cl" };1742std::vector<RawShaderArg *> raw_args = { &out };1743run_shader({ kernel_source }, compile_args, out);1744EXPECT_EQ(out[0], 5);1745EXPECT_EQ(out[1], 100);1746}17471748/* There's a bug in WARP turning atomic_add(ptr, x) into1749* atomic_add(ptr, x * 4). Works fine on intel HW.1750*/1751TEST_F(ComputeTest, DISABLED_global_atomic_add)1752{1753const char *kernel_source =1754"__kernel void main_test(__global int *inout, __global int *old)\n\1755{\n\1756old[get_global_id(0)] = atomic_add(inout + get_global_id(0), 3);\n\1757}\n";1758auto inout = ShaderArg<int32_t>({ 2, 4 }, SHADER_ARG_INOUT);1759auto old = ShaderArg<int32_t>(std::vector<int32_t>(2, 0xdeadbeef), SHADER_ARG_OUTPUT);1760const int32_t expected_inout[] = { 5, 7 };1761const int32_t expected_old[] = { 2, 4 };1762run_shader(kernel_source, inout.size(), 1, 1, inout, old);1763for (int i = 0; i < inout.size(); ++i) {1764EXPECT_EQ(inout[i], expected_inout[i]);1765EXPECT_EQ(old[i], expected_old[i]);1766}1767}17681769TEST_F(ComputeTest, global_atomic_imin)1770{1771const char *kernel_source =1772"__kernel void main_test(__global int *inout, __global int *old)\n\1773{\n\1774old[get_global_id(0)] = atomic_min(inout + get_global_id(0), 1);\n\1775}\n";1776auto inout = ShaderArg<int32_t>({ 0, 2, -1 }, SHADER_ARG_INOUT);1777auto old = ShaderArg<int32_t>(std::vector<int32_t>(3, 0xdeadbeef), SHADER_ARG_OUTPUT);1778const int32_t expected_inout[] = { 0, 1, -1 };1779const int32_t expected_old[] = { 0, 2, -1 };1780run_shader(kernel_source, inout.size(), 1, 1, inout, old);1781for (int i = 0; i < inout.size(); ++i) {1782EXPECT_EQ(inout[i], expected_inout[i]);1783EXPECT_EQ(old[i], expected_old[i]);1784}1785}17861787TEST_F(ComputeTest, global_atomic_and_or)1788{1789const char *kernel_source =1790"__attribute__((reqd_work_group_size(3, 1, 1)))\n\1791__kernel void main_test(__global int *inout)\n\1792{\n\1793atomic_and(inout, ~(1 << get_global_id(0)));\n\1794atomic_or(inout, (1 << (get_global_id(0) + 4)));\n\1795}\n";1796auto inout = ShaderArg<int32_t>(0xf, SHADER_ARG_INOUT);1797const int32_t expected[] = { 0x78 };1798run_shader(kernel_source, 3, 1, 1, inout);1799for (int i = 0; i < inout.size(); ++i)1800EXPECT_EQ(inout[i], expected[i]);1801}18021803TEST_F(ComputeTest, global_atomic_cmpxchg)1804{1805const char *kernel_source =1806"__attribute__((reqd_work_group_size(2, 1, 1)))\n\1807__kernel void main_test(__global int *inout)\n\1808{\n\1809while (atomic_cmpxchg(inout, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\1810;\n\1811}\n";1812auto inout = ShaderArg<int32_t>(0, SHADER_ARG_INOUT);1813const int32_t expected_inout[] = { 2 };1814run_shader(kernel_source, 2, 1, 1, inout);1815for (int i = 0; i < inout.size(); ++i)1816EXPECT_EQ(inout[i], expected_inout[i]);1817}18181819TEST_F(ComputeTest, local_atomic_and_or)1820{1821const char *kernel_source =1822"__attribute__((reqd_work_group_size(2, 1, 1)))\n\1823__kernel void main_test(__global ushort *inout)\n\1824{\n\1825__local ushort tmp;\n\1826atomic_and(&tmp, ~(0xff << (get_global_id(0) * 8)));\n\1827atomic_or(&tmp, inout[get_global_id(0)] << (get_global_id(0) * 8));\n\1828barrier(CLK_LOCAL_MEM_FENCE);\n\1829inout[get_global_id(0)] = tmp;\n\1830}\n";1831auto inout = ShaderArg<uint16_t>({ 2, 4 }, SHADER_ARG_INOUT);1832const uint16_t expected[] = { 0x402, 0x402 };1833run_shader(kernel_source, inout.size(), 1, 1, inout);1834for (int i = 0; i < inout.size(); ++i)1835EXPECT_EQ(inout[i], expected[i]);1836}18371838TEST_F(ComputeTest, local_atomic_cmpxchg)1839{1840const char *kernel_source =1841"__attribute__((reqd_work_group_size(2, 1, 1)))\n\1842__kernel void main_test(__global int *out)\n\1843{\n\1844__local uint tmp;\n\1845tmp = 0;\n\1846barrier(CLK_LOCAL_MEM_FENCE);\n\1847while (atomic_cmpxchg(&tmp, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\1848;\n\1849barrier(CLK_LOCAL_MEM_FENCE);\n\1850out[0] = tmp;\n\1851}\n";18521853auto out = ShaderArg<uint32_t>(0xdeadbeef, SHADER_ARG_OUTPUT);1854const uint16_t expected[] = { 2 };1855run_shader(kernel_source, 2, 1, 1, out);1856for (int i = 0; i < out.size(); ++i)1857EXPECT_EQ(out[i], expected[i]);1858}18591860TEST_F(ComputeTest, constant_sampler)1861{1862const char* kernel_source =1863"__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_LINEAR;\n\1864__kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\1865{\n\1866int2 coordsi = (int2)(get_global_id(0), get_global_id(1));\n\1867float2 coordsf = (float2)((float)coordsi.x / get_image_width(input), (float)coordsi.y / get_image_height(input));\n\1868write_imagef(output, coordsi, \n\1869read_imagef(input, sampler, coordsf) + \n\1870read_imagef(input, sampler, coordsf + (float2)(0.1, 0.1)));\n\1871}\n";1872Shader shader = compile(std::vector<const char*>({ kernel_source }));1873validate(shader);1874EXPECT_EQ(shader.dxil->metadata.num_const_samplers, 1);1875}18761877TEST_F(ComputeTest, hi)1878{1879const char *kernel_source = R"(1880__kernel void main_test(__global char3 *srcA, __global char2 *dst)1881{1882int tid = get_global_id(0);18831884char2 tmp = srcA[tid].hi;1885dst[tid] = tmp;1886})";1887Shader shader = compile(std::vector<const char*>({ kernel_source }));1888validate(shader);1889}18901891TEST_F(ComputeTest, system_values)1892{1893const char *kernel_source =1894"__kernel void main_test(__global uint* outputs)\n\1895{\n\1896outputs[0] = get_work_dim();\n\1897outputs[1] = get_global_size(0);\n\1898outputs[2] = get_local_size(0);\n\1899outputs[3] = get_num_groups(0);\n\1900outputs[4] = get_group_id(0);\n\1901outputs[5] = get_global_offset(0);\n\1902outputs[6] = get_global_id(0);\n\1903}\n";1904auto out = ShaderArg<uint32_t>(std::vector<uint32_t>(6, 0xdeadbeef), SHADER_ARG_OUTPUT);1905const uint16_t expected[] = { 3, 1, 1, 1, 0, 0, 0, };1906CompileArgs args = { 1, 1, 1 };1907Shader shader = compile({ kernel_source });1908run_shader(shader, args, out);1909for (int i = 0; i < out.size(); ++i)1910EXPECT_EQ(out[i], expected[i]);19111912args.work_props.work_dim = 2;1913args.work_props.global_offset_x = 100;1914args.work_props.group_id_offset_x = 2;1915args.work_props.group_count_total_x = 5;1916const uint32_t expected_withoffsets[] = { 2, 5, 1, 5, 2, 100, 102 };1917run_shader(shader, args, out);1918for (int i = 0; i < out.size(); ++i)1919EXPECT_EQ(out[i], expected_withoffsets[i]);1920}19211922TEST_F(ComputeTest, convert_round_sat)1923{1924const char *kernel_source =1925"__kernel void main_test(__global float *f, __global uchar *u)\n\1926{\n\1927uint idx = get_global_id(0);\n\1928u[idx] = convert_uchar_sat_rtp(f[idx]);\n\1929}\n";1930auto f = ShaderArg<float>({ -1.0f, 1.1f, 20.0f, 255.5f }, SHADER_ARG_INPUT);1931auto u = ShaderArg<uint8_t>({ 255, 0, 0, 0 }, SHADER_ARG_OUTPUT);1932const uint8_t expected[] = {19330, 2, 20, 2551934};19351936run_shader(kernel_source, f.size(), 1, 1, f, u);1937for (int i = 0; i < u.size(); ++i)1938EXPECT_EQ(u[i], expected[i]);1939}19401941TEST_F(ComputeTest, convert_round_sat_vec)1942{1943const char *kernel_source =1944"__kernel void main_test(__global float16 *f, __global uchar16 *u)\n\1945{\n\1946uint idx = get_global_id(0);\n\1947u[idx] = convert_uchar16_sat_rtp(f[idx]);\n\1948}\n";1949auto f = ShaderArg<float>({1950-1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,1951-0.5f, 1.9f, 20.0f, 254.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,19520.0f, 1.3f, 20.0f, 255.1f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,1953-0.0f, 1.5555f, 20.0f, 254.9f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,1954}, SHADER_ARG_INPUT);1955auto u = ShaderArg<uint8_t>({1956255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,1957255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,1958255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,1959255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,1960}, SHADER_ARG_OUTPUT);1961const uint8_t expected[] = {19620, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,19630, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,19640, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,19650, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,1966};19671968run_shader(kernel_source, 4, 1, 1, f, u);1969for (int i = 0; i < u.size(); ++i)1970EXPECT_EQ(u[i], expected[i]);1971}19721973TEST_F(ComputeTest, convert_char2_uchar2)1974{1975const char *kernel_source =1976"__kernel void main_test( __global char2 *src, __global uchar2 *dest )\n\1977{\n\1978size_t i = get_global_id(0);\n\1979dest[i] = convert_uchar2_sat( src[i] );\n\1980}\n";19811982auto c = ShaderArg<int8_t>({ -127, -4, 0, 4, 126, 127, 16, 32 }, SHADER_ARG_INPUT);1983auto u = ShaderArg<uint8_t>({ 99, 99, 99, 99, 99, 99, 99, 99 }, SHADER_ARG_OUTPUT);1984const uint8_t expected[] = { 0, 0, 0, 4, 126, 127, 16, 32 };1985run_shader(kernel_source, 4, 1, 1, c, u);1986for (int i = 0; i < u.size(); i++)1987EXPECT_EQ(u[i], expected[i]);1988}19891990TEST_F(ComputeTest, async_copy)1991{1992const char *kernel_source = R"(1993__kernel void main_test( const __global char *src, __global char *dst, __local char *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )1994{1995int i;1996for(i=0; i<copiesPerWorkItem; i++)1997localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (char)(char)0;1998barrier( CLK_LOCAL_MEM_FENCE );1999event_t event;2000event = async_work_group_copy( (__local char*)localBuffer, (__global const char*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, 0 );2001wait_group_events( 1, &event );2002for(i=0; i<copiesPerWorkItem; i++)2003dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];2004})";2005Shader shader = compile({ kernel_source });2006validate(shader);2007}20082009TEST_F(ComputeTest, packed_struct_global)2010{2011#pragma pack(push, 1)2012struct s { uint8_t uc; uint64_t ul; uint16_t us; };2013#pragma pack(pop)20142015const char *kernel_source =2016"struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\2017__kernel void main_test(__global struct s *inout, global uint *size)\n\2018{\n\2019uint idx = get_global_id(0);\n\2020inout[idx].uc = idx + 1;\n\2021inout[idx].ul = ((ulong)(idx + 1 + 0xfbfcfdfe) << 32) | 0x12345678;\n\2022inout[idx].us = ((ulong)(idx + 1 + 0xa0) << 8) | 0x12;\n\2023*size = sizeof(struct s);\n\2024}\n";2025auto inout = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT);2026auto size = ShaderArg<uint32_t>(0, SHADER_ARG_OUTPUT);2027const struct s expected[] = {2028{ 1, 0xfbfcfdff12345678, 0xa112 }2029};20302031run_shader(kernel_source, inout.size(), 1, 1, inout, size);2032for (int i = 0; i < inout.size(); ++i) {2033EXPECT_EQ(inout[i].uc, expected[i].uc);2034EXPECT_EQ(inout[i].ul, expected[i].ul);2035EXPECT_EQ(inout[i].us, expected[i].us);2036}2037EXPECT_EQ(size, sizeof(struct s));2038}20392040TEST_F(ComputeTest, packed_struct_arg)2041{2042#pragma pack(push, 1)2043struct s { uint8_t uc; uint64_t ul; uint16_t us; };2044#pragma pack(pop)20452046const char *kernel_source =2047"struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\2048__kernel void main_test(__global struct s *out, struct s in)\n\2049{\n\2050uint idx = get_global_id(0);\n\2051out[idx].uc = in.uc + 0x12;\n\2052out[idx].ul = in.ul + 0x123456789abcdef;\n\2053out[idx].us = in.us + 0x1234;\n\2054}\n";2055auto out = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT);2056auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT);2057const struct s expected[] = {2058{ 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 }2059};20602061run_shader(kernel_source, out.size(), 1, 1, out, in);2062for (int i = 0; i < out.size(); ++i) {2063EXPECT_EQ(out[i].uc, expected[i].uc);2064EXPECT_EQ(out[i].ul, expected[i].ul);2065EXPECT_EQ(out[i].us, expected[i].us);2066}2067}20682069TEST_F(ComputeTest, packed_struct_local)2070{2071#pragma pack(push, 1)2072struct s { uint8_t uc; uint64_t ul; uint16_t us; };2073#pragma pack(pop)20742075const char *kernel_source =2076"struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\2077__kernel void main_test(__global struct s *out, __constant struct s *in)\n\2078{\n\2079uint idx = get_global_id(0);\n\2080__local struct s tmp[2];\n\2081tmp[get_local_id(0)] = in[idx];\n\2082barrier(CLK_LOCAL_MEM_FENCE);\n\2083out[idx] = tmp[(get_local_id(0) + 1) % 2];\n\2084}\n";2085auto out = ShaderArg<struct s>({{0, 0, 0}, {0, 0, 0}}, SHADER_ARG_OUTPUT);2086auto in = ShaderArg<struct s>({{1, 2, 3}, {0x12, 0x123456789abcdef, 0x1234} }, SHADER_ARG_INPUT);2087const struct s expected[] = {2088{ 0x12, 0x123456789abcdef, 0x1234 },2089{ 1, 2, 3 },2090};20912092run_shader(kernel_source, out.size(), 1, 1, out, in);2093for (int i = 0; i < out.size(); ++i) {2094EXPECT_EQ(out[i].uc, expected[i].uc);2095EXPECT_EQ(out[i].ul, expected[i].ul);2096EXPECT_EQ(out[i].us, expected[i].us);2097}2098}20992100/* DISABLED because current release versions of WARP either return2101* rubbish from reads or crash: they are not prepared to handle2102* non-float global constants */2103TEST_F(ComputeTest, DISABLED_packed_struct_const)2104{2105#pragma pack(push, 1)2106struct s { uint8_t uc; uint64_t ul; uint16_t us; };2107#pragma pack(pop)21082109const char *kernel_source =2110"struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\2111__kernel void main_test(__global struct s *out, struct s in)\n\2112{\n\2113__constant struct s base[] = {\n\2114{0x12, 0x123456789abcdef, 0x1234},\n\2115{0x11, 0x123456789abcdee, 0x1233},\n\2116};\n\2117uint idx = get_global_id(0);\n\2118out[idx].uc = base[idx % 2].uc + in.uc;\n\2119out[idx].ul = base[idx % 2].ul + in.ul;\n\2120out[idx].us = base[idx % 2].us + in.us;\n\2121}\n";2122auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0, 0, 0}), SHADER_ARG_OUTPUT);2123auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT);2124const struct s expected[] = {2125{ 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 },2126{ 0x11 + 1, 0x123456789abcdee + 2, 0x1233 + 3 },2127};21282129run_shader(kernel_source, out.size(), 1, 1, out, in);2130for (int i = 0; i < out.size(); ++i) {2131EXPECT_EQ(out[i].uc, expected[i].uc);2132EXPECT_EQ(out[i].ul, expected[i].ul);2133EXPECT_EQ(out[i].us, expected[i].us);2134}2135}21362137TEST_F(ComputeTest, DISABLED_printf)2138{2139const char *kernel_source = R"(2140__kernel void main_test(__global float *src, __global uint *dest)2141{2142__constant char *format_str = "%s: %f";2143__constant char *str_val = "Test";2144*dest = printf(format_str, str_val, src[0]);2145})";21462147auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);2148auto dest = ShaderArg<uint32_t>({ 0xdeadbeef }, SHADER_ARG_OUTPUT);2149run_shader(kernel_source, 1, 1, 1, src, dest);2150EXPECT_EQ(dest[0], 0);2151}21522153TEST_F(ComputeTest, vload_half)2154{2155const char *kernel_source = R"(2156__kernel void main_test(__global half *src, __global float4 *dest)2157{2158int offset = get_global_id(0);2159dest[offset] = vload_half4(offset, src);2160})";2161auto src = ShaderArg<uint16_t>({ 0x3c00, 0x4000, 0x4200, 0x4400,21620x4500, 0x4600, 0x4700, 0x4800 }, SHADER_ARG_INPUT);2163auto dest = ShaderArg<float>({ FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,2164FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }, SHADER_ARG_OUTPUT);2165run_shader(kernel_source, 2, 1, 1, src, dest);2166for (unsigned i = 0; i < 8; ++i)2167EXPECT_FLOAT_EQ(dest[i], (float)(i + 1));2168}21692170TEST_F(ComputeTest, vstore_half)2171{2172const char *kernel_source = R"(2173__kernel void main_test(__global half *dst, __global float4 *src)2174{2175int offset = get_global_id(0);2176vstore_half4(src[offset], offset, dst);2177})";2178auto dest = ShaderArg<uint16_t>({0xdead, 0xdead, 0xdead, 0xdead,21790xdead, 0xdead, 0xdead, 0xdead}, SHADER_ARG_OUTPUT);2180auto src = ShaderArg<float>({ 1.0, 2.0, 3.0, 4.0,21815.0, 6.0, 7.0, 8.0 }, SHADER_ARG_INPUT);2182run_shader(kernel_source, 2, 1, 1, dest, src);2183const uint16_t expected[] = { 0x3c00, 0x4000, 0x4200, 0x4400,21840x4500, 0x4600, 0x4700, 0x4800 };2185for (unsigned i = 0; i < 8; ++i)2186EXPECT_EQ(dest[i], expected[i]);2187}21882189TEST_F(ComputeTest, inline_function)2190{2191const char *kernel_source = R"(2192inline float helper(float foo)2193{2194return foo * 2;2195}21962197__kernel void main_test(__global float *dst, __global float *src)2198{2199*dst = helper(*src);2200})";2201auto dest = ShaderArg<float>({ NAN }, SHADER_ARG_OUTPUT);2202auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);2203run_shader(kernel_source, 1, 1, 1, dest, src);2204EXPECT_EQ(dest[0], 2.0f);2205}22062207TEST_F(ComputeTest, unused_arg)2208{2209const char *kernel_source = R"(2210__kernel void main_test(__global int *dst, __global int *unused, __global int *src)2211{2212int i = get_global_id(0);2213dst[i] = src[i];2214})";2215auto dest = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_OUTPUT);2216auto src = ShaderArg<int>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);2217auto unused = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_INPUT);2218run_shader(kernel_source, 4, 1, 1, dest, unused, src);2219for (int i = 0; i < 4; ++i)2220EXPECT_EQ(dest[i], i + 1);2221}222222232224