Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/microsoft/clc/clc_compiler_test.cpp
4560 views
1
/*
2
* Copyright © Microsoft Corporation
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*/
23
24
#include <stdio.h>
25
#include <stdint.h>
26
#include <stdexcept>
27
#include <vector>
28
29
#include <directx/d3d12.h>
30
#include <dxgi1_4.h>
31
#include <gtest/gtest.h>
32
#include <wrl.h>
33
34
#include "compute_test.h"
35
36
using std::vector;
37
38
TEST_F(ComputeTest, runtime_memcpy)
39
{
40
struct shift { uint8_t val; uint8_t shift; uint16_t ret; };
41
const char *kernel_source =
42
"struct shift { uchar val; uchar shift; ushort ret; };\n\
43
__kernel void main_test(__global struct shift *inout)\n\
44
{\n\
45
uint id = get_global_id(0);\n\
46
uint id2 = id + get_global_id(1);\n\
47
struct shift lc[4] = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }};\n\
48
lc[id] = inout[id];\n\
49
inout[id2].ret = (ushort) lc[id2].val << (ushort) lc[id2].shift;\n\
50
}\n";
51
52
auto inout = ShaderArg<struct shift>({
53
{ 0x10, 1, 0xffff },
54
{ 0x20, 2, 0xffff },
55
{ 0x30, 3, 0xffff },
56
{ 0x40, 4, 0xffff },
57
},
58
SHADER_ARG_INOUT);
59
const uint16_t expected[] = { 0x20, 0x80, 0x180, 0x400 };
60
run_shader(kernel_source, inout.size(), 1, 1, inout);
61
for (int i = 0; i < inout.size(); ++i)
62
EXPECT_EQ(inout[i].ret, expected[i]);
63
}
64
65
TEST_F(ComputeTest, two_global_arrays)
66
{
67
const char *kernel_source =
68
"__kernel void main_test(__global uint *g1, __global uint *g2)\n\
69
{\n\
70
uint idx = get_global_id(0);\n\
71
g1[idx] -= g2[idx];\n\
72
}\n";
73
auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
74
auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
75
const uint32_t expected[] = {
76
9, 18, 27, 36
77
};
78
79
run_shader(kernel_source, g1.size(), 1, 1, g1, g2);
80
for (int i = 0; i < g1.size(); ++i)
81
EXPECT_EQ(g1[i], expected[i]);
82
}
83
84
/* Disabled until saturated conversions from f32->i64 fixed (mesa/mesa#3824) */
85
TEST_F(ComputeTest, DISABLED_i64tof32)
86
{
87
const char *kernel_source =
88
"__kernel void main_test(__global long *out, __constant long *in)\n\
89
{\n\
90
__local float tmp[12];\n\
91
uint idx = get_global_id(0);\n\
92
tmp[idx] = in[idx];\n\
93
barrier(CLK_LOCAL_MEM_FENCE);\n\
94
out[idx] = tmp[idx + get_global_id(1)];\n\
95
}\n";
96
auto in = ShaderArg<int64_t>({ 0x100000000LL,
97
-0x100000000LL,
98
0x7fffffffffffffffLL,
99
0x4000004000000000LL,
100
0x4000003fffffffffLL,
101
0x4000004000000001LL,
102
-1,
103
-0x4000004000000000LL,
104
-0x4000003fffffffffLL,
105
-0x4000004000000001LL,
106
0,
107
INT64_MIN },
108
SHADER_ARG_INPUT);
109
auto out = ShaderArg<int64_t>(std::vector<int64_t>(12, 0xdeadbeed), SHADER_ARG_OUTPUT);
110
const int64_t expected[] = {
111
0x100000000LL,
112
-0x100000000LL,
113
0x7fffffffffffffffLL,
114
0x4000000000000000LL,
115
0x4000000000000000LL,
116
0x4000008000000000LL,
117
-1,
118
-0x4000000000000000LL,
119
-0x4000000000000000LL,
120
-0x4000008000000000LL,
121
0,
122
INT64_MIN,
123
};
124
125
run_shader(kernel_source, out.size(), 1, 1, out, in);
126
for (int i = 0; i < out.size(); ++i) {
127
EXPECT_EQ((int64_t)out[i], expected[i]);
128
}
129
}
130
TEST_F(ComputeTest, two_constant_arrays)
131
{
132
const char *kernel_source =
133
"__kernel void main_test(__constant uint *c1, __global uint *g1, __constant uint *c2)\n\
134
{\n\
135
uint idx = get_global_id(0);\n\
136
g1[idx] -= c1[idx] + c2[idx];\n\
137
}\n";
138
auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
139
auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
140
auto c2 = ShaderArg<uint32_t>(std::vector<uint32_t>(16384, 5), SHADER_ARG_INPUT);
141
const uint32_t expected[] = {
142
4, 13, 22, 31
143
};
144
145
run_shader(kernel_source, g1.size(), 1, 1, c1, g1, c2);
146
for (int i = 0; i < g1.size(); ++i)
147
EXPECT_EQ(g1[i], expected[i]);
148
}
149
150
TEST_F(ComputeTest, null_constant_ptr)
151
{
152
const char *kernel_source =
153
"__kernel void main_test(__global uint *g1, __constant uint *c1)\n\
154
{\n\
155
__constant uint fallback[] = {2, 3, 4, 5};\n\
156
__constant uint *c = c1 ? c1 : fallback;\n\
157
uint idx = get_global_id(0);\n\
158
g1[idx] -= c[idx];\n\
159
}\n";
160
auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
161
auto c1 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
162
const uint32_t expected1[] = {
163
9, 18, 27, 36
164
};
165
166
run_shader(kernel_source, g1.size(), 1, 1, g1, c1);
167
for (int i = 0; i < g1.size(); ++i)
168
EXPECT_EQ(g1[i], expected1[i]);
169
170
const uint32_t expected2[] = {
171
8, 17, 26, 35
172
};
173
174
g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
175
auto c2 = NullShaderArg();
176
run_shader(kernel_source, g1.size(), 1, 1, g1, c2);
177
for (int i = 0; i < g1.size(); ++i)
178
EXPECT_EQ(g1[i], expected2[i]);
179
}
180
181
/* This test seems to fail on older versions of WARP. */
182
TEST_F(ComputeTest, DISABLED_null_global_ptr)
183
{
184
const char *kernel_source =
185
"__kernel void main_test(__global uint *g1, __global uint *g2)\n\
186
{\n\
187
__constant uint fallback[] = {2, 3, 4, 5};\n\
188
uint idx = get_global_id(0);\n\
189
g1[idx] -= g2 ? g2[idx] : fallback[idx];\n\
190
}\n";
191
auto g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
192
auto g2 = ShaderArg<uint32_t>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
193
const uint32_t expected1[] = {
194
9, 18, 27, 36
195
};
196
197
run_shader(kernel_source, g1.size(), 1, 1, g1, g2);
198
for (int i = 0; i < g1.size(); ++i)
199
EXPECT_EQ(g1[i], expected1[i]);
200
201
const uint32_t expected2[] = {
202
8, 17, 26, 35
203
};
204
205
g1 = ShaderArg<uint32_t>({ 10, 20, 30, 40 }, SHADER_ARG_INOUT);
206
auto g2null = NullShaderArg();
207
run_shader(kernel_source, g1.size(), 1, 1, g1, g2null);
208
for (int i = 0; i < g1.size(); ++i)
209
EXPECT_EQ(g1[i], expected2[i]);
210
}
211
212
TEST_F(ComputeTest, ret_constant_ptr)
213
{
214
struct s { uint64_t ptr; uint32_t val; };
215
const char *kernel_source =
216
"struct s { __constant uint *ptr; uint val; };\n\
217
__kernel void main_test(__global struct s *out, __constant uint *in)\n\
218
{\n\
219
__constant uint foo[] = { 1, 2 };\n\
220
uint idx = get_global_id(0);\n\
221
if (idx == 0)\n\
222
out[idx].ptr = foo;\n\
223
else\n\
224
out[idx].ptr = in;\n\
225
out[idx].val = out[idx].ptr[idx];\n\
226
}\n";
227
auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT);
228
auto in = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT);
229
const uint32_t expected_val[] = {
230
1, 4
231
};
232
const uint64_t expected_ptr[] = {
233
2ull << 32, 1ull << 32
234
};
235
236
run_shader(kernel_source, out.size(), 1, 1, out, in);
237
for (int i = 0; i < out.size(); ++i) {
238
EXPECT_EQ(out[i].val, expected_val[i]);
239
EXPECT_EQ(out[i].ptr, expected_ptr[i]);
240
}
241
}
242
243
TEST_F(ComputeTest, ret_global_ptr)
244
{
245
struct s { uint64_t ptr; uint32_t val; };
246
const char *kernel_source =
247
"struct s { __global uint *ptr; uint val; };\n\
248
__kernel void main_test(__global struct s *out, __global uint *in1, __global uint *in2)\n\
249
{\n\
250
uint idx = get_global_id(0);\n\
251
out[idx].ptr = idx ? in2 : in1;\n\
252
out[idx].val = out[idx].ptr[idx];\n\
253
}\n";
254
auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0xdeadbeefdeadbeef, 0}), SHADER_ARG_OUTPUT);
255
auto in1 = ShaderArg<uint32_t>({ 1, 2 }, SHADER_ARG_INPUT);
256
auto in2 = ShaderArg<uint32_t>({ 3, 4 }, SHADER_ARG_INPUT);
257
const uint32_t expected_val[] = {
258
1, 4
259
};
260
const uint64_t expected_ptr[] = {
261
1ull << 32, 2ull << 32
262
};
263
264
run_shader(kernel_source, out.size(), 1, 1, out, in1, in2);
265
for (int i = 0; i < out.size(); ++i) {
266
EXPECT_EQ(out[i].val, expected_val[i]);
267
EXPECT_EQ(out[i].ptr, expected_ptr[i]);
268
}
269
}
270
271
TEST_F(ComputeTest, ret_local_ptr)
272
{
273
struct s { uint64_t ptr; };
274
const char *kernel_source =
275
"struct s { __local uint *ptr; };\n\
276
__kernel void main_test(__global struct s *out)\n\
277
{\n\
278
__local uint tmp[2];\n\
279
uint idx = get_global_id(0);\n\
280
tmp[idx] = idx;\n\
281
out[idx].ptr = &tmp[idx];\n\
282
}\n";
283
auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT);
284
const uint64_t expected_ptr[] = {
285
0, 4,
286
};
287
288
run_shader(kernel_source, out.size(), 1, 1, out);
289
for (int i = 0; i < out.size(); ++i) {
290
EXPECT_EQ(out[i].ptr, expected_ptr[i]);
291
}
292
}
293
294
TEST_F(ComputeTest, ret_private_ptr)
295
{
296
struct s { uint64_t ptr; uint32_t value; };
297
const char *kernel_source =
298
"struct s { __private uint *ptr; uint value; };\n\
299
__kernel void main_test(__global struct s *out)\n\
300
{\n\
301
uint tmp[2] = {1, 2};\n\
302
uint idx = get_global_id(0);\n\
303
out[idx].ptr = &tmp[idx];\n\
304
out[idx].value = *out[idx].ptr;\n\
305
}\n";
306
auto out = ShaderArg<struct s>(std::vector<struct s>(2, { 0xdeadbeefdeadbeef }), SHADER_ARG_OUTPUT);
307
const uint64_t expected_ptr[] = {
308
0, 4,
309
};
310
const uint32_t expected_value[] = {
311
1, 2
312
};
313
314
run_shader(kernel_source, out.size(), 1, 1, out);
315
for (int i = 0; i < out.size(); ++i) {
316
EXPECT_EQ(out[i].ptr, expected_ptr[i]);
317
}
318
}
319
320
TEST_F(ComputeTest, globals_8bit)
321
{
322
const char *kernel_source =
323
"__kernel void main_test(__global unsigned char *inout)\n\
324
{\n\
325
uint idx = get_global_id(0);\n\
326
inout[idx] = inout[idx] + 1;\n\
327
}\n";
328
auto inout = ShaderArg<uint8_t> ({ 100, 110, 120, 130 }, SHADER_ARG_INOUT);
329
const uint8_t expected[] = {
330
101, 111, 121, 131
331
};
332
run_shader(kernel_source, inout.size(), 1, 1, inout);
333
for (int i = 0; i < inout.size(); ++i)
334
EXPECT_EQ(inout[i], expected[i]);
335
}
336
337
TEST_F(ComputeTest, globals_16bit)
338
{
339
const char *kernel_source =
340
"__kernel void main_test(__global unsigned short *inout)\n\
341
{\n\
342
uint idx = get_global_id(0);\n\
343
inout[idx] = inout[idx] + 1;\n\
344
}\n";
345
auto inout = ShaderArg<uint16_t> ({ 10000, 10010, 10020, 10030 }, SHADER_ARG_INOUT);
346
const uint16_t expected[] = {
347
10001, 10011, 10021, 10031
348
};
349
run_shader(kernel_source, inout.size(), 1, 1, inout);
350
for (int i = 0; i < inout.size(); ++i)
351
EXPECT_EQ(inout[i], expected[i]);
352
}
353
354
TEST_F(ComputeTest, DISABLED_globals_64bit)
355
{
356
/* Test disabled, because we need a fixed version of WARP that hasn't
357
been officially shipped yet */
358
359
const char *kernel_source =
360
"__kernel void main_test(__global unsigned long *inout)\n\
361
{\n\
362
uint idx = get_global_id(0);\n\
363
inout[idx] = inout[idx] + 1;\n\
364
}\n";
365
uint64_t base = 1ull << 50;
366
auto inout = ShaderArg<uint64_t>({ base, base + 10, base + 20, base + 30 },
367
SHADER_ARG_INOUT);
368
const uint64_t expected[] = {
369
base + 1, base + 11, base + 21, base + 31
370
};
371
run_shader(kernel_source, inout.size(), 1, 1, inout);
372
for (int i = 0; i < inout.size(); ++i)
373
EXPECT_EQ(inout[i], expected[i]);
374
}
375
376
TEST_F(ComputeTest, built_ins_global_id)
377
{
378
const char *kernel_source =
379
"__kernel void main_test(__global uint *output)\n\
380
{\n\
381
output[get_global_id(0)] = get_global_id(0);\n\
382
}\n";
383
auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
384
SHADER_ARG_OUTPUT);
385
const uint32_t expected[] = {
386
0, 1, 2, 3
387
};
388
389
run_shader(kernel_source, output.size(), 1, 1, output);
390
for (int i = 0; i < output.size(); ++i)
391
EXPECT_EQ(output[i], expected[i]);
392
}
393
394
TEST_F(ComputeTest, built_ins_global_id_rmw)
395
{
396
const char *kernel_source =
397
"__kernel void main_test(__global uint *output)\n\
398
{\n\
399
uint id = get_global_id(0);\n\
400
output[id] = output[id] * (id + 1);\n\
401
}\n";
402
auto inout = ShaderArg<uint32_t>({0x00000001, 0x10000001, 0x00020002, 0x04010203},
403
SHADER_ARG_INOUT);
404
const uint32_t expected[] = {
405
0x00000001, 0x20000002, 0x00060006, 0x1004080c
406
};
407
run_shader(kernel_source, inout.size(), 1, 1, inout);
408
for (int i = 0; i < inout.size(); ++i)
409
EXPECT_EQ(inout[i], expected[i]);
410
}
411
412
TEST_F(ComputeTest, types_float_basics)
413
{
414
const char *kernel_source =
415
"__kernel void main_test(__global uint *output)\n\
416
{\n\
417
output[get_global_id(0)] = (uint)((float)get_global_id(0) + 1.5f);\n\
418
}\n";
419
auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
420
SHADER_ARG_OUTPUT);
421
const uint32_t expected[] = {
422
1, 2, 3, 4
423
};
424
run_shader(kernel_source, output.size(), 1, 1, output);
425
for (int i = 0; i < output.size(); ++i)
426
EXPECT_EQ(output[i], expected[i]);
427
}
428
429
TEST_F(ComputeTest, DISABLED_types_double_basics)
430
{
431
const char *kernel_source =
432
"__kernel void main_test(__global uint *output)\n\
433
{\n\
434
output[get_global_id(0)] = (uint)((double)get_global_id(0) + 1.5);\n\
435
}\n";
436
auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
437
SHADER_ARG_OUTPUT);
438
const uint32_t expected[] = {
439
1, 2, 3, 4
440
};
441
run_shader(kernel_source, output.size(), 1, 1, output);
442
for (int i = 0; i < output.size(); ++i)
443
EXPECT_EQ(output[i], expected[i]);
444
}
445
446
TEST_F(ComputeTest, types_short_basics)
447
{
448
const char *kernel_source =
449
"__kernel void main_test(__global uint *output)\n\
450
{\n\
451
output[get_global_id(0)] = (uint)((short)get_global_id(0) + (short)1);\n\
452
}\n";
453
auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
454
SHADER_ARG_OUTPUT);
455
const uint32_t expected[] = {
456
1, 2, 3, 4
457
};
458
run_shader(kernel_source, output.size(), 1, 1, output);
459
for (int i = 0; i < output.size(); ++i)
460
EXPECT_EQ(output[i], expected[i]);
461
}
462
463
TEST_F(ComputeTest, types_char_basics)
464
{
465
const char *kernel_source =
466
"__kernel void main_test(__global uint *output)\n\
467
{\n\
468
output[get_global_id(0)] = (uint)((char)get_global_id(0) + (char)1);\n\
469
}\n";
470
auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
471
SHADER_ARG_OUTPUT);
472
const uint32_t expected[] = {
473
1, 2, 3, 4
474
};
475
run_shader(kernel_source, output.size(), 1, 1, output);
476
for (int i = 0; i < output.size(); ++i)
477
EXPECT_EQ(output[i], expected[i]);
478
}
479
480
TEST_F(ComputeTest, types_if_statement)
481
{
482
const char *kernel_source =
483
"__kernel void main_test(__global uint *output)\n\
484
{\n\
485
int idx = get_global_id(0);\n\
486
if (idx > 0)\n\
487
output[idx] = ~idx;\n\
488
else\n\
489
output[0] = 0xff;\n\
490
}\n";
491
auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
492
SHADER_ARG_OUTPUT);
493
const uint32_t expected[] = {
494
0xff, ~1u, ~2u, ~3u
495
};
496
run_shader(kernel_source, output.size(), 1, 1, output);
497
for (int i = 0; i < output.size(); ++i)
498
EXPECT_EQ(output[i], expected[i]);
499
}
500
501
TEST_F(ComputeTest, types_do_while_loop)
502
{
503
const char *kernel_source =
504
"__kernel void main_test(__global uint *output)\n\
505
{\n\
506
int value = 1;\n\
507
int i = 1, n = get_global_id(0);\n\
508
do {\n\
509
value *= i++;\n\
510
} while (i <= n);\n\
511
output[n] = value;\n\
512
}\n";
513
auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef),
514
SHADER_ARG_OUTPUT);
515
const uint32_t expected[] = {
516
1, 1, 1*2, 1*2*3, 1*2*3*4
517
};
518
run_shader(kernel_source, output.size(), 1, 1, output);
519
for (int i = 0; i < output.size(); ++i)
520
EXPECT_EQ(output[i], expected[i]);
521
}
522
523
TEST_F(ComputeTest, types_for_loop)
524
{
525
const char *kernel_source =
526
"__kernel void main_test(__global uint *output)\n\
527
{\n\
528
int value = 1;\n\
529
int n = get_global_id(0);\n\
530
for (int i = 1; i <= n; ++i)\n\
531
value *= i;\n\
532
output[n] = value;\n\
533
}\n";
534
auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(5, 0xdeadbeef),
535
SHADER_ARG_OUTPUT);
536
const uint32_t expected[] = {
537
1, 1, 1*2, 1*2*3, 1*2*3*4
538
};
539
run_shader(kernel_source, output.size(), 1, 1, output);
540
for (int i = 0; i < output.size(); ++i)
541
EXPECT_EQ(output[i], expected[i]);
542
}
543
544
TEST_F(ComputeTest, DISABLED_complex_types_local_array_long)
545
{
546
const char *kernel_source =
547
"__kernel void main_test(__global ulong *inout)\n\
548
{\n\
549
ushort tmp[] = {\n\
550
get_global_id(1) + 0x00000000,\n\
551
get_global_id(1) + 0x10000001,\n\
552
get_global_id(1) + 0x20000020,\n\
553
get_global_id(1) + 0x30000300,\n\
554
};\n\
555
uint idx = get_global_id(0);\n\
556
inout[idx] = tmp[idx];\n\
557
}\n";
558
auto inout = ShaderArg<uint64_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
559
const uint16_t expected[] = {
560
0x00000000, 0x10000001, 0x20000020, 0x30000300,
561
};
562
run_shader(kernel_source, inout.size(), 1, 1, inout);
563
for (int i = 0; i < inout.size(); ++i)
564
EXPECT_EQ(inout[i], expected[i]);
565
}
566
567
TEST_F(ComputeTest, complex_types_local_array_short)
568
{
569
const char *kernel_source =
570
"__kernel void main_test(__global ushort *inout)\n\
571
{\n\
572
ushort tmp[] = {\n\
573
get_global_id(1) + 0x00,\n\
574
get_global_id(1) + 0x10,\n\
575
get_global_id(1) + 0x20,\n\
576
get_global_id(1) + 0x30,\n\
577
};\n\
578
uint idx = get_global_id(0);\n\
579
inout[idx] = tmp[idx];\n\
580
}\n";
581
auto inout = ShaderArg<uint16_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
582
const uint16_t expected[] = {
583
0x00, 0x10, 0x20, 0x30,
584
};
585
run_shader(kernel_source, inout.size(), 1, 1, inout);
586
for (int i = 0; i < inout.size(); ++i)
587
EXPECT_EQ(inout[i], expected[i]);
588
}
589
590
TEST_F(ComputeTest, complex_types_local_array_struct_vec_float_misaligned)
591
{
592
const char *kernel_source =
593
"struct has_vecs { uchar c; ushort s; float2 f; };\n\
594
__kernel void main_test(__global uint *inout)\n\
595
{\n\
596
struct has_vecs tmp[] = {\n\
597
{ 10 + get_global_id(0), get_global_id(1), { 10.0f, 1.0f } },\n\
598
{ 19 + get_global_id(0), get_global_id(1), { 20.0f, 4.0f } },\n\
599
{ 28 + get_global_id(0), get_global_id(1), { 30.0f, 9.0f } },\n\
600
{ 37 + get_global_id(0), get_global_id(1), { 40.0f, 16.0f } },\n\
601
};\n\
602
uint idx = get_global_id(0);\n\
603
uint mul = (tmp[idx].c + tmp[idx].s) * trunc(tmp[idx].f[0]);\n\
604
inout[idx] = mul + trunc(tmp[idx].f[1]);\n\
605
}\n";
606
auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
607
const uint16_t expected[] = { 101, 404, 909, 1616 };
608
run_shader(kernel_source, inout.size(), 1, 1, inout);
609
for (int i = 0; i < inout.size(); ++i)
610
EXPECT_EQ(inout[i], expected[i]);
611
}
612
613
TEST_F(ComputeTest, complex_types_local_array)
614
{
615
const char *kernel_source =
616
"__kernel void main_test(__global uint *inout)\n\
617
{\n\
618
uint tmp[] = {\n\
619
get_global_id(1) + 0x00,\n\
620
get_global_id(1) + 0x10,\n\
621
get_global_id(1) + 0x20,\n\
622
get_global_id(1) + 0x30,\n\
623
};\n\
624
uint idx = get_global_id(0);\n\
625
inout[idx] = tmp[idx];\n\
626
}\n";
627
auto inout = ShaderArg<uint32_t>({ 0, 0, 0, 0 }, SHADER_ARG_INOUT);
628
const uint32_t expected[] = {
629
0x00, 0x10, 0x20, 0x30,
630
};
631
run_shader(kernel_source, inout.size(), 1, 1, inout);
632
for (int i = 0; i < inout.size(); ++i)
633
EXPECT_EQ(inout[i], expected[i]);
634
}
635
636
TEST_F(ComputeTest, complex_types_global_struct_array)
637
{
638
struct two_vals { uint32_t add; uint32_t mul; };
639
const char *kernel_source =
640
"struct two_vals { uint add; uint mul; };\n\
641
__kernel void main_test(__global struct two_vals *in_out)\n\
642
{\n\
643
uint id = get_global_id(0);\n\
644
in_out[id].add = in_out[id].add + id;\n\
645
in_out[id].mul = in_out[id].mul * id;\n\
646
}\n";
647
auto inout = ShaderArg<struct two_vals>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } },
648
SHADER_ARG_INOUT);
649
const struct two_vals expected[] = {
650
{ 8 + 0, 8 * 0 },
651
{ 16 + 1, 16 * 1 },
652
{ 64 + 2, 64 * 2 },
653
{ 65536 + 3, 65536 * 3 }
654
};
655
run_shader(kernel_source, inout.size(), 1, 1, inout);
656
for (int i = 0; i < inout.size(); ++i) {
657
EXPECT_EQ(inout[i].add, expected[i].add);
658
EXPECT_EQ(inout[i].mul, expected[i].mul);
659
}
660
}
661
662
TEST_F(ComputeTest, complex_types_global_uint2)
663
{
664
struct uint2 { uint32_t x; uint32_t y; };
665
const char *kernel_source =
666
"__kernel void main_test(__global uint2 *inout)\n\
667
{\n\
668
uint id = get_global_id(0);\n\
669
inout[id].x = inout[id].x + id;\n\
670
inout[id].y = inout[id].y * id;\n\
671
}\n";
672
auto inout = ShaderArg<struct uint2>({ { 8, 8 }, { 16, 16 }, { 64, 64 }, { 65536, 65536 } },
673
SHADER_ARG_INOUT);
674
const struct uint2 expected[] = {
675
{ 8 + 0, 8 * 0 },
676
{ 16 + 1, 16 * 1 },
677
{ 64 + 2, 64 * 2 },
678
{ 65536 + 3, 65536 * 3 }
679
};
680
run_shader(kernel_source, inout.size(), 1, 1, inout);
681
for (int i = 0; i < inout.size(); ++i) {
682
EXPECT_EQ(inout[i].x, expected[i].x);
683
EXPECT_EQ(inout[i].y, expected[i].y);
684
}
685
}
686
687
TEST_F(ComputeTest, complex_types_global_ushort2)
688
{
689
struct ushort2 { uint16_t x; uint16_t y; };
690
const char *kernel_source =
691
"__kernel void main_test(__global ushort2 *inout)\n\
692
{\n\
693
uint id = get_global_id(0);\n\
694
inout[id].x = inout[id].x + id;\n\
695
inout[id].y = inout[id].y * id;\n\
696
}\n";
697
auto inout = ShaderArg<struct ushort2>({ { 8, 8 }, { 16, 16 }, { 64, 64 },
698
{ (uint16_t)65536, (uint16_t)65536 } },
699
SHADER_ARG_INOUT);
700
const struct ushort2 expected[] = {
701
{ 8 + 0, 8 * 0 },
702
{ 16 + 1, 16 * 1 },
703
{ 64 + 2, 64 * 2 },
704
{ (uint16_t)(65536 + 3), (uint16_t)(65536 * 3) }
705
};
706
run_shader(kernel_source, inout.size(), 1, 1, inout);
707
for (int i = 0; i < inout.size(); ++i) {
708
EXPECT_EQ(inout[i].x, expected[i].x);
709
EXPECT_EQ(inout[i].y, expected[i].y);
710
}
711
}
712
713
TEST_F(ComputeTest, complex_types_global_uchar3)
714
{
715
struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; };
716
const char *kernel_source =
717
"__kernel void main_test(__global uchar3 *inout)\n\
718
{\n\
719
uint id = get_global_id(0);\n\
720
inout[id].x = inout[id].x + id;\n\
721
inout[id].y = inout[id].y * id;\n\
722
inout[id].z = inout[id].y + inout[id].x;\n\
723
}\n";
724
auto inout = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } },
725
SHADER_ARG_INOUT);
726
const struct uchar3 expected[] = {
727
{ 8 + 0, 8 * 0, (8 + 0) + (8 * 0) },
728
{ 16 + 1, 16 * 1, (16 + 1) + (16 * 1) },
729
{ 64 + 2, 64 * 2, (64 + 2) + (64 * 2) },
730
{ (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) }
731
};
732
run_shader(kernel_source, inout.size(), 1, 1, inout);
733
for (int i = 0; i < inout.size(); ++i) {
734
EXPECT_EQ(inout[i].x, expected[i].x);
735
EXPECT_EQ(inout[i].y, expected[i].y);
736
EXPECT_EQ(inout[i].z, expected[i].z);
737
}
738
}
739
740
TEST_F(ComputeTest, complex_types_constant_uchar3)
741
{
742
struct uchar3 { uint8_t x; uint8_t y; uint8_t z; uint8_t pad; };
743
const char *kernel_source =
744
"__kernel void main_test(__global uchar3 *out, __constant uchar3 *in)\n\
745
{\n\
746
uint id = get_global_id(0);\n\
747
out[id].x = in[id].x + id;\n\
748
out[id].y = in[id].y * id;\n\
749
out[id].z = out[id].y + out[id].x;\n\
750
}\n";
751
auto in = ShaderArg<struct uchar3>({ { 8, 8, 8 }, { 16, 16, 16 }, { 64, 64, 64 }, { 255, 255, 255 } },
752
SHADER_ARG_INPUT);
753
auto out = ShaderArg<struct uchar3>(std::vector<struct uchar3>(4, { 0xff, 0xff, 0xff }),
754
SHADER_ARG_OUTPUT);
755
const struct uchar3 expected[] = {
756
{ 8 + 0, 8 * 0, (8 + 0) + (8 * 0) },
757
{ 16 + 1, 16 * 1, (16 + 1) + (16 * 1) },
758
{ 64 + 2, 64 * 2, (64 + 2) + (64 * 2) },
759
{ (uint8_t)(255 + 3), (uint8_t)(255 * 3), (uint8_t)((255 + 3) + (255 * 3)) }
760
};
761
run_shader(kernel_source, out.size(), 1, 1, out, in);
762
for (int i = 0; i < out.size(); ++i) {
763
EXPECT_EQ(out[i].x, expected[i].x);
764
EXPECT_EQ(out[i].y, expected[i].y);
765
EXPECT_EQ(out[i].z, expected[i].z);
766
}
767
}
768
769
TEST_F(ComputeTest, complex_types_global_uint8)
770
{
771
struct uint8 {
772
uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
773
uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
774
};
775
const char *kernel_source =
776
"__kernel void main_test(__global uint8 *inout)\n\
777
{\n\
778
uint id = get_global_id(0);\n\
779
inout[id].s01234567 = inout[id].s01234567 * 2;\n\
780
}\n";
781
auto inout = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } },
782
SHADER_ARG_INOUT);
783
const struct uint8 expected[] = {
784
{ 2, 4, 6, 8, 10, 12, 14, 16 }
785
};
786
run_shader(kernel_source, inout.size(), 1, 1, inout);
787
for (int i = 0; i < inout.size(); ++i) {
788
EXPECT_EQ(inout[i].s0, expected[i].s0);
789
EXPECT_EQ(inout[i].s1, expected[i].s1);
790
EXPECT_EQ(inout[i].s2, expected[i].s2);
791
EXPECT_EQ(inout[i].s3, expected[i].s3);
792
EXPECT_EQ(inout[i].s4, expected[i].s4);
793
EXPECT_EQ(inout[i].s5, expected[i].s5);
794
EXPECT_EQ(inout[i].s6, expected[i].s6);
795
EXPECT_EQ(inout[i].s7, expected[i].s7);
796
}
797
}
798
799
TEST_F(ComputeTest, complex_types_local_ulong16)
800
{
801
struct ulong16 {
802
uint64_t values[16];
803
};
804
const char *kernel_source =
805
R"(__kernel void main_test(__global ulong16 *inout)
806
{
807
__local ulong16 local_array[2];
808
uint id = get_global_id(0);
809
local_array[id] = inout[id];
810
barrier(CLK_LOCAL_MEM_FENCE);
811
inout[id] = local_array[0] * 2;
812
})";
813
auto inout = ShaderArg<struct ulong16>({ { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } },
814
SHADER_ARG_INOUT);
815
const struct ulong16 expected[] = {
816
{ 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }
817
};
818
run_shader(kernel_source, inout.size(), 1, 1, inout);
819
for (int i = 0; i < inout.size(); ++i) {
820
for (int j = 0; j < 16; ++j) {
821
EXPECT_EQ(inout[i].values[j], expected[i].values[j]);
822
}
823
}
824
}
825
826
TEST_F(ComputeTest, complex_types_constant_uint8)
827
{
828
struct uint8 {
829
uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
830
uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
831
};
832
const char *kernel_source =
833
"__kernel void main_test(__global uint8 *out, __constant uint8 *in)\n\
834
{\n\
835
uint id = get_global_id(0);\n\
836
out[id].s01234567 = in[id].s01234567 * 2;\n\
837
}\n";
838
auto in = ShaderArg<struct uint8>({ { 1, 2, 3, 4, 5, 6, 7, 8 } },
839
SHADER_ARG_INPUT);
840
auto out = ShaderArg<struct uint8>({ { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff } },
841
SHADER_ARG_INOUT);
842
const struct uint8 expected[] = {
843
{ 2, 4, 6, 8, 10, 12, 14, 16 }
844
};
845
run_shader(kernel_source, out.size(), 1, 1, out, in);
846
for (int i = 0; i < out.size(); ++i) {
847
EXPECT_EQ(out[i].s0, expected[i].s0);
848
EXPECT_EQ(out[i].s1, expected[i].s1);
849
EXPECT_EQ(out[i].s2, expected[i].s2);
850
EXPECT_EQ(out[i].s3, expected[i].s3);
851
EXPECT_EQ(out[i].s4, expected[i].s4);
852
EXPECT_EQ(out[i].s5, expected[i].s5);
853
EXPECT_EQ(out[i].s6, expected[i].s6);
854
EXPECT_EQ(out[i].s7, expected[i].s7);
855
}
856
}
857
858
TEST_F(ComputeTest, DISABLED_complex_types_const_array)
859
{
860
/* DISABLED because current release versions of WARP either return
861
* rubbish from reads or crash: they are not prepared to handle
862
* non-float global constants */
863
const char *kernel_source =
864
"__kernel void main_test(__global uint *output)\n\
865
{\n\
866
const uint foo[] = { 100, 101, 102, 103 };\n\
867
output[get_global_id(0)] = foo[get_global_id(0) % 4];\n\
868
}\n";
869
auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
870
SHADER_ARG_OUTPUT);
871
const uint32_t expected[] = {
872
100, 101, 102, 103
873
};
874
run_shader(kernel_source, output.size(), 1, 1, output);
875
for (int i = 0; i < output.size(); ++i)
876
EXPECT_EQ(output[i], expected[i]);
877
}
878
879
TEST_F(ComputeTest, mem_access_load_store_ordering)
880
{
881
const char *kernel_source =
882
"__kernel void main_test(__global uint *output)\n\
883
{\n\
884
uint foo[4];\n\
885
foo[0] = 0x11111111;\n\
886
foo[1] = 0x22222222;\n\
887
foo[2] = 0x44444444;\n\
888
foo[3] = 0x88888888;\n\
889
foo[get_global_id(1)] -= 0x11111111; // foo[0] = 0 \n\
890
foo[0] += get_global_id(0); // foo[0] = tid\n\
891
foo[foo[get_global_id(1)]] = get_global_id(0); // foo[tid] = tid\n\
892
output[get_global_id(0)] = foo[get_global_id(0)]; // output[tid] = tid\n\
893
}\n";
894
auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
895
SHADER_ARG_OUTPUT);
896
const uint16_t expected[] = {
897
0, 1, 2, 3
898
};
899
run_shader(kernel_source, output.size(), 1, 1, output);
900
for (int i = 0; i < output.size(); ++i)
901
EXPECT_EQ(output[i], expected[i]);
902
}
903
904
TEST_F(ComputeTest, DISABLED_two_const_arrays)
905
{
906
/* DISABLED because current release versions of WARP either return
907
* rubbish from reads or crash: they are not prepared to handle
908
* non-float global constants */
909
const char *kernel_source =
910
"__kernel void main_test(__global uint *output)\n\
911
{\n\
912
uint id = get_global_id(0);\n\
913
uint foo[4] = {100, 101, 102, 103};\n\
914
uint bar[4] = {1, 2, 3, 4};\n\
915
output[id] = foo[id] * bar[id];\n\
916
}\n";
917
auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
918
SHADER_ARG_OUTPUT);
919
const uint32_t expected[] = {
920
100, 202, 306, 412
921
};
922
run_shader(kernel_source, output.size(), 1, 1, output);
923
for (int i = 0; i < output.size(); ++i)
924
EXPECT_EQ(output[i], expected[i]);
925
}
926
927
TEST_F(ComputeTest, imod_pos)
928
{
929
const char *kernel_source =
930
"__kernel void main_test(__global int *inout)\n\
931
{\n\
932
inout[get_global_id(0)] = inout[get_global_id(0)] % 3;\n\
933
}\n";
934
auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 },
935
SHADER_ARG_INOUT);
936
const int32_t expected[] = {
937
-1, 0, -2, -1, 0, 1, 2, 0, 1
938
};
939
run_shader(kernel_source, inout.size(), 1, 1, inout);
940
for (int i = 0; i < inout.size(); ++i)
941
EXPECT_EQ(inout[i], expected[i]);
942
}
943
944
TEST_F(ComputeTest, imod_neg)
945
{
946
const char *kernel_source =
947
"__kernel void main_test(__global int *inout)\n\
948
{\n\
949
inout[get_global_id(0)] = inout[get_global_id(0)] % -3;\n\
950
}\n";
951
auto inout = ShaderArg<int32_t>({ -4, -3, -2, -1, 0, 1, 2, 3, 4 },
952
SHADER_ARG_INOUT);
953
const int32_t expected[] = {
954
-1, 0, -2, -1, 0, 1, 2, 0, 1
955
};
956
run_shader(kernel_source, inout.size(), 1, 1, inout);
957
for (int i = 0; i < inout.size(); ++i)
958
EXPECT_EQ(inout[i], expected[i]);
959
}
960
961
TEST_F(ComputeTest, umod)
962
{
963
const char *kernel_source =
964
"__kernel void main_test(__global uint *inout)\n\
965
{\n\
966
inout[get_global_id(0)] = inout[get_global_id(0)] % 0xfffffffc;\n\
967
}\n";
968
auto inout = ShaderArg<uint32_t>({ 0xfffffffa, 0xfffffffb, 0xfffffffc, 0xfffffffd, 0xfffffffe },
969
SHADER_ARG_INOUT);
970
const uint32_t expected[] = {
971
0xfffffffa, 0xfffffffb, 0, 1, 2
972
};
973
run_shader(kernel_source, inout.size(), 1, 1, inout);
974
for (int i = 0; i < inout.size(); ++i)
975
EXPECT_EQ(inout[i], expected[i]);
976
}
977
978
TEST_F(ComputeTest, rotate)
979
{
980
const char *kernel_source =
981
"__kernel void main_test(__global uint *inout)\n\
982
{\n\
983
inout[get_global_id(0)] = rotate(inout[get_global_id(0)], (uint)get_global_id(0) * 4);\n\
984
}\n";
985
auto inout = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
986
SHADER_ARG_INOUT);
987
const uint32_t expected[] = {
988
0xdeadbeef, 0xeadbeefd, 0xadbeefde, 0xdbeefdea
989
};
990
run_shader(kernel_source, inout.size(), 1, 1, inout);
991
for (int i = 0; i < inout.size(); ++i)
992
EXPECT_EQ(inout[i], expected[i]);
993
}
994
995
TEST_F(ComputeTest, popcount)
996
{
997
const char *kernel_source =
998
"__kernel void main_test(__global uint *inout)\n\
999
{\n\
1000
inout[get_global_id(0)] = popcount(inout[get_global_id(0)]);\n\
1001
}\n";
1002
auto inout = ShaderArg<uint32_t>({ 0, 0x1, 0x3, 0x101, 0x110011, ~0u },
1003
SHADER_ARG_INOUT);
1004
const uint32_t expected[] = {
1005
0, 1, 2, 2, 4, 32
1006
};
1007
run_shader(kernel_source, inout.size(), 1, 1, inout);
1008
for (int i = 0; i < inout.size(); ++i)
1009
EXPECT_EQ(inout[i], expected[i]);
1010
}
1011
1012
TEST_F(ComputeTest, hadd)
1013
{
1014
const char *kernel_source =
1015
"__kernel void main_test(__global uint *inout)\n\
1016
{\n\
1017
inout[get_global_id(0)] = hadd(inout[get_global_id(0)], 1u << 31);\n\
1018
}\n";
1019
auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff },
1020
SHADER_ARG_INOUT);
1021
const uint32_t expected[] = {
1022
(1u << 31) >> 1,
1023
((1u << 31) + 1) >> 1,
1024
((1u << 31) + 2) >> 1,
1025
((1u << 31) + 3) >> 1,
1026
((1ull << 31) + 0xfffffffc) >> 1,
1027
((1ull << 31) + 0xfffffffd) >> 1,
1028
((1ull << 31) + 0xfffffffe) >> 1,
1029
((1ull << 31) + 0xffffffff) >> 1,
1030
};
1031
run_shader(kernel_source, inout.size(), 1, 1, inout);
1032
for (int i = 0; i < inout.size(); ++i)
1033
EXPECT_EQ(inout[i], expected[i]);
1034
}
1035
1036
TEST_F(ComputeTest, rhadd)
1037
{
1038
const char *kernel_source =
1039
"__kernel void main_test(__global uint *inout)\n\
1040
{\n\
1041
inout[get_global_id(0)] = rhadd(inout[get_global_id(0)], 1u << 31);\n\
1042
}\n";
1043
auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, 0xfffffffc, 0xfffffffd, 0xfffffffe, 0xffffffff },
1044
SHADER_ARG_INOUT);
1045
const uint32_t expected[] = {
1046
((1u << 31) + 1) >> 1,
1047
((1u << 31) + 2) >> 1,
1048
((1u << 31) + 3) >> 1,
1049
((1u << 31) + 4) >> 1,
1050
((1ull << 31) + 0xfffffffd) >> 1,
1051
((1ull << 31) + 0xfffffffe) >> 1,
1052
((1ull << 31) + 0xffffffff) >> 1,
1053
((1ull << 31) + (1ull << 32)) >> 1,
1054
};
1055
run_shader(kernel_source, inout.size(), 1, 1, inout);
1056
for (int i = 0; i < inout.size(); ++i)
1057
EXPECT_EQ(inout[i], expected[i]);
1058
}
1059
1060
TEST_F(ComputeTest, add_sat)
1061
{
1062
const char *kernel_source =
1063
"__kernel void main_test(__global uint *inout)\n\
1064
{\n\
1065
inout[get_global_id(0)] = add_sat(inout[get_global_id(0)], 2u);\n\
1066
}\n";
1067
auto inout = ShaderArg<uint32_t>({ 0xffffffff - 3, 0xffffffff - 2, 0xffffffff - 1, 0xffffffff },
1068
SHADER_ARG_INOUT);
1069
const uint32_t expected[] = {
1070
0xffffffff - 1, 0xffffffff, 0xffffffff, 0xffffffff
1071
};
1072
run_shader(kernel_source, inout.size(), 1, 1, inout);
1073
for (int i = 0; i < inout.size(); ++i)
1074
EXPECT_EQ(inout[i], expected[i]);
1075
}
1076
1077
TEST_F(ComputeTest, sub_sat)
1078
{
1079
const char *kernel_source =
1080
"__kernel void main_test(__global uint *inout)\n\
1081
{\n\
1082
inout[get_global_id(0)] = sub_sat(inout[get_global_id(0)], 2u);\n\
1083
}\n";
1084
auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3 }, SHADER_ARG_INOUT);
1085
const uint32_t expected[] = {
1086
0, 0, 0, 1
1087
};
1088
run_shader(kernel_source, inout.size(), 1, 1, inout);
1089
for (int i = 0; i < inout.size(); ++i)
1090
EXPECT_EQ(inout[i], expected[i]);
1091
}
1092
1093
TEST_F(ComputeTest, mul_hi)
1094
{
1095
const char *kernel_source =
1096
"__kernel void main_test(__global uint *inout)\n\
1097
{\n\
1098
inout[get_global_id(0)] = mul_hi(inout[get_global_id(0)], 1u << 31);\n\
1099
}\n";
1100
auto inout = ShaderArg<uint32_t>({ 0, 1, 2, 3, (1u << 31) }, SHADER_ARG_INOUT);
1101
const uint32_t expected[] = {
1102
0, 0, 1, 1, (1u << 30)
1103
};
1104
run_shader(kernel_source, inout.size(), 1, 1, inout);
1105
for (int i = 0; i < inout.size(); ++i)
1106
EXPECT_EQ(inout[i], expected[i]);
1107
}
1108
1109
TEST_F(ComputeTest, ldexp_x)
1110
{
1111
const char *kernel_source =
1112
"__kernel void main_test(__global float *inout)\n\
1113
{\n\
1114
inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], 5);\n\
1115
}\n";
1116
auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 2.0f }, SHADER_ARG_INOUT);
1117
const float expected[] = {
1118
ldexp(0.0f, 5), ldexp(0.5f, 5), ldexp(1.0f, 5), ldexp(2.0f, 5)
1119
};
1120
run_shader(kernel_source, inout.size(), 1, 1, inout);
1121
for (int i = 0; i < inout.size(); ++i)
1122
EXPECT_FLOAT_EQ(inout[i], expected[i]);
1123
}
1124
1125
TEST_F(ComputeTest, ldexp_y)
1126
{
1127
const char *kernel_source =
1128
"__kernel void main_test(__global float *inout)\n\
1129
{\n\
1130
inout[get_global_id(0)] = ldexp(inout[get_global_id(0)], get_global_id(0));\n\
1131
}\n";
1132
auto inout = ShaderArg<float>({ 0.25f, 0.5f, 0.75f, 1.0f }, SHADER_ARG_INOUT);
1133
const float expected[] = {
1134
ldexp(0.25f, 0), ldexp(0.5f, 1), ldexp(0.75f, 2), ldexp(1.0f, 3)
1135
};
1136
run_shader(kernel_source, inout.size(), 1, 1, inout);
1137
for (int i = 0; i < inout.size(); ++i)
1138
EXPECT_FLOAT_EQ(inout[i], expected[i]);
1139
}
1140
1141
TEST_F(ComputeTest, frexp_ret)
1142
{
1143
const char *kernel_source =
1144
"__kernel void main_test(__global float *inout)\n\
1145
{\n\
1146
int exp;\n\
1147
inout[get_global_id(0)] = frexp(inout[get_global_id(0)], &exp);\n\
1148
}\n";
1149
auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT);
1150
const float expected[] = {
1151
0.0f, 0.5f, 0.5f, 0.75f
1152
};
1153
run_shader(kernel_source, inout.size(), 1, 1, inout);
1154
for (int i = 0; i < inout.size(); ++i)
1155
EXPECT_FLOAT_EQ(inout[i], expected[i]);
1156
}
1157
1158
TEST_F(ComputeTest, frexp_exp)
1159
{
1160
const char *kernel_source =
1161
"__kernel void main_test(__global float *inout)\n\
1162
{\n\
1163
int exp;\n\
1164
frexp(inout[get_global_id(0)], &exp);\n\
1165
inout[get_global_id(0)] = (float)exp;\n\
1166
}\n";
1167
auto inout = ShaderArg<float>({ 0.0f, 0.5f, 1.0f, 3.0f }, SHADER_ARG_INOUT);
1168
const float expected[] = {
1169
0.0f, 0.0f, 1.0f, 2.0f
1170
};
1171
run_shader(kernel_source, inout.size(), 1, 1, inout);
1172
for (int i = 0; i < inout.size(); ++i)
1173
EXPECT_FLOAT_EQ(inout[i], expected[i]);
1174
}
1175
1176
TEST_F(ComputeTest, clz)
1177
{
1178
const char *kernel_source =
1179
"__kernel void main_test(__global uint *inout)\n\
1180
{\n\
1181
inout[get_global_id(0)] = clz(inout[get_global_id(0)]);\n\
1182
}\n";
1183
auto inout = ShaderArg<uint32_t>({ 0, 1, 0xffff, (1u << 30), (1u << 31) }, SHADER_ARG_INOUT);
1184
const uint32_t expected[] = {
1185
32, 31, 16, 1, 0
1186
};
1187
run_shader(kernel_source, inout.size(), 1, 1, inout);
1188
for (int i = 0; i < inout.size(); ++i)
1189
EXPECT_FLOAT_EQ(inout[i], expected[i]);
1190
}
1191
1192
TEST_F(ComputeTest, sin)
1193
{
1194
struct sin_vals { float in; float clc; float native; };
1195
const char *kernel_source =
1196
"struct sin_vals { float in; float clc; float native; };\n\
1197
__kernel void main_test(__global struct sin_vals *inout)\n\
1198
{\n\
1199
inout[get_global_id(0)].clc = sin(inout[get_global_id(0)].in);\n\
1200
inout[get_global_id(0)].native = native_sin(inout[get_global_id(0)].in);\n\
1201
}\n";
1202
const vector<sin_vals> input = {
1203
{ 0.0f, 0.0f, 0.0f },
1204
{ 1.0f, 0.0f, 0.0f },
1205
{ 2.0f, 0.0f, 0.0f },
1206
{ 3.0f, 0.0f, 0.0f },
1207
};
1208
auto inout = ShaderArg<sin_vals>(input, SHADER_ARG_INOUT);
1209
const struct sin_vals expected[] = {
1210
{ 0.0f, 0.0f, 0.0f },
1211
{ 1.0f, sin(1.0f), sin(1.0f) },
1212
{ 2.0f, sin(2.0f), sin(2.0f) },
1213
{ 3.0f, sin(3.0f), sin(3.0f) },
1214
};
1215
run_shader(kernel_source, inout.size(), 1, 1, inout);
1216
for (int i = 0; i < inout.size(); ++i) {
1217
EXPECT_FLOAT_EQ(inout[i].in, inout[i].in);
1218
EXPECT_FLOAT_EQ(inout[i].clc, inout[i].clc);
1219
EXPECT_NEAR(inout[i].clc, inout[i].native, 0.008f); // range from DXIL spec
1220
}
1221
}
1222
1223
TEST_F(ComputeTest, DISABLED_cosh)
1224
{
1225
/* Disabled because of WARP failures, where we fetch incorrect results when
1226
* sourcing from non-float ICBs */
1227
const char *kernel_source =
1228
"__kernel void main_test(__global float *inout)\n\
1229
{\n\
1230
inout[get_global_id(0)] = cosh(inout[get_global_id(0)]);\n\
1231
}\n";
1232
auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1233
const float expected[] = {
1234
cosh(0.0f), cosh(1.0f), cosh(2.0f), cosh(3.0f)
1235
};
1236
run_shader(kernel_source, inout.size(), 1, 1, inout);
1237
for (int i = 0; i < inout.size(); ++i)
1238
EXPECT_FLOAT_EQ(inout[i], expected[i]);
1239
}
1240
1241
TEST_F(ComputeTest, exp)
1242
{
1243
const char *kernel_source =
1244
"__kernel void main_test(__global float *inout)\n\
1245
{\n\
1246
inout[get_global_id(0)] = native_exp(inout[get_global_id(0)]);\n\
1247
}\n";
1248
auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1249
const float expected[] = {
1250
exp(0.0f), exp(1.0f), exp(2.0f), exp(3.0f)
1251
};
1252
run_shader(kernel_source, inout.size(), 1, 1, inout);
1253
for (int i = 0; i < inout.size(); ++i)
1254
EXPECT_FLOAT_EQ(inout[i], expected[i]);
1255
}
1256
1257
TEST_F(ComputeTest, exp10)
1258
{
1259
const char *kernel_source =
1260
"__kernel void main_test(__global float *inout)\n\
1261
{\n\
1262
inout[get_global_id(0)] = native_exp10(inout[get_global_id(0)]);\n\
1263
}\n";
1264
auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1265
const float expected[] = {
1266
pow(10.0f, 0.0f), pow(10.0f, 1.0f), pow(10.0f, 2.0f), pow(10.0f, 3.0f)
1267
};
1268
run_shader(kernel_source, inout.size(), 1, 1, inout);
1269
for (int i = 0; i < inout.size(); ++i)
1270
EXPECT_FLOAT_EQ(inout[i], expected[i]);
1271
}
1272
1273
TEST_F(ComputeTest, exp2)
1274
{
1275
const char *kernel_source =
1276
"__kernel void main_test(__global float *inout)\n\
1277
{\n\
1278
inout[get_global_id(0)] = native_exp2(inout[get_global_id(0)]);\n\
1279
}\n";
1280
auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1281
const float expected[] = {
1282
pow(2.0f, 0.0f), pow(2.0f, 1.0f), pow(2.0f, 2.0f), pow(2.0f, 3.0f)
1283
};
1284
run_shader(kernel_source, inout.size(), 1, 1, inout);
1285
for (int i = 0; i < inout.size(); ++i)
1286
EXPECT_FLOAT_EQ(inout[i], expected[i]);
1287
}
1288
1289
TEST_F(ComputeTest, log)
1290
{
1291
const char *kernel_source =
1292
"__kernel void main_test(__global float *inout)\n\
1293
{\n\
1294
inout[get_global_id(0)] = native_log(inout[get_global_id(0)]);\n\
1295
}\n";
1296
auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1297
const float expected[] = {
1298
log(0.0f), log(1.0f), log(2.0f), log(3.0f)
1299
};
1300
run_shader(kernel_source, inout.size(), 1, 1, inout);
1301
for (int i = 0; i < inout.size(); ++i)
1302
EXPECT_FLOAT_EQ(inout[i], expected[i]);
1303
}
1304
1305
TEST_F(ComputeTest, log10)
1306
{
1307
const char *kernel_source =
1308
"__kernel void main_test(__global float *inout)\n\
1309
{\n\
1310
inout[get_global_id(0)] = native_log10(inout[get_global_id(0)]);\n\
1311
}\n";
1312
auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1313
const float expected[] = {
1314
log10(0.0f), log10(1.0f), log10(2.0f), log10(3.0f)
1315
};
1316
run_shader(kernel_source, inout.size(), 1, 1, inout);
1317
for (int i = 0; i < inout.size(); ++i)
1318
EXPECT_FLOAT_EQ(inout[i], expected[i]);
1319
}
1320
1321
TEST_F(ComputeTest, log2)
1322
{
1323
const char *kernel_source =
1324
"__kernel void main_test(__global float *inout)\n\
1325
{\n\
1326
inout[get_global_id(0)] = native_log2(inout[get_global_id(0)]);\n\
1327
}\n";
1328
auto inout = ShaderArg<float>({ 0.0f, 1.0f, 2.0f, 3.0f }, SHADER_ARG_INOUT);
1329
const float expected[] = {
1330
log(0.0f) / log(2), log(1.0f) / log(2), log(2.0f) / log(2), log(3.0f) / log(2)
1331
};
1332
run_shader(kernel_source, inout.size(), 1, 1, inout);
1333
for (int i = 0; i < inout.size(); ++i)
1334
EXPECT_FLOAT_EQ(inout[i], expected[i]);
1335
}
1336
1337
TEST_F(ComputeTest, rint)
1338
{
1339
const char *kernel_source =
1340
"__kernel void main_test(__global float *inout)\n\
1341
{\n\
1342
inout[get_global_id(0)] = rint(inout[get_global_id(0)]);\n\
1343
}\n";
1344
1345
auto inout = ShaderArg<float>({ 0.5f, 1.5f, -0.5f, -1.5f, 1.4f }, SHADER_ARG_INOUT);
1346
const float expected[] = {
1347
0.0f, 2.0f, 0.0f, -2.0f, 1.0f,
1348
};
1349
run_shader(kernel_source, inout.size(), 1, 1, inout);
1350
for (int i = 0; i < inout.size(); ++i)
1351
EXPECT_FLOAT_EQ(inout[i], expected[i]);
1352
}
1353
1354
TEST_F(ComputeTest, round)
1355
{
1356
const char *kernel_source =
1357
"__kernel void main_test(__global float *inout)\n\
1358
{\n\
1359
inout[get_global_id(0)] = round(inout[get_global_id(0)]);\n\
1360
}\n";
1361
auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1362
SHADER_ARG_INOUT);
1363
const float expected[] = {
1364
0.0f, 0.0f, -0.0f, 1.0f, -1.0f, 1.0f, -1.0f
1365
};
1366
run_shader(kernel_source, inout.size(), 1, 1, inout);
1367
for (int i = 0; i < inout.size(); ++i)
1368
EXPECT_FLOAT_EQ(inout[i], expected[i]);
1369
}
1370
1371
TEST_F(ComputeTest, arg_by_val)
1372
{
1373
const char *kernel_source =
1374
"__kernel void main_test(__global float *inout, float mul)\n\
1375
{\n\
1376
inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\
1377
}\n";
1378
auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1379
SHADER_ARG_INOUT);
1380
auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT);
1381
const float expected[] = {
1382
0.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f
1383
};
1384
run_shader(kernel_source, inout.size(), 1, 1, inout, mul);
1385
for (int i = 0; i < inout.size(); ++i)
1386
EXPECT_FLOAT_EQ(inout[i], expected[i]);
1387
}
1388
1389
TEST_F(ComputeTest, uint8_by_val)
1390
{
1391
struct uint8 {
1392
uint32_t s0; uint32_t s1; uint32_t s2; uint32_t s3;
1393
uint32_t s4; uint32_t s5; uint32_t s6; uint32_t s7;
1394
};
1395
const char *kernel_source =
1396
"__kernel void main_test(__global uint *out, uint8 val)\n\
1397
{\n\
1398
out[get_global_id(0)] = val.s0 + val.s1 + val.s2 + val.s3 +\n\
1399
val.s4 + val.s5 + val.s6 + val.s7;\n\
1400
}\n";
1401
auto out = ShaderArg<uint32_t>({ 0 }, SHADER_ARG_OUTPUT);
1402
auto val = ShaderArg<struct uint8>({ {0, 1, 2, 3, 4, 5, 6, 7 }}, SHADER_ARG_INPUT);
1403
const uint32_t expected[] = { 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 };
1404
run_shader(kernel_source, out.size(), 1, 1, out, val);
1405
for (int i = 0; i < out.size(); ++i)
1406
EXPECT_EQ(out[i], expected[i]);
1407
}
1408
1409
TEST_F(ComputeTest, link)
1410
{
1411
const char *foo_src =
1412
"float foo(float in)\n\
1413
{\n\
1414
return in * in;\n\
1415
}\n";
1416
const char *kernel_source =
1417
"float foo(float in);\n\
1418
__kernel void main_test(__global float *inout)\n\
1419
{\n\
1420
inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\
1421
}\n";
1422
std::vector<const char *> srcs = { foo_src, kernel_source };
1423
auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT);
1424
const float expected[] = {
1425
4.0f,
1426
};
1427
run_shader(srcs, inout.size(), 1, 1, inout);
1428
for (int i = 0; i < inout.size(); ++i)
1429
EXPECT_EQ(inout[i], expected[i]);
1430
}
1431
1432
TEST_F(ComputeTest, link_library)
1433
{
1434
const char *bar_src =
1435
"float bar(float in)\n\
1436
{\n\
1437
return in * 5;\n\
1438
}\n";
1439
const char *foo_src =
1440
"float bar(float in);\n\
1441
float foo(float in)\n\
1442
{\n\
1443
return in * bar(in);\n\
1444
}\n";
1445
const char *kernel_source =
1446
"float foo(float in);\n\
1447
__kernel void main_test(__global float *inout)\n\
1448
{\n\
1449
inout[get_global_id(0)] = foo(inout[get_global_id(0)]);\n\
1450
}\n";
1451
std::vector<Shader> libraries = {
1452
compile({ bar_src, kernel_source }, {}, true),
1453
compile({ foo_src }, {}, true)
1454
};
1455
Shader exe = link(libraries);
1456
auto inout = ShaderArg<float>({ 2.0f }, SHADER_ARG_INOUT);
1457
const float expected[] = {
1458
20.0f,
1459
};
1460
run_shader(exe, { (unsigned)inout.size(), 1, 1 }, inout);
1461
for (int i = 0; i < inout.size(); ++i)
1462
EXPECT_EQ(inout[i], expected[i]);
1463
}
1464
1465
TEST_F(ComputeTest, localvar)
1466
{
1467
const char *kernel_source =
1468
"__kernel __attribute__((reqd_work_group_size(2, 1, 1)))\n\
1469
void main_test(__global float *inout)\n\
1470
{\n\
1471
__local float2 tmp[2];\n\
1472
tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1473
tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1474
barrier(CLK_LOCAL_MEM_FENCE);\n\
1475
inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1476
}\n";
1477
1478
auto inout = ShaderArg<float>({ 2.0f, 4.0f }, SHADER_ARG_INOUT);
1479
const float expected[] = {
1480
9.0f, 5.0f
1481
};
1482
run_shader(kernel_source, inout.size(), 1, 1, inout);
1483
for (int i = 0; i < inout.size(); ++i)
1484
EXPECT_EQ(inout[i], expected[i]);
1485
}
1486
1487
TEST_F(ComputeTest, localvar_uchar2)
1488
{
1489
const char *kernel_source =
1490
"__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1491
__kernel void main_test(__global uchar *inout)\n\
1492
{\n\
1493
__local uchar2 tmp[2];\n\
1494
tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1495
tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1496
barrier(CLK_LOCAL_MEM_FENCE);\n\
1497
inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1498
}\n";
1499
1500
auto inout = ShaderArg<uint8_t>({ 2, 4 }, SHADER_ARG_INOUT);
1501
const uint8_t expected[] = { 9, 5 };
1502
run_shader(kernel_source, inout.size(), 1, 1, inout);
1503
for (int i = 0; i < inout.size(); ++i)
1504
EXPECT_EQ(inout[i], expected[i]);
1505
}
1506
1507
TEST_F(ComputeTest, work_group_size_hint)
1508
{
1509
const char *kernel_source =
1510
"__attribute__((work_group_size_hint(2, 1, 1)))\n\
1511
__kernel void main_test(__global uint *output)\n\
1512
{\n\
1513
output[get_global_id(0)] = get_local_id(0);\n\
1514
}\n";
1515
auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
1516
SHADER_ARG_OUTPUT);
1517
const uint32_t expected[] = {
1518
0, 1, 2, 3
1519
};
1520
run_shader(kernel_source, output.size(), 1, 1, output);
1521
for (int i = 0; i < output.size(); ++i)
1522
EXPECT_EQ(output[i], expected[i]);
1523
}
1524
1525
TEST_F(ComputeTest, reqd_work_group_size)
1526
{
1527
const char *kernel_source =
1528
"__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1529
__kernel void main_test(__global uint *output)\n\
1530
{\n\
1531
output[get_global_id(0)] = get_local_id(0);\n\
1532
}\n";
1533
auto output = ShaderArg<uint32_t>(std::vector<uint32_t>(4, 0xdeadbeef),
1534
SHADER_ARG_OUTPUT);
1535
const uint32_t expected[] = {
1536
0, 1, 0, 1
1537
};
1538
run_shader(kernel_source, output.size(), 1, 1, output);
1539
for (int i = 0; i < output.size(); ++i)
1540
EXPECT_EQ(output[i], expected[i]);
1541
}
1542
1543
TEST_F(ComputeTest, image)
1544
{
1545
const char* kernel_source =
1546
"__kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1547
{\n\
1548
int2 coords = (int2)(get_global_id(0), get_global_id(1));\n\
1549
write_imagef(output, coords, read_imagef(input, coords));\n\
1550
}\n";
1551
Shader shader = compile(std::vector<const char*>({ kernel_source }));
1552
validate(shader);
1553
}
1554
1555
TEST_F(ComputeTest, image_two_reads)
1556
{
1557
const char* kernel_source =
1558
"__kernel void main_test(image2d_t image, int is_float, __global float* output)\n\
1559
{\n\
1560
if (is_float)\n\
1561
output[get_global_id(0)] = read_imagef(image, (int2)(0, 0)).x;\n\
1562
else \n\
1563
output[get_global_id(0)] = (float)read_imagei(image, (int2)(0, 0)).x;\n\
1564
}\n";
1565
Shader shader = compile(std::vector<const char*>({ kernel_source }));
1566
validate(shader);
1567
}
1568
1569
TEST_F(ComputeTest, sampler)
1570
{
1571
const char* kernel_source =
1572
"__kernel void main_test(image2d_t image, sampler_t sampler, __global float* output)\n\
1573
{\n\
1574
output[get_global_id(0)] = read_imagef(image, sampler, (int2)(0, 0)).x;\n\
1575
}\n";
1576
Shader shader = compile(std::vector<const char*>({ kernel_source }));
1577
validate(shader);
1578
}
1579
1580
TEST_F(ComputeTest, image_dims)
1581
{
1582
const char* kernel_source =
1583
"__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\
1584
{\n\
1585
output[get_global_id(0)] = get_image_width(roimage);\n\
1586
output[get_global_id(0) + 1] = get_image_width(woimage);\n\
1587
}\n";
1588
Shader shader = compile(std::vector<const char*>({ kernel_source }));
1589
validate(shader);
1590
}
1591
1592
TEST_F(ComputeTest, image_format)
1593
{
1594
const char* kernel_source =
1595
"__kernel void main_test(image2d_t roimage, write_only image2d_t woimage, __global uint* output)\n\
1596
{\n\
1597
output[get_global_id(0)] = get_image_channel_data_type(roimage);\n\
1598
output[get_global_id(0) + 1] = get_image_channel_order(woimage);\n\
1599
}\n";
1600
Shader shader = compile(std::vector<const char*>({ kernel_source }));
1601
validate(shader);
1602
}
1603
1604
TEST_F(ComputeTest, image1d_buffer_t)
1605
{
1606
const char* kernel_source =
1607
"__kernel void main_test(read_only image1d_buffer_t input, write_only image1d_buffer_t output)\n\
1608
{\n\
1609
write_imageui(output, get_global_id(0), read_imageui(input, get_global_id(0)));\n\
1610
}\n";
1611
Shader shader = compile(std::vector<const char*>({ kernel_source }));
1612
validate(shader);
1613
}
1614
1615
TEST_F(ComputeTest, local_ptr)
1616
{
1617
struct uint2 { uint32_t x, y; };
1618
const char *kernel_source =
1619
"__kernel void main_test(__global uint *inout, __local uint2 *tmp)\n\
1620
{\n\
1621
tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1622
tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1623
barrier(CLK_LOCAL_MEM_FENCE);\n\
1624
inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y;\n\
1625
}\n";
1626
auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1627
auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(4096), SHADER_ARG_INPUT);
1628
const uint8_t expected[] = { 9, 5 };
1629
run_shader(kernel_source, inout.size(), 1, 1, inout, tmp);
1630
for (int i = 0; i < inout.size(); ++i)
1631
EXPECT_EQ(inout[i], expected[i]);
1632
}
1633
1634
TEST_F(ComputeTest, two_local_ptrs)
1635
{
1636
struct uint2 { uint32_t x, y; };
1637
const char *kernel_source =
1638
"__kernel void main_test(__global uint *inout, __local uint2 *tmp, __local uint *tmp2)\n\
1639
{\n\
1640
tmp[get_local_id(0)].x = inout[get_global_id(0)] + 1;\n\
1641
tmp[get_local_id(0)].y = inout[get_global_id(0)] - 1;\n\
1642
tmp2[get_local_id(0)] = get_global_id(0);\n\
1643
barrier(CLK_LOCAL_MEM_FENCE);\n\
1644
inout[get_global_id(0)] = tmp[get_local_id(0) % 2].x * tmp[(get_local_id(0) + 1) % 2].y + tmp2[get_local_id(0) % 2];\n\
1645
}\n";
1646
auto inout = ShaderArg<uint32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1647
auto tmp = ShaderArg<struct uint2>(std::vector<struct uint2>(1024), SHADER_ARG_INPUT);
1648
auto tmp2 = ShaderArg<uint32_t>(std::vector<uint32_t>(1024), SHADER_ARG_INPUT);
1649
const uint8_t expected[] = { 9, 6 };
1650
run_shader(kernel_source, inout.size(), 1, 1, inout, tmp, tmp2);
1651
for (int i = 0; i < inout.size(); ++i)
1652
EXPECT_EQ(inout[i], expected[i]);
1653
}
1654
1655
TEST_F(ComputeTest, int8_to_float)
1656
{
1657
const char *kernel_source =
1658
"__kernel void main_test(__global char* in, __global float* out)\n\
1659
{\n\
1660
uint pos = get_global_id(0);\n\
1661
out[pos] = in[pos] / 100.0f;\n\
1662
}";
1663
auto in = ShaderArg<char>({ 10, 20, 30, 40 }, SHADER_ARG_INPUT);
1664
auto out = ShaderArg<float>(std::vector<float>(4, std::numeric_limits<float>::infinity()), SHADER_ARG_OUTPUT);
1665
const float expected[] = { 0.1f, 0.2f, 0.3f, 0.4f };
1666
run_shader(kernel_source, in.size(), 1, 1, in, out);
1667
for (int i = 0; i < in.size(); ++i)
1668
EXPECT_FLOAT_EQ(out[i], expected[i]);
1669
}
1670
1671
TEST_F(ComputeTest, vec_hint_float4)
1672
{
1673
const char *kernel_source =
1674
"__kernel __attribute__((vec_type_hint(float4))) void main_test(__global float *inout)\n\
1675
{\n\
1676
inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1677
}";
1678
Shader shader = compile({ kernel_source });
1679
EXPECT_EQ(shader.obj->kernels[0].vec_hint_size, 4);
1680
EXPECT_EQ(shader.obj->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_FLOAT);
1681
}
1682
1683
TEST_F(ComputeTest, vec_hint_uchar2)
1684
{
1685
const char *kernel_source =
1686
"__kernel __attribute__((vec_type_hint(uchar2))) void main_test(__global float *inout)\n\
1687
{\n\
1688
inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1689
}";
1690
Shader shader = compile({ kernel_source });
1691
EXPECT_EQ(shader.obj->kernels[0].vec_hint_size, 2);
1692
EXPECT_EQ(shader.obj->kernels[0].vec_hint_type, CLC_VEC_HINT_TYPE_CHAR);
1693
}
1694
1695
TEST_F(ComputeTest, vec_hint_none)
1696
{
1697
const char *kernel_source =
1698
"__kernel void main_test(__global float *inout)\n\
1699
{\n\
1700
inout[get_global_id(0)] *= inout[get_global_id(1)];\n\
1701
}";
1702
Shader shader = compile({ kernel_source });
1703
EXPECT_EQ(shader.obj->kernels[0].vec_hint_size, 0);
1704
}
1705
1706
TEST_F(ComputeTest, DISABLED_debug_layer_failure)
1707
{
1708
const char *kernel_source =
1709
"__kernel void main_test(__global float *inout, float mul)\n\
1710
{\n\
1711
inout[get_global_id(0)] = inout[get_global_id(0)] * mul;\n\
1712
}\n";
1713
auto inout = ShaderArg<float>({ 0, 0.3f, -0.3f, 0.5f, -0.5f, 1.1f, -1.1f },
1714
SHADER_ARG_INOUT);
1715
auto mul = ShaderArg<float>(10.0f, SHADER_ARG_INPUT);
1716
const float expected[] = {
1717
0.0f, 3.0f, -3.0f, 5.0f, -5.0f, 11.0f, -11.0f
1718
};
1719
ComPtr<ID3D12InfoQueue> info_queue;
1720
dev->QueryInterface(info_queue.ReleaseAndGetAddressOf());
1721
if (!info_queue) {
1722
GTEST_SKIP() << "No info queue";
1723
return;
1724
}
1725
1726
info_queue->AddApplicationMessage(D3D12_MESSAGE_SEVERITY_ERROR, "This should cause the test to fail");
1727
run_shader(kernel_source, inout.size(), 1, 1, inout, mul);
1728
for (int i = 0; i < inout.size(); ++i)
1729
EXPECT_FLOAT_EQ(inout[i], expected[i]);
1730
}
1731
1732
TEST_F(ComputeTest, compiler_defines)
1733
{
1734
const char *kernel_source =
1735
"__kernel void main_test(__global int* out)\n\
1736
{\n\
1737
out[0] = OUT_VAL0;\n\
1738
out[1] = __OPENCL_C_VERSION__;\n\
1739
}";
1740
auto out = ShaderArg<int>(std::vector<int>(2, 0), SHADER_ARG_OUTPUT);
1741
CompileArgs compile_args = { 1, 1, 1 };
1742
compile_args.compiler_command_line = { "-DOUT_VAL0=5", "-cl-std=cl" };
1743
std::vector<RawShaderArg *> raw_args = { &out };
1744
run_shader({ kernel_source }, compile_args, out);
1745
EXPECT_EQ(out[0], 5);
1746
EXPECT_EQ(out[1], 100);
1747
}
1748
1749
/* There's a bug in WARP turning atomic_add(ptr, x) into
1750
* atomic_add(ptr, x * 4). Works fine on intel HW.
1751
*/
1752
TEST_F(ComputeTest, DISABLED_global_atomic_add)
1753
{
1754
const char *kernel_source =
1755
"__kernel void main_test(__global int *inout, __global int *old)\n\
1756
{\n\
1757
old[get_global_id(0)] = atomic_add(inout + get_global_id(0), 3);\n\
1758
}\n";
1759
auto inout = ShaderArg<int32_t>({ 2, 4 }, SHADER_ARG_INOUT);
1760
auto old = ShaderArg<int32_t>(std::vector<int32_t>(2, 0xdeadbeef), SHADER_ARG_OUTPUT);
1761
const int32_t expected_inout[] = { 5, 7 };
1762
const int32_t expected_old[] = { 2, 4 };
1763
run_shader(kernel_source, inout.size(), 1, 1, inout, old);
1764
for (int i = 0; i < inout.size(); ++i) {
1765
EXPECT_EQ(inout[i], expected_inout[i]);
1766
EXPECT_EQ(old[i], expected_old[i]);
1767
}
1768
}
1769
1770
TEST_F(ComputeTest, global_atomic_imin)
1771
{
1772
const char *kernel_source =
1773
"__kernel void main_test(__global int *inout, __global int *old)\n\
1774
{\n\
1775
old[get_global_id(0)] = atomic_min(inout + get_global_id(0), 1);\n\
1776
}\n";
1777
auto inout = ShaderArg<int32_t>({ 0, 2, -1 }, SHADER_ARG_INOUT);
1778
auto old = ShaderArg<int32_t>(std::vector<int32_t>(3, 0xdeadbeef), SHADER_ARG_OUTPUT);
1779
const int32_t expected_inout[] = { 0, 1, -1 };
1780
const int32_t expected_old[] = { 0, 2, -1 };
1781
run_shader(kernel_source, inout.size(), 1, 1, inout, old);
1782
for (int i = 0; i < inout.size(); ++i) {
1783
EXPECT_EQ(inout[i], expected_inout[i]);
1784
EXPECT_EQ(old[i], expected_old[i]);
1785
}
1786
}
1787
1788
TEST_F(ComputeTest, global_atomic_and_or)
1789
{
1790
const char *kernel_source =
1791
"__attribute__((reqd_work_group_size(3, 1, 1)))\n\
1792
__kernel void main_test(__global int *inout)\n\
1793
{\n\
1794
atomic_and(inout, ~(1 << get_global_id(0)));\n\
1795
atomic_or(inout, (1 << (get_global_id(0) + 4)));\n\
1796
}\n";
1797
auto inout = ShaderArg<int32_t>(0xf, SHADER_ARG_INOUT);
1798
const int32_t expected[] = { 0x78 };
1799
run_shader(kernel_source, 3, 1, 1, inout);
1800
for (int i = 0; i < inout.size(); ++i)
1801
EXPECT_EQ(inout[i], expected[i]);
1802
}
1803
1804
TEST_F(ComputeTest, global_atomic_cmpxchg)
1805
{
1806
const char *kernel_source =
1807
"__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1808
__kernel void main_test(__global int *inout)\n\
1809
{\n\
1810
while (atomic_cmpxchg(inout, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\
1811
;\n\
1812
}\n";
1813
auto inout = ShaderArg<int32_t>(0, SHADER_ARG_INOUT);
1814
const int32_t expected_inout[] = { 2 };
1815
run_shader(kernel_source, 2, 1, 1, inout);
1816
for (int i = 0; i < inout.size(); ++i)
1817
EXPECT_EQ(inout[i], expected_inout[i]);
1818
}
1819
1820
TEST_F(ComputeTest, local_atomic_and_or)
1821
{
1822
const char *kernel_source =
1823
"__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1824
__kernel void main_test(__global ushort *inout)\n\
1825
{\n\
1826
__local ushort tmp;\n\
1827
atomic_and(&tmp, ~(0xff << (get_global_id(0) * 8)));\n\
1828
atomic_or(&tmp, inout[get_global_id(0)] << (get_global_id(0) * 8));\n\
1829
barrier(CLK_LOCAL_MEM_FENCE);\n\
1830
inout[get_global_id(0)] = tmp;\n\
1831
}\n";
1832
auto inout = ShaderArg<uint16_t>({ 2, 4 }, SHADER_ARG_INOUT);
1833
const uint16_t expected[] = { 0x402, 0x402 };
1834
run_shader(kernel_source, inout.size(), 1, 1, inout);
1835
for (int i = 0; i < inout.size(); ++i)
1836
EXPECT_EQ(inout[i], expected[i]);
1837
}
1838
1839
TEST_F(ComputeTest, local_atomic_cmpxchg)
1840
{
1841
const char *kernel_source =
1842
"__attribute__((reqd_work_group_size(2, 1, 1)))\n\
1843
__kernel void main_test(__global int *out)\n\
1844
{\n\
1845
__local uint tmp;\n\
1846
tmp = 0;\n\
1847
barrier(CLK_LOCAL_MEM_FENCE);\n\
1848
while (atomic_cmpxchg(&tmp, get_global_id(0), get_global_id(0) + 1) != get_global_id(0))\n\
1849
;\n\
1850
barrier(CLK_LOCAL_MEM_FENCE);\n\
1851
out[0] = tmp;\n\
1852
}\n";
1853
1854
auto out = ShaderArg<uint32_t>(0xdeadbeef, SHADER_ARG_OUTPUT);
1855
const uint16_t expected[] = { 2 };
1856
run_shader(kernel_source, 2, 1, 1, out);
1857
for (int i = 0; i < out.size(); ++i)
1858
EXPECT_EQ(out[i], expected[i]);
1859
}
1860
1861
TEST_F(ComputeTest, constant_sampler)
1862
{
1863
const char* kernel_source =
1864
"__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_LINEAR;\n\
1865
__kernel void main_test(read_only image2d_t input, write_only image2d_t output)\n\
1866
{\n\
1867
int2 coordsi = (int2)(get_global_id(0), get_global_id(1));\n\
1868
float2 coordsf = (float2)((float)coordsi.x / get_image_width(input), (float)coordsi.y / get_image_height(input));\n\
1869
write_imagef(output, coordsi, \n\
1870
read_imagef(input, sampler, coordsf) + \n\
1871
read_imagef(input, sampler, coordsf + (float2)(0.1, 0.1)));\n\
1872
}\n";
1873
Shader shader = compile(std::vector<const char*>({ kernel_source }));
1874
validate(shader);
1875
EXPECT_EQ(shader.dxil->metadata.num_const_samplers, 1);
1876
}
1877
1878
TEST_F(ComputeTest, hi)
1879
{
1880
const char *kernel_source = R"(
1881
__kernel void main_test(__global char3 *srcA, __global char2 *dst)
1882
{
1883
int tid = get_global_id(0);
1884
1885
char2 tmp = srcA[tid].hi;
1886
dst[tid] = tmp;
1887
})";
1888
Shader shader = compile(std::vector<const char*>({ kernel_source }));
1889
validate(shader);
1890
}
1891
1892
TEST_F(ComputeTest, system_values)
1893
{
1894
const char *kernel_source =
1895
"__kernel void main_test(__global uint* outputs)\n\
1896
{\n\
1897
outputs[0] = get_work_dim();\n\
1898
outputs[1] = get_global_size(0);\n\
1899
outputs[2] = get_local_size(0);\n\
1900
outputs[3] = get_num_groups(0);\n\
1901
outputs[4] = get_group_id(0);\n\
1902
outputs[5] = get_global_offset(0);\n\
1903
outputs[6] = get_global_id(0);\n\
1904
}\n";
1905
auto out = ShaderArg<uint32_t>(std::vector<uint32_t>(6, 0xdeadbeef), SHADER_ARG_OUTPUT);
1906
const uint16_t expected[] = { 3, 1, 1, 1, 0, 0, 0, };
1907
CompileArgs args = { 1, 1, 1 };
1908
Shader shader = compile({ kernel_source });
1909
run_shader(shader, args, out);
1910
for (int i = 0; i < out.size(); ++i)
1911
EXPECT_EQ(out[i], expected[i]);
1912
1913
args.work_props.work_dim = 2;
1914
args.work_props.global_offset_x = 100;
1915
args.work_props.group_id_offset_x = 2;
1916
args.work_props.group_count_total_x = 5;
1917
const uint32_t expected_withoffsets[] = { 2, 5, 1, 5, 2, 100, 102 };
1918
run_shader(shader, args, out);
1919
for (int i = 0; i < out.size(); ++i)
1920
EXPECT_EQ(out[i], expected_withoffsets[i]);
1921
}
1922
1923
TEST_F(ComputeTest, convert_round_sat)
1924
{
1925
const char *kernel_source =
1926
"__kernel void main_test(__global float *f, __global uchar *u)\n\
1927
{\n\
1928
uint idx = get_global_id(0);\n\
1929
u[idx] = convert_uchar_sat_rtp(f[idx]);\n\
1930
}\n";
1931
auto f = ShaderArg<float>({ -1.0f, 1.1f, 20.0f, 255.5f }, SHADER_ARG_INPUT);
1932
auto u = ShaderArg<uint8_t>({ 255, 0, 0, 0 }, SHADER_ARG_OUTPUT);
1933
const uint8_t expected[] = {
1934
0, 2, 20, 255
1935
};
1936
1937
run_shader(kernel_source, f.size(), 1, 1, f, u);
1938
for (int i = 0; i < u.size(); ++i)
1939
EXPECT_EQ(u[i], expected[i]);
1940
}
1941
1942
TEST_F(ComputeTest, convert_round_sat_vec)
1943
{
1944
const char *kernel_source =
1945
"__kernel void main_test(__global float16 *f, __global uchar16 *u)\n\
1946
{\n\
1947
uint idx = get_global_id(0);\n\
1948
u[idx] = convert_uchar16_sat_rtp(f[idx]);\n\
1949
}\n";
1950
auto f = ShaderArg<float>({
1951
-1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1952
-0.5f, 1.9f, 20.0f, 254.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1953
0.0f, 1.3f, 20.0f, 255.1f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1954
-0.0f, 1.5555f, 20.0f, 254.9f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f, -1.0f, 1.1f, 20.0f, 255.5f,
1955
}, SHADER_ARG_INPUT);
1956
auto u = ShaderArg<uint8_t>({
1957
255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1958
255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1959
255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1960
255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0,
1961
}, SHADER_ARG_OUTPUT);
1962
const uint8_t expected[] = {
1963
0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1964
0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1965
0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1966
0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255, 0, 2, 20, 255,
1967
};
1968
1969
run_shader(kernel_source, 4, 1, 1, f, u);
1970
for (int i = 0; i < u.size(); ++i)
1971
EXPECT_EQ(u[i], expected[i]);
1972
}
1973
1974
TEST_F(ComputeTest, convert_char2_uchar2)
1975
{
1976
const char *kernel_source =
1977
"__kernel void main_test( __global char2 *src, __global uchar2 *dest )\n\
1978
{\n\
1979
size_t i = get_global_id(0);\n\
1980
dest[i] = convert_uchar2_sat( src[i] );\n\
1981
}\n";
1982
1983
auto c = ShaderArg<int8_t>({ -127, -4, 0, 4, 126, 127, 16, 32 }, SHADER_ARG_INPUT);
1984
auto u = ShaderArg<uint8_t>({ 99, 99, 99, 99, 99, 99, 99, 99 }, SHADER_ARG_OUTPUT);
1985
const uint8_t expected[] = { 0, 0, 0, 4, 126, 127, 16, 32 };
1986
run_shader(kernel_source, 4, 1, 1, c, u);
1987
for (int i = 0; i < u.size(); i++)
1988
EXPECT_EQ(u[i], expected[i]);
1989
}
1990
1991
TEST_F(ComputeTest, async_copy)
1992
{
1993
const char *kernel_source = R"(
1994
__kernel void main_test( const __global char *src, __global char *dst, __local char *localBuffer, int copiesPerWorkgroup, int copiesPerWorkItem )
1995
{
1996
int i;
1997
for(i=0; i<copiesPerWorkItem; i++)
1998
localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ] = (char)(char)0;
1999
barrier( CLK_LOCAL_MEM_FENCE );
2000
event_t event;
2001
event = async_work_group_copy( (__local char*)localBuffer, (__global const char*)(src+copiesPerWorkgroup*get_group_id(0)), (size_t)copiesPerWorkgroup, 0 );
2002
wait_group_events( 1, &event );
2003
for(i=0; i<copiesPerWorkItem; i++)
2004
dst[ get_global_id( 0 )*copiesPerWorkItem+i ] = localBuffer[ get_local_id( 0 )*copiesPerWorkItem+i ];
2005
})";
2006
Shader shader = compile({ kernel_source });
2007
validate(shader);
2008
}
2009
2010
TEST_F(ComputeTest, packed_struct_global)
2011
{
2012
#pragma pack(push, 1)
2013
struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2014
#pragma pack(pop)
2015
2016
const char *kernel_source =
2017
"struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2018
__kernel void main_test(__global struct s *inout, global uint *size)\n\
2019
{\n\
2020
uint idx = get_global_id(0);\n\
2021
inout[idx].uc = idx + 1;\n\
2022
inout[idx].ul = ((ulong)(idx + 1 + 0xfbfcfdfe) << 32) | 0x12345678;\n\
2023
inout[idx].us = ((ulong)(idx + 1 + 0xa0) << 8) | 0x12;\n\
2024
*size = sizeof(struct s);\n\
2025
}\n";
2026
auto inout = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT);
2027
auto size = ShaderArg<uint32_t>(0, SHADER_ARG_OUTPUT);
2028
const struct s expected[] = {
2029
{ 1, 0xfbfcfdff12345678, 0xa112 }
2030
};
2031
2032
run_shader(kernel_source, inout.size(), 1, 1, inout, size);
2033
for (int i = 0; i < inout.size(); ++i) {
2034
EXPECT_EQ(inout[i].uc, expected[i].uc);
2035
EXPECT_EQ(inout[i].ul, expected[i].ul);
2036
EXPECT_EQ(inout[i].us, expected[i].us);
2037
}
2038
EXPECT_EQ(size, sizeof(struct s));
2039
}
2040
2041
TEST_F(ComputeTest, packed_struct_arg)
2042
{
2043
#pragma pack(push, 1)
2044
struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2045
#pragma pack(pop)
2046
2047
const char *kernel_source =
2048
"struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2049
__kernel void main_test(__global struct s *out, struct s in)\n\
2050
{\n\
2051
uint idx = get_global_id(0);\n\
2052
out[idx].uc = in.uc + 0x12;\n\
2053
out[idx].ul = in.ul + 0x123456789abcdef;\n\
2054
out[idx].us = in.us + 0x1234;\n\
2055
}\n";
2056
auto out = ShaderArg<struct s>({0, 0, 0}, SHADER_ARG_OUTPUT);
2057
auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT);
2058
const struct s expected[] = {
2059
{ 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 }
2060
};
2061
2062
run_shader(kernel_source, out.size(), 1, 1, out, in);
2063
for (int i = 0; i < out.size(); ++i) {
2064
EXPECT_EQ(out[i].uc, expected[i].uc);
2065
EXPECT_EQ(out[i].ul, expected[i].ul);
2066
EXPECT_EQ(out[i].us, expected[i].us);
2067
}
2068
}
2069
2070
TEST_F(ComputeTest, packed_struct_local)
2071
{
2072
#pragma pack(push, 1)
2073
struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2074
#pragma pack(pop)
2075
2076
const char *kernel_source =
2077
"struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2078
__kernel void main_test(__global struct s *out, __constant struct s *in)\n\
2079
{\n\
2080
uint idx = get_global_id(0);\n\
2081
__local struct s tmp[2];\n\
2082
tmp[get_local_id(0)] = in[idx];\n\
2083
barrier(CLK_LOCAL_MEM_FENCE);\n\
2084
out[idx] = tmp[(get_local_id(0) + 1) % 2];\n\
2085
}\n";
2086
auto out = ShaderArg<struct s>({{0, 0, 0}, {0, 0, 0}}, SHADER_ARG_OUTPUT);
2087
auto in = ShaderArg<struct s>({{1, 2, 3}, {0x12, 0x123456789abcdef, 0x1234} }, SHADER_ARG_INPUT);
2088
const struct s expected[] = {
2089
{ 0x12, 0x123456789abcdef, 0x1234 },
2090
{ 1, 2, 3 },
2091
};
2092
2093
run_shader(kernel_source, out.size(), 1, 1, out, in);
2094
for (int i = 0; i < out.size(); ++i) {
2095
EXPECT_EQ(out[i].uc, expected[i].uc);
2096
EXPECT_EQ(out[i].ul, expected[i].ul);
2097
EXPECT_EQ(out[i].us, expected[i].us);
2098
}
2099
}
2100
2101
/* DISABLED because current release versions of WARP either return
2102
* rubbish from reads or crash: they are not prepared to handle
2103
* non-float global constants */
2104
TEST_F(ComputeTest, DISABLED_packed_struct_const)
2105
{
2106
#pragma pack(push, 1)
2107
struct s { uint8_t uc; uint64_t ul; uint16_t us; };
2108
#pragma pack(pop)
2109
2110
const char *kernel_source =
2111
"struct __attribute__((packed)) s {uchar uc; ulong ul; ushort us; };\n\
2112
__kernel void main_test(__global struct s *out, struct s in)\n\
2113
{\n\
2114
__constant struct s base[] = {\n\
2115
{0x12, 0x123456789abcdef, 0x1234},\n\
2116
{0x11, 0x123456789abcdee, 0x1233},\n\
2117
};\n\
2118
uint idx = get_global_id(0);\n\
2119
out[idx].uc = base[idx % 2].uc + in.uc;\n\
2120
out[idx].ul = base[idx % 2].ul + in.ul;\n\
2121
out[idx].us = base[idx % 2].us + in.us;\n\
2122
}\n";
2123
auto out = ShaderArg<struct s>(std::vector<struct s>(2, {0, 0, 0}), SHADER_ARG_OUTPUT);
2124
auto in = ShaderArg<struct s>({1, 2, 3}, SHADER_ARG_INPUT);
2125
const struct s expected[] = {
2126
{ 0x12 + 1, 0x123456789abcdef + 2, 0x1234 + 3 },
2127
{ 0x11 + 1, 0x123456789abcdee + 2, 0x1233 + 3 },
2128
};
2129
2130
run_shader(kernel_source, out.size(), 1, 1, out, in);
2131
for (int i = 0; i < out.size(); ++i) {
2132
EXPECT_EQ(out[i].uc, expected[i].uc);
2133
EXPECT_EQ(out[i].ul, expected[i].ul);
2134
EXPECT_EQ(out[i].us, expected[i].us);
2135
}
2136
}
2137
2138
TEST_F(ComputeTest, DISABLED_printf)
2139
{
2140
const char *kernel_source = R"(
2141
__kernel void main_test(__global float *src, __global uint *dest)
2142
{
2143
__constant char *format_str = "%s: %f";
2144
__constant char *str_val = "Test";
2145
*dest = printf(format_str, str_val, src[0]);
2146
})";
2147
2148
auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);
2149
auto dest = ShaderArg<uint32_t>({ 0xdeadbeef }, SHADER_ARG_OUTPUT);
2150
run_shader(kernel_source, 1, 1, 1, src, dest);
2151
EXPECT_EQ(dest[0], 0);
2152
}
2153
2154
TEST_F(ComputeTest, vload_half)
2155
{
2156
const char *kernel_source = R"(
2157
__kernel void main_test(__global half *src, __global float4 *dest)
2158
{
2159
int offset = get_global_id(0);
2160
dest[offset] = vload_half4(offset, src);
2161
})";
2162
auto src = ShaderArg<uint16_t>({ 0x3c00, 0x4000, 0x4200, 0x4400,
2163
0x4500, 0x4600, 0x4700, 0x4800 }, SHADER_ARG_INPUT);
2164
auto dest = ShaderArg<float>({ FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX,
2165
FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX }, SHADER_ARG_OUTPUT);
2166
run_shader(kernel_source, 2, 1, 1, src, dest);
2167
for (unsigned i = 0; i < 8; ++i)
2168
EXPECT_FLOAT_EQ(dest[i], (float)(i + 1));
2169
}
2170
2171
TEST_F(ComputeTest, vstore_half)
2172
{
2173
const char *kernel_source = R"(
2174
__kernel void main_test(__global half *dst, __global float4 *src)
2175
{
2176
int offset = get_global_id(0);
2177
vstore_half4(src[offset], offset, dst);
2178
})";
2179
auto dest = ShaderArg<uint16_t>({0xdead, 0xdead, 0xdead, 0xdead,
2180
0xdead, 0xdead, 0xdead, 0xdead}, SHADER_ARG_OUTPUT);
2181
auto src = ShaderArg<float>({ 1.0, 2.0, 3.0, 4.0,
2182
5.0, 6.0, 7.0, 8.0 }, SHADER_ARG_INPUT);
2183
run_shader(kernel_source, 2, 1, 1, dest, src);
2184
const uint16_t expected[] = { 0x3c00, 0x4000, 0x4200, 0x4400,
2185
0x4500, 0x4600, 0x4700, 0x4800 };
2186
for (unsigned i = 0; i < 8; ++i)
2187
EXPECT_EQ(dest[i], expected[i]);
2188
}
2189
2190
TEST_F(ComputeTest, inline_function)
2191
{
2192
const char *kernel_source = R"(
2193
inline float helper(float foo)
2194
{
2195
return foo * 2;
2196
}
2197
2198
__kernel void main_test(__global float *dst, __global float *src)
2199
{
2200
*dst = helper(*src);
2201
})";
2202
auto dest = ShaderArg<float>({ NAN }, SHADER_ARG_OUTPUT);
2203
auto src = ShaderArg<float>({ 1.0f }, SHADER_ARG_INPUT);
2204
run_shader(kernel_source, 1, 1, 1, dest, src);
2205
EXPECT_EQ(dest[0], 2.0f);
2206
}
2207
2208
TEST_F(ComputeTest, unused_arg)
2209
{
2210
const char *kernel_source = R"(
2211
__kernel void main_test(__global int *dst, __global int *unused, __global int *src)
2212
{
2213
int i = get_global_id(0);
2214
dst[i] = src[i];
2215
})";
2216
auto dest = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_OUTPUT);
2217
auto src = ShaderArg<int>({ 1, 2, 3, 4 }, SHADER_ARG_INPUT);
2218
auto unused = ShaderArg<int>({ -1, -1, -1, -1 }, SHADER_ARG_INPUT);
2219
run_shader(kernel_source, 4, 1, 1, dest, unused, src);
2220
for (int i = 0; i < 4; ++i)
2221
EXPECT_EQ(dest[i], i + 1);
2222
}
2223
2224