Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/compiler/spirv/vtn_opencl.c
4545 views
1
/*
2
* Copyright © 2018 Red Hat
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*
23
* Authors:
24
* Rob Clark ([email protected])
25
*/
26
27
#include "math.h"
28
#include "nir/nir_builtin_builder.h"
29
30
#include "util/u_printf.h"
31
#include "vtn_private.h"
32
#include "OpenCL.std.h"
33
34
/* Handler callback shared by the OpenCL.std lowering paths below: converts
 * one extended instruction to NIR and returns the result SSA value, or NULL
 * when the instruction produces no result (side effects only).
 */
typedef nir_ssa_def *(*nir_handler)(struct vtn_builder *b,
                                    uint32_t opcode,
                                    unsigned num_srcs, nir_ssa_def **srcs,
                                    struct vtn_type **src_types,
                                    const struct vtn_type *dest_type);
39
40
/* Map a SPIR-V storage class to the LLVM/SPIR address-space number used by
 * libclc's Itanium-style name mangling (the "U3AS<n>" qualifier).  Returns
 * -1 for storage classes with no corresponding address space.
 */
static int to_llvm_address_space(SpvStorageClass mode)
{
   if (mode == SpvStorageClassPrivate || mode == SpvStorageClassFunction)
      return 0;
   if (mode == SpvStorageClassCrossWorkgroup)
      return 1;
   if (mode == SpvStorageClassUniform || mode == SpvStorageClassUniformConstant)
      return 2;
   if (mode == SpvStorageClassWorkgroup)
      return 3;
   return -1;
}
52
53
54
/* Build the Itanium-mangled name for a libclc function ("_Z<len><name>"
 * followed by one encoded argument type per entry in src_types).  Bit i of
 * const_mask marks argument i as pointer-to-const ('K').  The result is
 * returned in *outstring as a malloc'd string the caller must free().
 *
 * NOTE(review): local_name is a fixed 256-byte buffer filled via sprintf
 * with no bounds checking — assumes mangled names stay short; confirm for
 * long function names / many arguments.
 */
static void
vtn_opencl_mangle(const char *in_name,
                  uint32_t const_mask,
                  int ntypes, struct vtn_type **src_types,
                  char **outstring)
{
   char local_name[256] = "";
   /* args_str tracks the write cursor; sprintf returns chars written. */
   char *args_str = local_name + sprintf(local_name, "_Z%zu%s", strlen(in_name), in_name);

   for (unsigned i = 0; i < ntypes; ++i) {
      const struct glsl_type *type = src_types[i]->type;
      enum vtn_base_type base_type = src_types[i]->base_type;
      if (src_types[i]->base_type == vtn_base_type_pointer) {
         /* Pointer argument: 'P', optional address-space qualifier, then the
          * pointee type (address space 0 is the default and is omitted).
          */
         *(args_str++) = 'P';
         int address_space = to_llvm_address_space(src_types[i]->storage_class);
         if (address_space > 0)
            args_str += sprintf(args_str, "U3AS%d", address_space);

         type = src_types[i]->deref->type;
         base_type = src_types[i]->deref->base_type;
      }

      if (const_mask & (1 << i))
         *(args_str++) = 'K';

      unsigned num_elements = glsl_get_components(type);
      if (num_elements > 1) {
         /* Vectors are not treated as built-ins for mangling, so check for substitution.
          * In theory, we'd need to know which substitution value this is. In practice,
          * the functions we need from libclc only support 1
          */
         bool substitution = false;
         for (unsigned j = 0; j < i; ++j) {
            const struct glsl_type *other_type = src_types[j]->base_type == vtn_base_type_pointer ?
               src_types[j]->deref->type : src_types[j]->type;
            if (type == other_type) {
               substitution = true;
               break;
            }
         }

         if (substitution) {
            args_str += sprintf(args_str, "S_");
            continue;
         } else
            args_str += sprintf(args_str, "Dv%d_", num_elements);
      }

      /* Scalar (or vector element) suffix: either an OpenCL opaque type name
       * or the single-letter Itanium code for the primitive type.
       */
      const char *suffix = NULL;
      switch (base_type) {
      case vtn_base_type_sampler: suffix = "11ocl_sampler"; break;
      case vtn_base_type_event: suffix = "9ocl_event"; break;
      default: {
         const char *primitives[] = {
            [GLSL_TYPE_UINT] = "j",
            [GLSL_TYPE_INT] = "i",
            [GLSL_TYPE_FLOAT] = "f",
            [GLSL_TYPE_FLOAT16] = "Dh",
            [GLSL_TYPE_DOUBLE] = "d",
            [GLSL_TYPE_UINT8] = "h",
            [GLSL_TYPE_INT8] = "c",
            [GLSL_TYPE_UINT16] = "t",
            [GLSL_TYPE_INT16] = "s",
            [GLSL_TYPE_UINT64] = "m",
            [GLSL_TYPE_INT64] = "l",
            [GLSL_TYPE_BOOL] = "b",
            [GLSL_TYPE_ERROR] = NULL,
         };
         enum glsl_base_type glsl_base_type = glsl_get_base_type(type);
         assert(glsl_base_type < ARRAY_SIZE(primitives) && primitives[glsl_base_type]);
         suffix = primitives[glsl_base_type];
         break;
      }
      }
      args_str += sprintf(args_str, "%s", suffix);
   }

   *outstring = strdup(local_name);
}
133
134
/* Mangle `name` for the given argument types and look the function up, first
 * in the shader being built, then in the pre-compiled libclc shader
 * (b->options->clc_shader).  When found only in the libclc shader, a
 * parameter-for-parameter declaration is created in the current shader so it
 * can be called; the actual body is linked in later.  Fails the SPIR-V parse
 * if the function cannot be found at all.
 *
 * NOTE(review): vtn_fail presumably does not return here, so the free(mname)
 * below is skipped on the failure path — confirm whether that leak matters.
 */
static nir_function *mangle_and_find(struct vtn_builder *b,
                                     const char *name,
                                     uint32_t const_mask,
                                     uint32_t num_srcs,
                                     struct vtn_type **src_types)
{
   char *mname;
   nir_function *found = NULL;

   vtn_opencl_mangle(name, const_mask, num_srcs, src_types, &mname);
   /* try and find in current shader first. */
   nir_foreach_function(funcs, b->shader) {
      if (!strcmp(funcs->name, mname)) {
         found = funcs;
         break;
      }
   }
   /* if not found here find in clc shader and create a decl mirroring it */
   if (!found && b->options->clc_shader && b->options->clc_shader != b->shader) {
      nir_foreach_function(funcs, b->options->clc_shader) {
         if (!strcmp(funcs->name, mname)) {
            found = funcs;
            break;
         }
      }
      if (found) {
         /* Clone just the signature into the current shader. */
         nir_function *decl = nir_function_create(b->shader, mname);
         decl->num_params = found->num_params;
         decl->params = ralloc_array(b->shader, nir_parameter, decl->num_params);
         for (unsigned i = 0; i < decl->num_params; i++) {
            decl->params[i] = found->params[i];
         }
         found = decl;
      }
   }
   if (!found)
      vtn_fail("Can't find clc function %s\n", mname);
   free(mname);
   return found;
}
174
175
/* Emit a nir_call to the mangled libclc function `name`.  Libclc functions
 * return by pointer: when dest_type is non-NULL a local temporary is created
 * and a deref to it is passed as the first call parameter; that deref is
 * returned in *ret_deref_ptr (NULL for void functions) so the caller can
 * load the result.  Returns false if the function could not be found.
 */
static bool call_mangled_function(struct vtn_builder *b,
                                  const char *name,
                                  uint32_t const_mask,
                                  uint32_t num_srcs,
                                  struct vtn_type **src_types,
                                  const struct vtn_type *dest_type,
                                  nir_ssa_def **srcs,
                                  nir_deref_instr **ret_deref_ptr)
{
   nir_function *found = mangle_and_find(b, name, const_mask, num_srcs, src_types);
   if (!found)
      return false;

   nir_call_instr *call = nir_call_instr_create(b->shader, found);

   nir_deref_instr *ret_deref = NULL;
   uint32_t param_idx = 0;
   if (dest_type) {
      /* Return slot: param 0 is a pointer to a fresh local temporary. */
      nir_variable *ret_tmp = nir_local_variable_create(b->nb.impl,
                                                        glsl_get_bare_type(dest_type->type),
                                                        "return_tmp");
      ret_deref = nir_build_deref_var(&b->nb, ret_tmp);
      call->params[param_idx++] = nir_src_for_ssa(&ret_deref->dest.ssa);
   }

   for (unsigned i = 0; i < num_srcs; i++)
      call->params[param_idx++] = nir_src_for_ssa(srcs[i]);
   nir_builder_instr_insert(&b->nb, &call->instr);

   *ret_deref_ptr = ret_deref;
   return true;
}
207
208
/* Generic driver for OpenCL.std instructions: gather up to 5 source values
 * and their types from the SPIR-V words, invoke `handler`, and push the
 * result (if any) as the instruction's SSA value.  w_dest points at the
 * {result type id, result id} pair, or is NULL for void instructions.
 */
static void
handle_instr(struct vtn_builder *b, uint32_t opcode,
             const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest, nir_handler handler)
{
   struct vtn_type *dest_type = w_dest ? vtn_get_type(b, w_dest[0]) : NULL;

   nir_ssa_def *srcs[5] = { NULL };
   struct vtn_type *src_types[5] = { NULL };
   vtn_assert(num_srcs <= ARRAY_SIZE(srcs));
   for (unsigned i = 0; i < num_srcs; i++) {
      struct vtn_value *val = vtn_untyped_value(b, w_src[i]);
      struct vtn_ssa_value *ssa = vtn_ssa_value(b, w_src[i]);
      srcs[i] = ssa->def;
      src_types[i] = val->type;
   }

   nir_ssa_def *result = handler(b, opcode, num_srcs, srcs, src_types, dest_type);
   if (result) {
      vtn_push_nir_ssa(b, w_dest[1], result);
   } else {
      /* A NULL result is only legal for instructions with no destination. */
      vtn_assert(dest_type == NULL);
   }
}
231
232
/* One-to-one mapping from OpenCL.std opcodes to NIR ALU opcodes, for the
 * instructions that lower directly to a single ALU op.  Fails the SPIR-V
 * parse for anything not listed (those go through other handlers).
 */
static nir_op
nir_alu_op_for_opencl_opcode(struct vtn_builder *b,
                             enum OpenCLstd_Entrypoints opcode)
{
   switch (opcode) {
   case OpenCLstd_Fabs: return nir_op_fabs;
   case OpenCLstd_SAbs: return nir_op_iabs;
   case OpenCLstd_SAdd_sat: return nir_op_iadd_sat;
   case OpenCLstd_UAdd_sat: return nir_op_uadd_sat;
   case OpenCLstd_Ceil: return nir_op_fceil;
   case OpenCLstd_Floor: return nir_op_ffloor;
   case OpenCLstd_SHadd: return nir_op_ihadd;
   case OpenCLstd_UHadd: return nir_op_uhadd;
   case OpenCLstd_Fmax: return nir_op_fmax;
   case OpenCLstd_SMax: return nir_op_imax;
   case OpenCLstd_UMax: return nir_op_umax;
   case OpenCLstd_Fmin: return nir_op_fmin;
   case OpenCLstd_SMin: return nir_op_imin;
   case OpenCLstd_UMin: return nir_op_umin;
   case OpenCLstd_Mix: return nir_op_flrp;
   case OpenCLstd_Native_cos: return nir_op_fcos;
   case OpenCLstd_Native_divide: return nir_op_fdiv;
   case OpenCLstd_Native_exp2: return nir_op_fexp2;
   case OpenCLstd_Native_log2: return nir_op_flog2;
   case OpenCLstd_Native_powr: return nir_op_fpow;
   case OpenCLstd_Native_recip: return nir_op_frcp;
   case OpenCLstd_Native_rsqrt: return nir_op_frsq;
   case OpenCLstd_Native_sin: return nir_op_fsin;
   case OpenCLstd_Native_sqrt: return nir_op_fsqrt;
   case OpenCLstd_SMul_hi: return nir_op_imul_high;
   case OpenCLstd_UMul_hi: return nir_op_umul_high;
   case OpenCLstd_Popcount: return nir_op_bit_count;
   case OpenCLstd_SRhadd: return nir_op_irhadd;
   case OpenCLstd_URhadd: return nir_op_urhadd;
   case OpenCLstd_Rsqrt: return nir_op_frsq;
   case OpenCLstd_Sign: return nir_op_fsign;
   case OpenCLstd_Sqrt: return nir_op_fsqrt;
   case OpenCLstd_SSub_sat: return nir_op_isub_sat;
   case OpenCLstd_USub_sat: return nir_op_usub_sat;
   case OpenCLstd_Trunc: return nir_op_ftrunc;
   case OpenCLstd_Rint: return nir_op_fround_even;
   case OpenCLstd_Half_divide: return nir_op_fdiv;
   case OpenCLstd_Half_recip: return nir_op_frcp;
   /* uhm... */
   case OpenCLstd_UAbs: return nir_op_mov;
   default:
      vtn_fail("No NIR equivalent");
   }
}
281
282
/* Handler for opcodes that map straight to one NIR ALU op (see
 * nir_alu_op_for_opencl_opcode).  Unused source slots are passed as NULL.
 */
static nir_ssa_def *
handle_alu(struct vtn_builder *b, uint32_t opcode,
           unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
           const struct vtn_type *dest_type)
{
   nir_ssa_def *ret = nir_build_alu(&b->nb, nir_alu_op_for_opencl_opcode(b, (enum OpenCLstd_Entrypoints)opcode),
                                    srcs[0], srcs[1], srcs[2], NULL);
   /* bit_count always yields 32 bits; widen/narrow to the destination. */
   if (opcode == OpenCLstd_Popcount)
      ret = nir_u2u(&b->nb, ret, glsl_get_bit_size(dest_type->type));
   return ret;
}
293
294
/* Table of OpenCL.std opcodes that are lowered to libclc library calls,
 * indexed by opcode.  Entries not listed are NULL (no remapping); several
 * "half_"/"native_" variants intentionally alias the full-precision name.
 */
#define REMAP(op, str) [OpenCLstd_##op] = { str }
static const struct {
   const char *fn;
} remap_table[] = {
   REMAP(Distance, "distance"),
   REMAP(Fast_distance, "fast_distance"),
   REMAP(Fast_length, "fast_length"),
   REMAP(Fast_normalize, "fast_normalize"),
   REMAP(Half_rsqrt, "half_rsqrt"),
   REMAP(Half_sqrt, "half_sqrt"),
   REMAP(Length, "length"),
   REMAP(Normalize, "normalize"),
   REMAP(Degrees, "degrees"),
   REMAP(Radians, "radians"),
   REMAP(Rotate, "rotate"),
   REMAP(Smoothstep, "smoothstep"),
   REMAP(Step, "step"),

   REMAP(Pow, "pow"),
   REMAP(Pown, "pown"),
   REMAP(Powr, "powr"),
   REMAP(Rootn, "rootn"),
   REMAP(Modf, "modf"),

   REMAP(Acos, "acos"),
   REMAP(Acosh, "acosh"),
   REMAP(Acospi, "acospi"),
   REMAP(Asin, "asin"),
   REMAP(Asinh, "asinh"),
   REMAP(Asinpi, "asinpi"),
   REMAP(Atan, "atan"),
   REMAP(Atan2, "atan2"),
   REMAP(Atanh, "atanh"),
   REMAP(Atanpi, "atanpi"),
   REMAP(Atan2pi, "atan2pi"),
   REMAP(Cos, "cos"),
   REMAP(Cosh, "cosh"),
   REMAP(Cospi, "cospi"),
   REMAP(Sin, "sin"),
   REMAP(Sinh, "sinh"),
   REMAP(Sinpi, "sinpi"),
   REMAP(Tan, "tan"),
   REMAP(Tanh, "tanh"),
   REMAP(Tanpi, "tanpi"),
   REMAP(Sincos, "sincos"),
   REMAP(Fract, "fract"),
   REMAP(Frexp, "frexp"),
   REMAP(Fma, "fma"),
   REMAP(Fmod, "fmod"),

   REMAP(Half_cos, "cos"),
   REMAP(Half_exp, "exp"),
   REMAP(Half_exp2, "exp2"),
   REMAP(Half_exp10, "exp10"),
   REMAP(Half_log, "log"),
   REMAP(Half_log2, "log2"),
   REMAP(Half_log10, "log10"),
   REMAP(Half_powr, "powr"),
   REMAP(Half_sin, "sin"),
   REMAP(Half_tan, "tan"),

   REMAP(Remainder, "remainder"),
   REMAP(Remquo, "remquo"),
   REMAP(Hypot, "hypot"),
   REMAP(Exp, "exp"),
   REMAP(Exp2, "exp2"),
   REMAP(Exp10, "exp10"),
   REMAP(Expm1, "expm1"),
   REMAP(Ldexp, "ldexp"),

   REMAP(Ilogb, "ilogb"),
   REMAP(Log, "log"),
   REMAP(Log2, "log2"),
   REMAP(Log10, "log10"),
   REMAP(Log1p, "log1p"),
   REMAP(Logb, "logb"),

   REMAP(Cbrt, "cbrt"),
   REMAP(Erfc, "erfc"),
   REMAP(Erf, "erf"),

   REMAP(Lgamma, "lgamma"),
   REMAP(Lgamma_r, "lgamma_r"),
   REMAP(Tgamma, "tgamma"),

   REMAP(UMad_sat, "mad_sat"),
   REMAP(SMad_sat, "mad_sat"),

   REMAP(Shuffle, "shuffle"),
   REMAP(Shuffle2, "shuffle2"),
};
#undef REMAP
386
387
static const char *remap_clc_opcode(enum OpenCLstd_Entrypoints opcode)
388
{
389
if (opcode >= (sizeof(remap_table) / sizeof(const char *)))
390
return NULL;
391
return remap_table[opcode].fn;
392
}
393
394
static struct vtn_type *
395
get_vtn_type_for_glsl_type(struct vtn_builder *b, const struct glsl_type *type)
396
{
397
struct vtn_type *ret = rzalloc(b, struct vtn_type);
398
assert(glsl_type_is_vector_or_scalar(type));
399
ret->type = type;
400
ret->length = glsl_get_vector_elements(type);
401
ret->base_type = glsl_type_is_vector(type) ? vtn_base_type_vector : vtn_base_type_scalar;
402
return ret;
403
}
404
405
static struct vtn_type *
406
get_pointer_type(struct vtn_builder *b, struct vtn_type *t, SpvStorageClass storage_class)
407
{
408
struct vtn_type *ret = rzalloc(b, struct vtn_type);
409
ret->type = nir_address_format_to_glsl_type(
410
vtn_mode_to_address_format(
411
b, vtn_storage_class_to_mode(b, storage_class, NULL, NULL)));
412
ret->base_type = vtn_base_type_pointer;
413
ret->storage_class = storage_class;
414
ret->deref = t;
415
return ret;
416
}
417
418
static struct vtn_type *
419
get_signed_type(struct vtn_builder *b, struct vtn_type *t)
420
{
421
if (t->base_type == vtn_base_type_pointer) {
422
return get_pointer_type(b, get_signed_type(b, t->deref), t->storage_class);
423
}
424
return get_vtn_type_for_glsl_type(
425
b, glsl_vector_type(glsl_signed_base_type_of(glsl_get_base_type(t->type)),
426
glsl_get_vector_elements(t->type)));
427
}
428
429
/* Lower an OpenCL.std instruction via a call to the libclc library function
 * named by remap_table.  Returns the loaded result, or NULL when the opcode
 * has no remapping / the call could not be emitted.
 */
static nir_ssa_def *
handle_clc_fn(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
              int num_srcs,
              nir_ssa_def **srcs,
              struct vtn_type **src_types,
              const struct vtn_type *dest_type)
{
   const char *name = remap_clc_opcode(opcode);
   if (!name)
       return NULL;

   /* Some functions which take params end up with uint (or pointer-to-uint) being passed,
    * which doesn't mangle correctly when the function expects int or pointer-to-int.
    * See https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#_a_id_unsignedsigned_a_unsigned_versus_signed_integers
    */
   int signed_param = -1;
   switch (opcode) {
   case OpenCLstd_Frexp:
   case OpenCLstd_Lgamma_r:
   case OpenCLstd_Pown:
   case OpenCLstd_Rootn:
   case OpenCLstd_Ldexp:
      signed_param = 1;
      break;
   case OpenCLstd_Remquo:
      signed_param = 2;
      break;
   case OpenCLstd_SMad_sat: {
      /* All parameters need to be converted to signed */
      src_types[0] = src_types[1] = src_types[2] = get_signed_type(b, src_types[0]);
      break;
   }
   default: break;
   }

   if (signed_param >= 0) {
      src_types[signed_param] = get_signed_type(b, src_types[signed_param]);
   }

   nir_deref_instr *ret_deref = NULL;

   if (!call_mangled_function(b, name, 0, num_srcs, src_types,
                              dest_type, srcs, &ret_deref))
      return NULL;

   /* Void libclc functions leave ret_deref NULL. */
   return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL;
}
476
477
/* Opcodes that are neither a single ALU op nor (necessarily) a libclc call:
 * first try an open-coded NIR expansion; cases that `break` instead of
 * returning (e.g. when the backend lowers fmod/ldexp/ffma) fall through to
 * the libclc path at the bottom.
 */
static nir_ssa_def *
handle_special(struct vtn_builder *b, uint32_t opcode,
               unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
               const struct vtn_type *dest_type)
{
   nir_builder *nb = &b->nb;
   enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints)opcode;

   switch (cl_opcode) {
   case OpenCLstd_SAbs_diff:
      /* these works easier in direct NIR */
      return nir_iabs_diff(nb, srcs[0], srcs[1]);
   case OpenCLstd_UAbs_diff:
      return nir_uabs_diff(nb, srcs[0], srcs[1]);
   case OpenCLstd_Bitselect:
      return nir_bitselect(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_SMad_hi:
      return nir_imad_hi(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_UMad_hi:
      return nir_umad_hi(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_SMul24:
      return nir_imul24_relaxed(nb, srcs[0], srcs[1]);
   case OpenCLstd_UMul24:
      return nir_umul24_relaxed(nb, srcs[0], srcs[1]);
   case OpenCLstd_SMad24:
      return nir_iadd(nb, nir_imul24_relaxed(nb, srcs[0], srcs[1]), srcs[2]);
   case OpenCLstd_UMad24:
      return nir_umad24_relaxed(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_FClamp:
      return nir_fclamp(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_SClamp:
      return nir_iclamp(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_UClamp:
      return nir_uclamp(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_Copysign:
      return nir_copysign(nb, srcs[0], srcs[1]);
   case OpenCLstd_Cross:
      /* vec4 cross zeroes the 4th component; otherwise a plain 3-way cross. */
      if (dest_type->length == 4)
         return nir_cross4(nb, srcs[0], srcs[1]);
      return nir_cross3(nb, srcs[0], srcs[1]);
   case OpenCLstd_Fdim:
      return nir_fdim(nb, srcs[0], srcs[1]);
   case OpenCLstd_Fmod:
      if (nb->shader->options->lower_fmod)
         break;
      return nir_fmod(nb, srcs[0], srcs[1]);
   case OpenCLstd_Mad:
      return nir_fmad(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_Maxmag:
      return nir_maxmag(nb, srcs[0], srcs[1]);
   case OpenCLstd_Minmag:
      return nir_minmag(nb, srcs[0], srcs[1]);
   case OpenCLstd_Nan:
      return nir_nan(nb, srcs[0]);
   case OpenCLstd_Nextafter:
      return nir_nextafter(nb, srcs[0], srcs[1]);
   case OpenCLstd_Normalize:
      return nir_normalize(nb, srcs[0]);
   case OpenCLstd_Clz:
      return nir_clz_u(nb, srcs[0]);
   case OpenCLstd_Ctz:
      return nir_ctz_u(nb, srcs[0]);
   case OpenCLstd_Select:
      return nir_select(nb, srcs[0], srcs[1], srcs[2]);
   case OpenCLstd_S_Upsample:
   case OpenCLstd_U_Upsample:
      /* SPIR-V and CL have different defs for upsample, just implement in nir */
      return nir_upsample(nb, srcs[0], srcs[1]);
   case OpenCLstd_Native_exp:
      return nir_fexp(nb, srcs[0]);
   case OpenCLstd_Native_exp10:
      /* exp10(x) = exp2(x * log2(10)) */
      return nir_fexp2(nb, nir_fmul_imm(nb, srcs[0], log(10) / log(2)));
   case OpenCLstd_Native_log:
      return nir_flog(nb, srcs[0]);
   case OpenCLstd_Native_log10:
      /* log10(x) = log2(x) * log10(2) */
      return nir_fmul_imm(nb, nir_flog2(nb, srcs[0]), log(2) / log(10));
   case OpenCLstd_Native_tan:
      return nir_ftan(nb, srcs[0]);
   case OpenCLstd_Ldexp:
      if (nb->shader->options->lower_ldexp)
         break;
      return nir_ldexp(nb, srcs[0], srcs[1]);
   case OpenCLstd_Fma:
      /* FIXME: the software implementation only supports fp32 for now. */
      if (nb->shader->options->lower_ffma32 && srcs[0]->bit_size == 32)
         break;
      return nir_ffma(nb, srcs[0], srcs[1], srcs[2]);
   default:
      break;
   }

   /* Fallback: lower through a libclc library call. */
   nir_ssa_def *ret = handle_clc_fn(b, opcode, num_srcs, srcs, src_types, dest_type);
   if (!ret)
      vtn_fail("No NIR equivalent");

   return ret;
}
574
575
/* Core SPIR-V group ops (not OpenCL.std extended instructions) that are
 * implemented via libclc calls: async work-group copies and event waits.
 * Returns the loaded result for value-producing ops, NULL otherwise.
 */
static nir_ssa_def *
handle_core(struct vtn_builder *b, uint32_t opcode,
            unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
            const struct vtn_type *dest_type)
{
   nir_deref_instr *ret_deref = NULL;

   switch ((SpvOp)opcode) {
   case SpvOpGroupAsyncCopy: {
      /* Libclc doesn't include 3-component overloads of the async copy functions.
       * However, the CLC spec says:
       * async_work_group_copy and async_work_group_strided_copy for 3-component vector types
       * behave as async_work_group_copy and async_work_group_strided_copy respectively for 4-component
       * vector types
       */
      for (unsigned i = 0; i < num_srcs; ++i) {
         if (src_types[i]->base_type == vtn_base_type_pointer &&
             src_types[i]->deref->base_type == vtn_base_type_vector &&
             src_types[i]->deref->length == 3) {
            src_types[i] =
               get_pointer_type(b,
                                get_vtn_type_for_glsl_type(b, glsl_replace_vector_type(src_types[i]->deref->type, 4)),
                                src_types[i]->storage_class);
         }
      }
      /* const_mask (1 << 1): the source pointer (param 1) is const. */
      if (!call_mangled_function(b, "async_work_group_strided_copy", (1 << 1), num_srcs, src_types, dest_type, srcs, &ret_deref))
         return NULL;
      break;
   }
   case SpvOpGroupWaitEvents: {
      /* wait_group_events takes a plain int count as its first argument. */
      src_types[0] = get_vtn_type_for_glsl_type(b, glsl_int_type());
      if (!call_mangled_function(b, "wait_group_events", 0, num_srcs, src_types, dest_type, srcs, &ret_deref))
         return NULL;
      break;
   }
   default:
      return NULL;
   }

   return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL;
}
616
617
618
/* Shared implementation of vloadn/vstoren and the half-precision variants.
 * Loads or stores `components` scalars one at a time through a ptr-as-array
 * deref at (offset * components) elements from the base pointer, converting
 * between half and float/double for the vload_half/vstore_half forms.
 *
 * `load` selects direction; `vec_aligned` selects the "vloada/vstorea"
 * addressing (vec3 treated as vec4-sized, vector alignment); `rounding`
 * applies only to half-precision stores.
 */
static void
_handle_v_load_store(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
                     const uint32_t *w, unsigned count, bool load,
                     bool vec_aligned, nir_rounding_mode rounding)
{
   struct vtn_type *type;
   if (load)
      type = vtn_get_type(b, w[1]);
   else
      type = vtn_get_value_type(b, w[5]);
   /* Stores have the data operand first, shifting offset/pointer by one. */
   unsigned a = load ? 0 : 1;

   enum glsl_base_type base_type = glsl_get_base_type(type->type);
   unsigned components = glsl_get_vector_elements(type->type);

   nir_ssa_def *offset = vtn_get_nir_ssa(b, w[5 + a]);
   struct vtn_value *p = vtn_value(b, w[6 + a], vtn_value_type_pointer);

   struct vtn_ssa_value *comps[NIR_MAX_VEC_COMPONENTS];
   nir_ssa_def *ncomps[NIR_MAX_VEC_COMPONENTS];

   /* Scale the element offset to a scalar offset; "vloada" on vec3 uses a
    * vec4-sized stride per the CL spec.
    */
   nir_ssa_def *moffset = nir_imul_imm(&b->nb, offset,
                                       (vec_aligned && components == 3) ? 4 : components);
   nir_deref_instr *deref = vtn_pointer_to_deref(b, p->pointer);

   unsigned alignment = vec_aligned ? glsl_get_cl_alignment(type->type) :
                                      glsl_get_bit_size(type->type) / 8;
   enum glsl_base_type ptr_base_type =
      glsl_get_base_type(p->pointer->type->type);
   if (base_type != ptr_base_type) {
      /* Only the half <-> float/double conversion forms may mismatch. */
      vtn_fail_if(ptr_base_type != GLSL_TYPE_FLOAT16 ||
                  (base_type != GLSL_TYPE_FLOAT &&
                   base_type != GLSL_TYPE_DOUBLE),
                  "vload/vstore cannot do type conversion. "
                  "vload/vstore_half can only convert from half to other "
                  "floating-point types.");

      /* Above-computed alignment was for floats/doubles, not halves */
      alignment /= glsl_get_bit_size(type->type) / glsl_base_type_get_bit_size(ptr_base_type);
   }

   deref = nir_alignment_deref_cast(&b->nb, deref, alignment, 0);

   for (int i = 0; i < components; i++) {
      nir_ssa_def *coffset = nir_iadd_imm(&b->nb, moffset, i);
      nir_deref_instr *arr_deref = nir_build_deref_ptr_as_array(&b->nb, deref, coffset);

      if (load) {
         comps[i] = vtn_local_load(b, arr_deref, p->type->access);
         ncomps[i] = comps[i]->def;
         if (base_type != ptr_base_type) {
            /* Widen the loaded half to the destination float type. */
            assert(ptr_base_type == GLSL_TYPE_FLOAT16 &&
                   (base_type == GLSL_TYPE_FLOAT ||
                    base_type == GLSL_TYPE_DOUBLE));
            ncomps[i] = nir_f2fN(&b->nb, ncomps[i],
                                 glsl_base_type_get_bit_size(base_type));
         }
      } else {
         struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, glsl_scalar_type(base_type));
         struct vtn_ssa_value *val = vtn_ssa_value(b, w[5]);
         ssa->def = nir_channel(&b->nb, val->def, i);
         if (base_type != ptr_base_type) {
            assert(ptr_base_type == GLSL_TYPE_FLOAT16 &&
                   (base_type == GLSL_TYPE_FLOAT ||
                    base_type == GLSL_TYPE_DOUBLE));
            if (rounding == nir_rounding_mode_undef) {
               ssa->def = nir_f2f16(&b->nb, ssa->def);
            } else {
               /* vstore_half_r: convert with the explicit rounding mode. */
               ssa->def = nir_convert_alu_types(&b->nb, 16, ssa->def,
                                                nir_type_float | ssa->def->bit_size,
                                                nir_type_float16,
                                                rounding, false);
            }
         }
         vtn_local_store(b, ssa, arr_deref, p->type->access);
      }
   }
   if (load) {
      vtn_push_nir_ssa(b, w[2], nir_vec(&b->nb, ncomps, components));
   }
}
699
700
static void
701
vtn_handle_opencl_vload(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
702
const uint32_t *w, unsigned count)
703
{
704
_handle_v_load_store(b, opcode, w, count, true,
705
opcode == OpenCLstd_Vloada_halfn,
706
nir_rounding_mode_undef);
707
}
708
709
static void
710
vtn_handle_opencl_vstore(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
711
const uint32_t *w, unsigned count)
712
{
713
_handle_v_load_store(b, opcode, w, count, false,
714
opcode == OpenCLstd_Vstorea_halfn,
715
nir_rounding_mode_undef);
716
}
717
718
static void
719
vtn_handle_opencl_vstore_half_r(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,
720
const uint32_t *w, unsigned count)
721
{
722
_handle_v_load_store(b, opcode, w, count, false,
723
opcode == OpenCLstd_Vstorea_halfn_r,
724
vtn_rounding_mode_to_nir(b, w[8]));
725
}
726
727
/* Append the constant string identified by SPIR-V id `id` to the printf
 * info's string pool and return its starting offset.  The id must resolve
 * to a NUL-terminated char array in constant memory with an initializer.
 */
static unsigned
vtn_add_printf_string(struct vtn_builder *b, uint32_t id, nir_printf_info *info)
{
   nir_deref_instr *deref = vtn_nir_deref(b, id);

   /* Walk up the deref chain to the underlying variable. */
   while (deref && deref->deref_type != nir_deref_type_var)
      deref = nir_deref_instr_parent(deref);

   vtn_fail_if(deref == NULL || !nir_deref_mode_is(deref, nir_var_mem_constant),
               "Printf string argument must be a pointer to a constant variable");
   vtn_fail_if(deref->var->constant_initializer == NULL,
               "Printf string argument must have an initializer");
   vtn_fail_if(!glsl_type_is_array(deref->var->type),
               "Printf string must be an char array");
   const struct glsl_type *char_type = glsl_get_array_element(deref->var->type);
   vtn_fail_if(char_type != glsl_uint8_t_type() &&
               char_type != glsl_int8_t_type(),
               "Printf string must be an char array");

   nir_constant *c = deref->var->constant_initializer;
   assert(c->num_elements == glsl_get_length(deref->var->type));

   /* Grow the pool and append this string at the old end. */
   unsigned idx = info->string_size;
   info->strings = reralloc_size(b->shader, info->strings,
                                 idx + c->num_elements);
   info->string_size += c->num_elements;

   char *str = &info->strings[idx];
   bool found_null = false;
   for (unsigned i = 0; i < c->num_elements; i++) {
      /* Copy the low byte of each element's constant value. */
      memcpy((char *)str + i, c->elements[i]->values, 1);
      if (str[i] == '\0')
         found_null = true;
   }
   vtn_fail_if(!found_null, "Printf string must be null terminated");
   return idx;
}
764
765
/* printf is special because there are no limits on args */
/* Lower an OpenCL printf: record the format string and argument layout in
 * the shader's printf_info table, pack the arguments into an ad-hoc struct
 * in a local variable, and emit a nir_printf intrinsic referencing both.
 * Without printf support, the call is replaced by a constant -1 result.
 */
static void
handle_printf(struct vtn_builder *b, uint32_t opcode,
              const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest)
{
   if (!b->options->caps.printf) {
      /* printf returns a nonzero value on failure per the CL spec. */
      vtn_push_nir_ssa(b, w_dest[1], nir_imm_int(&b->nb, -1));
      return;
   }

   /* Step 1. extract the format string */

   /*
    * info_idx is 1-based to match clover/llvm
    * the backend indexes the info table at info_idx - 1.
    */
   b->shader->printf_info_count++;
   unsigned info_idx = b->shader->printf_info_count;

   b->shader->printf_info = reralloc(b->shader, b->shader->printf_info,
                                     nir_printf_info, info_idx);
   nir_printf_info *info = &b->shader->printf_info[info_idx - 1];

   info->strings = NULL;
   info->string_size = 0;

   vtn_add_printf_string(b, w_src[0], info);

   info->num_args = num_srcs - 1;
   info->arg_sizes = ralloc_array(b->shader, unsigned, info->num_args);

   /* Step 2, build an ad-hoc struct type out of the args */
   unsigned field_offset = 0;
   struct glsl_struct_field *fields =
      rzalloc_array(b, struct glsl_struct_field, num_srcs - 1);
   for (unsigned i = 1; i < num_srcs; ++i) {
      struct vtn_value *val = vtn_untyped_value(b, w_src[i]);
      struct vtn_type *src_type = val->type;
      fields[i - 1].type = src_type->type;
      fields[i - 1].name = ralloc_asprintf(b->shader, "arg_%u", i);
      /* Keep each field 4-byte aligned within the packed struct. */
      field_offset = align(field_offset, 4);
      fields[i - 1].offset = field_offset;
      info->arg_sizes[i - 1] = glsl_get_cl_size(src_type->type);
      field_offset += glsl_get_cl_size(src_type->type);
   }
   const struct glsl_type *struct_type =
      glsl_struct_type(fields, num_srcs - 1, "printf", true);

   /* Step 3, create a variable of that type and populate its fields */
   nir_variable *var = nir_local_variable_create(b->func->nir_func->impl,
                                                 struct_type, NULL);
   nir_deref_instr *deref_var = nir_build_deref_var(&b->nb, var);
   size_t fmt_pos = 0;
   for (unsigned i = 1; i < num_srcs; ++i) {
      nir_deref_instr *field_deref =
         nir_build_deref_struct(&b->nb, deref_var, i - 1);
      nir_ssa_def *field_src = vtn_ssa_value(b, w_src[i])->def;
      /* extract strings */
      /* %s arguments are stored as offsets into the string pool rather
       * than pointers, so the backend can resolve them.
       */
      fmt_pos = util_printf_next_spec_pos(info->strings, fmt_pos);
      if (fmt_pos != -1 && info->strings[fmt_pos] == 's') {
         unsigned idx = vtn_add_printf_string(b, w_src[i], info);
         nir_store_deref(&b->nb, field_deref,
                         nir_imm_intN_t(&b->nb, idx, field_src->bit_size),
                         ~0 /* write_mask */);
      } else
         nir_store_deref(&b->nb, field_deref, field_src, ~0);
   }

   /* Lastly, the actual intrinsic */
   nir_ssa_def *fmt_idx = nir_imm_int(&b->nb, info_idx);
   nir_ssa_def *ret = nir_printf(&b->nb, fmt_idx, &deref_var->dest.ssa);
   vtn_push_nir_ssa(b, w_dest[1], ret);
}
838
839
static nir_ssa_def *
840
handle_round(struct vtn_builder *b, uint32_t opcode,
841
unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
842
const struct vtn_type *dest_type)
843
{
844
nir_ssa_def *src = srcs[0];
845
nir_builder *nb = &b->nb;
846
nir_ssa_def *half = nir_imm_floatN_t(nb, 0.5, src->bit_size);
847
nir_ssa_def *truncated = nir_ftrunc(nb, src);
848
nir_ssa_def *remainder = nir_fsub(nb, src, truncated);
849
850
return nir_bcsel(nb, nir_fge(nb, nir_fabs(nb, remainder), half),
851
nir_fadd(nb, truncated, nir_fsign(nb, src)), truncated);
852
}
853
854
/* OpenCL shuffle(): build the result one component at a time by extracting
 * input[mask[i]].  Mask components are masked to the input's component
 * count (assumes in_elems is a power of two, as CL shuffle only supports
 * 2/4/8/16-component inputs — the wrap for other sizes would be wrong).
 */
static nir_ssa_def *
handle_shuffle(struct vtn_builder *b, uint32_t opcode,
               unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
               const struct vtn_type *dest_type)
{
   struct nir_ssa_def *input = srcs[0];
   struct nir_ssa_def *mask = srcs[1];

   unsigned out_elems = dest_type->length;
   nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS];
   unsigned in_elems = input->num_components;
   /* Normalize the mask to 32 bits so channel math is uniform. */
   if (mask->bit_size != 32)
      mask = nir_u2u32(&b->nb, mask);
   mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, in_elems - 1, mask->bit_size));
   for (unsigned i = 0; i < out_elems; i++)
      outres[i] = nir_vector_extract(&b->nb, input, nir_channel(&b->nb, mask, i));

   return nir_vec(&b->nb, outres, out_elems);
}
873
874
/* OpenCL shuffle2(): like shuffle but selects from two inputs.  The mask is
 * wrapped to 2*in_elems; the low bits index within an input and the "high"
 * bit selects input0 vs input1 (mask < in_elems picks input0).
 */
static nir_ssa_def *
handle_shuffle2(struct vtn_builder *b, uint32_t opcode,
                unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,
                const struct vtn_type *dest_type)
{
   struct nir_ssa_def *input0 = srcs[0];
   struct nir_ssa_def *input1 = srcs[1];
   struct nir_ssa_def *mask = srcs[2];

   unsigned out_elems = dest_type->length;
   nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS];
   unsigned in_elems = input0->num_components;
   unsigned total_mask = 2 * in_elems - 1;
   unsigned half_mask = in_elems - 1;
   /* Normalize the mask to 32 bits so channel math is uniform. */
   if (mask->bit_size != 32)
      mask = nir_u2u32(&b->nb, mask);
   mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, total_mask, mask->bit_size));
   for (unsigned i = 0; i < out_elems; i++) {
      nir_ssa_def *this_mask = nir_channel(&b->nb, mask, i);
      nir_ssa_def *vmask = nir_iand(&b->nb, this_mask, nir_imm_intN_t(&b->nb, half_mask, mask->bit_size));
      /* Extract the candidate lane from both inputs, then pick one. */
      nir_ssa_def *val0 = nir_vector_extract(&b->nb, input0, vmask);
      nir_ssa_def *val1 = nir_vector_extract(&b->nb, input1, vmask);
      nir_ssa_def *sel = nir_ilt(&b->nb, this_mask, nir_imm_intN_t(&b->nb, in_elems, mask->bit_size));
      outres[i] = nir_bcsel(&b->nb, sel, val0, val1);
   }
   return nir_vec(&b->nb, outres, out_elems);
}
901
902
/* Dispatch one instruction from the "OpenCL.std" extended instruction set.
 *
 * For these entrypoints the word layout is: w[1] = result type id,
 * w[2] = result id, and the actual source operands start at w[5] — hence
 * "w + 5, count - 5" for the sources and "w + 1" for the dest type/id pair
 * handed to handle_instr()/handle_printf().
 *
 * Returns true when the opcode was handled; unknown opcodes vtn_fail().
 */
bool
vtn_handle_opencl_instruction(struct vtn_builder *b, SpvOp ext_opcode,
                              const uint32_t *w, unsigned count)
{
   enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints) ext_opcode;

   switch (cl_opcode) {
   /* Entrypoints with a direct NIR ALU-op mapping (lowered by handle_alu). */
   case OpenCLstd_Fabs:
   case OpenCLstd_SAbs:
   case OpenCLstd_UAbs:
   case OpenCLstd_SAdd_sat:
   case OpenCLstd_UAdd_sat:
   case OpenCLstd_Ceil:
   case OpenCLstd_Floor:
   case OpenCLstd_Fmax:
   case OpenCLstd_SHadd:
   case OpenCLstd_UHadd:
   case OpenCLstd_SMax:
   case OpenCLstd_UMax:
   case OpenCLstd_Fmin:
   case OpenCLstd_SMin:
   case OpenCLstd_UMin:
   case OpenCLstd_Mix:
   case OpenCLstd_Native_cos:
   case OpenCLstd_Native_divide:
   case OpenCLstd_Native_exp2:
   case OpenCLstd_Native_log2:
   case OpenCLstd_Native_powr:
   case OpenCLstd_Native_recip:
   case OpenCLstd_Native_rsqrt:
   case OpenCLstd_Native_sin:
   case OpenCLstd_Native_sqrt:
   case OpenCLstd_SMul_hi:
   case OpenCLstd_UMul_hi:
   case OpenCLstd_Popcount:
   case OpenCLstd_SRhadd:
   case OpenCLstd_URhadd:
   case OpenCLstd_Rsqrt:
   case OpenCLstd_Sign:
   case OpenCLstd_Sqrt:
   case OpenCLstd_SSub_sat:
   case OpenCLstd_USub_sat:
   case OpenCLstd_Trunc:
   case OpenCLstd_Rint:
   case OpenCLstd_Half_divide:
   case OpenCLstd_Half_recip:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_alu);
      return true;
   /* Entrypoints needing a non-trivial expansion (built by handle_special,
    * typically via nir_builtin_builder helpers).
    */
   case OpenCLstd_SAbs_diff:
   case OpenCLstd_UAbs_diff:
   case OpenCLstd_SMad_hi:
   case OpenCLstd_UMad_hi:
   case OpenCLstd_SMad24:
   case OpenCLstd_UMad24:
   case OpenCLstd_SMul24:
   case OpenCLstd_UMul24:
   case OpenCLstd_Bitselect:
   case OpenCLstd_FClamp:
   case OpenCLstd_SClamp:
   case OpenCLstd_UClamp:
   case OpenCLstd_Copysign:
   case OpenCLstd_Cross:
   case OpenCLstd_Degrees:
   case OpenCLstd_Fdim:
   case OpenCLstd_Fma:
   case OpenCLstd_Distance:
   case OpenCLstd_Fast_distance:
   case OpenCLstd_Fast_length:
   case OpenCLstd_Fast_normalize:
   case OpenCLstd_Half_rsqrt:
   case OpenCLstd_Half_sqrt:
   case OpenCLstd_Length:
   case OpenCLstd_Mad:
   case OpenCLstd_Maxmag:
   case OpenCLstd_Minmag:
   case OpenCLstd_Nan:
   case OpenCLstd_Nextafter:
   case OpenCLstd_Normalize:
   case OpenCLstd_Radians:
   case OpenCLstd_Rotate:
   case OpenCLstd_Select:
   case OpenCLstd_Step:
   case OpenCLstd_Smoothstep:
   case OpenCLstd_S_Upsample:
   case OpenCLstd_U_Upsample:
   case OpenCLstd_Clz:
   case OpenCLstd_Ctz:
   case OpenCLstd_Native_exp:
   case OpenCLstd_Native_exp10:
   case OpenCLstd_Native_log:
   case OpenCLstd_Native_log10:
   case OpenCLstd_Acos:
   case OpenCLstd_Acosh:
   case OpenCLstd_Acospi:
   case OpenCLstd_Asin:
   case OpenCLstd_Asinh:
   case OpenCLstd_Asinpi:
   case OpenCLstd_Atan:
   case OpenCLstd_Atan2:
   case OpenCLstd_Atanh:
   case OpenCLstd_Atanpi:
   case OpenCLstd_Atan2pi:
   case OpenCLstd_Fract:
   case OpenCLstd_Frexp:
   case OpenCLstd_Exp:
   case OpenCLstd_Exp2:
   case OpenCLstd_Expm1:
   case OpenCLstd_Exp10:
   case OpenCLstd_Fmod:
   case OpenCLstd_Ilogb:
   case OpenCLstd_Log:
   case OpenCLstd_Log2:
   case OpenCLstd_Log10:
   case OpenCLstd_Log1p:
   case OpenCLstd_Logb:
   case OpenCLstd_Ldexp:
   case OpenCLstd_Cos:
   case OpenCLstd_Cosh:
   case OpenCLstd_Cospi:
   case OpenCLstd_Sin:
   case OpenCLstd_Sinh:
   case OpenCLstd_Sinpi:
   case OpenCLstd_Tan:
   case OpenCLstd_Tanh:
   case OpenCLstd_Tanpi:
   case OpenCLstd_Cbrt:
   case OpenCLstd_Erfc:
   case OpenCLstd_Erf:
   case OpenCLstd_Lgamma:
   case OpenCLstd_Lgamma_r:
   case OpenCLstd_Tgamma:
   case OpenCLstd_Pow:
   case OpenCLstd_Powr:
   case OpenCLstd_Pown:
   case OpenCLstd_Rootn:
   case OpenCLstd_Remainder:
   case OpenCLstd_Remquo:
   case OpenCLstd_Hypot:
   case OpenCLstd_Sincos:
   case OpenCLstd_Modf:
   case OpenCLstd_UMad_sat:
   case OpenCLstd_SMad_sat:
   case OpenCLstd_Native_tan:
   case OpenCLstd_Half_cos:
   case OpenCLstd_Half_exp:
   case OpenCLstd_Half_exp2:
   case OpenCLstd_Half_exp10:
   case OpenCLstd_Half_log:
   case OpenCLstd_Half_log2:
   case OpenCLstd_Half_log10:
   case OpenCLstd_Half_powr:
   case OpenCLstd_Half_sin:
   case OpenCLstd_Half_tan:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_special);
      return true;
   /* Vector load/store families get dedicated handlers (they need the raw
    * word stream to build the pointer/deref themselves).
    */
   case OpenCLstd_Vloadn:
   case OpenCLstd_Vload_half:
   case OpenCLstd_Vload_halfn:
   case OpenCLstd_Vloada_halfn:
      vtn_handle_opencl_vload(b, cl_opcode, w, count);
      return true;
   case OpenCLstd_Vstoren:
   case OpenCLstd_Vstore_half:
   case OpenCLstd_Vstore_halfn:
   case OpenCLstd_Vstorea_halfn:
      vtn_handle_opencl_vstore(b, cl_opcode, w, count);
      return true;
   /* _r variants carry an extra rounding-mode operand. */
   case OpenCLstd_Vstore_half_r:
   case OpenCLstd_Vstore_halfn_r:
   case OpenCLstd_Vstorea_halfn_r:
      vtn_handle_opencl_vstore_half_r(b, cl_opcode, w, count);
      return true;
   case OpenCLstd_Shuffle:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle);
      return true;
   case OpenCLstd_Shuffle2:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle2);
      return true;
   case OpenCLstd_Round:
      handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_round);
      return true;
   case OpenCLstd_Printf:
      handle_printf(b, ext_opcode, w + 5, count - 5, w + 1);
      return true;
   case OpenCLstd_Prefetch:
      /* TODO maybe add a nir instruction for this? */
      return true;
   default:
      vtn_fail("unhandled opencl opc: %u\n", ext_opcode);
      return false;
   }
}
1094
1095
bool
1096
vtn_handle_opencl_core_instruction(struct vtn_builder *b, SpvOp opcode,
1097
const uint32_t *w, unsigned count)
1098
{
1099
switch (opcode) {
1100
case SpvOpGroupAsyncCopy:
1101
handle_instr(b, opcode, w + 4, count - 4, w + 1, handle_core);
1102
return true;
1103
case SpvOpGroupWaitEvents:
1104
handle_instr(b, opcode, w + 2, count - 2, NULL, handle_core);
1105
return true;
1106
default:
1107
return false;
1108
}
1109
return true;
1110
}
1111
1112