GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/microsoft/clc/clc_compiler.c
/*
 * Copyright © Microsoft Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_serialize.h"
#include "glsl_types.h"
#include "nir_types.h"
#include "clc_compiler.h"
#include "clc_helpers.h"
#include "clc_nir.h"
#include "../compiler/dxil_nir.h"
#include "../compiler/dxil_nir_lower_int_samplers.h"
#include "../compiler/nir_to_dxil.h"

#include "util/u_debug.h"
#include <util/u_math.h>
#include "spirv/nir_spirv.h"
#include "nir_builder.h"
#include "nir_builtin_builder.h"

#include "git_sha1.h"

enum clc_debug_flags {
   CLC_DEBUG_DUMP_SPIRV = 1 << 0,
   CLC_DEBUG_VERBOSE = 1 << 1,
};

static const struct debug_named_value clc_debug_options[] = {
   { "dump_spirv", CLC_DEBUG_DUMP_SPIRV, "Dump spirv blobs" },
   { "verbose", CLC_DEBUG_VERBOSE, NULL },
   DEBUG_NAMED_VALUE_END
};

DEBUG_GET_ONCE_FLAGS_OPTION(debug_clc, "CLC_DEBUG", clc_debug_options, 0)

static void
clc_print_kernels_info(const struct clc_object *obj)
{
   fprintf(stdout, "Kernels:\n");
   for (unsigned i = 0; i < obj->num_kernels; i++) {
      const struct clc_kernel_arg *args = obj->kernels[i].args;
      bool first = true;

      fprintf(stdout, "\tvoid %s(", obj->kernels[i].name);
      for (unsigned j = 0; j < obj->kernels[i].num_args; j++) {
         if (!first)
            fprintf(stdout, ", ");
         else
            first = false;

         switch (args[j].address_qualifier) {
         case CLC_KERNEL_ARG_ADDRESS_GLOBAL:
            fprintf(stdout, "__global ");
            break;
         case CLC_KERNEL_ARG_ADDRESS_LOCAL:
            fprintf(stdout, "__local ");
            break;
         case CLC_KERNEL_ARG_ADDRESS_CONSTANT:
            fprintf(stdout, "__constant ");
            break;
         default:
            break;
         }

         if (args[j].type_qualifier & CLC_KERNEL_ARG_TYPE_VOLATILE)
            fprintf(stdout, "volatile ");
         if (args[j].type_qualifier & CLC_KERNEL_ARG_TYPE_CONST)
            fprintf(stdout, "const ");
         if (args[j].type_qualifier & CLC_KERNEL_ARG_TYPE_RESTRICT)
            fprintf(stdout, "restrict ");

         fprintf(stdout, "%s %s", args[j].type_name, args[j].name);
      }
      fprintf(stdout, ");\n");
   }
}

struct clc_image_lower_context
{
   struct clc_dxil_metadata *metadata;
   unsigned *num_srvs;
   unsigned *num_uavs;
   nir_deref_instr *deref;
   unsigned num_buf_ids;
   int metadata_index;
};

static int
lower_image_deref_impl(nir_builder *b, struct clc_image_lower_context *context,
                       const struct glsl_type *new_var_type,
                       unsigned *num_bindings)
{
   nir_variable *in_var = nir_deref_instr_get_variable(context->deref);
   nir_variable *uniform = nir_variable_create(b->shader, nir_var_uniform, new_var_type, NULL);
   uniform->data.access = in_var->data.access;
   uniform->data.binding = in_var->data.binding;
   if (context->num_buf_ids > 0) {
      // Need to assign a new binding
      context->metadata->args[context->metadata_index].
         image.buf_ids[context->num_buf_ids] = uniform->data.binding = (*num_bindings)++;
   }
   context->num_buf_ids++;
   return uniform->data.binding;
}

static int
lower_read_only_image_deref(nir_builder *b, struct clc_image_lower_context *context,
                            nir_alu_type image_type)
{
   nir_variable *in_var = nir_deref_instr_get_variable(context->deref);

   // Non-writeable images should be converted to samplers,
   // since they may have texture operations done on them
   const struct glsl_type *new_var_type =
      glsl_sampler_type(glsl_get_sampler_dim(in_var->type),
                        false, glsl_sampler_type_is_array(in_var->type),
                        nir_get_glsl_base_type_for_nir_type(image_type | 32));
   return lower_image_deref_impl(b, context, new_var_type, context->num_srvs);
}

static int
lower_read_write_image_deref(nir_builder *b, struct clc_image_lower_context *context,
                             nir_alu_type image_type)
{
   nir_variable *in_var = nir_deref_instr_get_variable(context->deref);
   const struct glsl_type *new_var_type =
      glsl_image_type(glsl_get_sampler_dim(in_var->type),
                      glsl_sampler_type_is_array(in_var->type),
                      nir_get_glsl_base_type_for_nir_type(image_type | 32));
   return lower_image_deref_impl(b, context, new_var_type, context->num_uavs);
}

static void
clc_lower_input_image_deref(nir_builder *b, struct clc_image_lower_context *context)
{
   // The input variable here isn't actually an image, it's just the
   // image format data.
   //
   // For every use of an image in a different way, we'll add an
   // appropriate uniform to match it. That can result in up to
   // 3 uniforms (float4, int4, uint4) for each image. Only one of these
   // formats will actually produce correct data, but a single kernel
   // could use runtime conditionals to potentially access any of them.
   //
   // If the image is used in a query that doesn't have a corresponding
   // DXIL intrinsic (CL image channel order or channel format), then
   // we'll add a kernel input for that data that'll be lowered by the
   // explicit IO pass later on.
   //
   // After all that, we can remove the image input variable and deref.

   enum image_uniform_type {
      FLOAT4,
      INT4,
      UINT4,
      IMAGE_UNIFORM_TYPE_COUNT
   };

   int image_bindings[IMAGE_UNIFORM_TYPE_COUNT] = {-1, -1, -1};
   nir_ssa_def *format_deref_dest = NULL, *order_deref_dest = NULL;

   nir_variable *in_var = nir_deref_instr_get_variable(context->deref);
   enum gl_access_qualifier access = in_var->data.access;

   context->metadata_index = 0;
   while (context->metadata->args[context->metadata_index].image.buf_ids[0] != in_var->data.binding)
      context->metadata_index++;

   context->num_buf_ids = 0;

   /* Do this in 2 passes:
    * 1. When encountering a strongly-typed access (load/store), replace the deref
    *    with one that references an appropriately typed variable. When encountering
    *    an untyped access (size query), if we have a strongly-typed variable already,
    *    replace the deref to point to it.
    * 2. If there's any references left, they should all be untyped. If we found
    *    a strongly-typed access later in the 1st pass, then just replace the reference.
    *    If we didn't, e.g. the resource is only used for a size query, then pick an
    *    arbitrary type for it.
    */
   for (int pass = 0; pass < 2; ++pass) {
      nir_foreach_use_safe(src, &context->deref->dest.ssa) {
         enum image_uniform_type type;

         if (src->parent_instr->type == nir_instr_type_intrinsic) {
            nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(src->parent_instr);
            enum nir_alu_type dest_type;

            b->cursor = nir_before_instr(&intrinsic->instr);

            switch (intrinsic->intrinsic) {
            case nir_intrinsic_image_deref_load:
            case nir_intrinsic_image_deref_store: {
               dest_type = intrinsic->intrinsic == nir_intrinsic_image_deref_load ?
                  nir_intrinsic_dest_type(intrinsic) : nir_intrinsic_src_type(intrinsic);

               switch (nir_alu_type_get_base_type(dest_type)) {
               case nir_type_float: type = FLOAT4; break;
               case nir_type_int: type = INT4; break;
               case nir_type_uint: type = UINT4; break;
               default: unreachable("Unsupported image type for load.");
               }

               int image_binding = image_bindings[type];
               if (image_binding < 0) {
                  image_binding = image_bindings[type] =
                     lower_read_write_image_deref(b, context, dest_type);
               }

               assert((in_var->data.access & ACCESS_NON_WRITEABLE) == 0);
               nir_rewrite_image_intrinsic(intrinsic, nir_imm_int(b, image_binding), false);
               break;
            }

            case nir_intrinsic_image_deref_size: {
               int image_binding = -1;
               for (unsigned i = 0; i < IMAGE_UNIFORM_TYPE_COUNT; ++i) {
                  if (image_bindings[i] >= 0) {
                     image_binding = image_bindings[i];
                     break;
                  }
               }
               if (image_binding < 0) {
                  // Skip for now and come back to it
                  if (pass == 0)
                     break;

                  type = FLOAT4;
                  image_binding = image_bindings[type] =
                     lower_read_write_image_deref(b, context, nir_type_float32);
               }

               assert((in_var->data.access & ACCESS_NON_WRITEABLE) == 0);
               nir_rewrite_image_intrinsic(intrinsic, nir_imm_int(b, image_binding), false);
               break;
            }

            case nir_intrinsic_image_deref_format:
            case nir_intrinsic_image_deref_order: {
               nir_ssa_def **cached_deref = intrinsic->intrinsic == nir_intrinsic_image_deref_format ?
                  &format_deref_dest : &order_deref_dest;
               if (!*cached_deref) {
                  nir_variable *new_input = nir_variable_create(b->shader, nir_var_uniform, glsl_uint_type(), NULL);
                  new_input->data.driver_location = in_var->data.driver_location;
                  if (intrinsic->intrinsic == nir_intrinsic_image_deref_format) {
                     /* Match cl_image_format { image_channel_order, image_channel_data_type }; */
                     new_input->data.driver_location += glsl_get_cl_size(new_input->type);
                  }

                  b->cursor = nir_after_instr(&context->deref->instr);
                  *cached_deref = nir_load_var(b, new_input);
               }

               /* No actual intrinsic needed here, just reference the loaded variable */
               nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa, *cached_deref);
               nir_instr_remove(&intrinsic->instr);
               break;
            }

            default:
               unreachable("Unsupported image intrinsic");
            }
         } else if (src->parent_instr->type == nir_instr_type_tex) {
            assert(in_var->data.access & ACCESS_NON_WRITEABLE);
            nir_tex_instr *tex = nir_instr_as_tex(src->parent_instr);

            switch (nir_alu_type_get_base_type(tex->dest_type)) {
            case nir_type_float: type = FLOAT4; break;
            case nir_type_int: type = INT4; break;
            case nir_type_uint: type = UINT4; break;
            default: unreachable("Unsupported image format for sample.");
            }

            int image_binding = image_bindings[type];
            if (image_binding < 0) {
               image_binding = image_bindings[type] =
                  lower_read_only_image_deref(b, context, tex->dest_type);
            }

            nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_texture_deref));
            tex->texture_index = image_binding;
         }
      }
   }

   context->metadata->args[context->metadata_index].image.num_buf_ids = context->num_buf_ids;

   nir_instr_remove(&context->deref->instr);
   exec_node_remove(&in_var->node);
}

static void
clc_lower_images(nir_shader *nir, struct clc_image_lower_context *context)
{
   nir_foreach_function(func, nir) {
      if (!func->is_entrypoint)
         continue;
      assert(func->impl);

      nir_builder b;
      nir_builder_init(&b, func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type == nir_instr_type_deref) {
               context->deref = nir_instr_as_deref(instr);

               if (glsl_type_is_image(context->deref->type)) {
                  assert(context->deref->deref_type == nir_deref_type_var);
                  clc_lower_input_image_deref(&b, context);
               }
            }
         }
      }
   }
}

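/* Rewrite 64-bit loads of the compute system values (invocation/workgroup IDs
 * and workgroup counts) as 32-bit loads whose results are widened back to
 * 64 bits with u2u64, so later passes only ever see 32-bit system values.
 */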
static void
clc_lower_64bit_semantics(nir_shader *nir)
{
   nir_foreach_function(func, nir) {
      nir_builder b;
      nir_builder_init(&b, func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type == nir_instr_type_intrinsic) {
               nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
               switch (intrinsic->intrinsic) {
               case nir_intrinsic_load_global_invocation_id:
               case nir_intrinsic_load_global_invocation_id_zero_base:
               case nir_intrinsic_load_base_global_invocation_id:
               case nir_intrinsic_load_local_invocation_id:
               case nir_intrinsic_load_workgroup_id:
               case nir_intrinsic_load_workgroup_id_zero_base:
               case nir_intrinsic_load_base_workgroup_id:
               case nir_intrinsic_load_num_workgroups:
                  break;
               default:
                  continue;
               }

               if (nir_instr_ssa_def(instr)->bit_size != 64)
                  continue;

               intrinsic->dest.ssa.bit_size = 32;
               b.cursor = nir_after_instr(instr);

               nir_ssa_def *i64 = nir_u2u64(&b, &intrinsic->dest.ssa);
               nir_ssa_def_rewrite_uses_after(
                  &intrinsic->dest.ssa,
                  i64,
                  i64->parent_instr);
            }
         }
      }
   }
}

static void
clc_lower_nonnormalized_samplers(nir_shader *nir,
                                 const dxil_wrap_sampler_state *states)
{
   nir_foreach_function(func, nir) {
      if (!func->is_entrypoint)
         continue;
      assert(func->impl);

      nir_builder b;
      nir_builder_init(&b, func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_tex)
               continue;
            nir_tex_instr *tex = nir_instr_as_tex(instr);

            int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
            if (sampler_src_idx == -1)
               continue;

            nir_src *sampler_src = &tex->src[sampler_src_idx].src;
            assert(sampler_src->is_ssa && sampler_src->ssa->parent_instr->type == nir_instr_type_deref);
            nir_variable *sampler = nir_deref_instr_get_variable(
               nir_instr_as_deref(sampler_src->ssa->parent_instr));

            // If the sampler returns ints, we'll handle this in the int lowering pass
            if (nir_alu_type_get_base_type(tex->dest_type) != nir_type_float)
               continue;

            // If sampler uses normalized coords, nothing to do
            if (!states[sampler->data.binding].is_nonnormalized_coords)
               continue;

            b.cursor = nir_before_instr(&tex->instr);

            int coords_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
            assert(coords_idx != -1);
            nir_ssa_def *coords =
               nir_ssa_for_src(&b, tex->src[coords_idx].src, tex->coord_components);

            nir_ssa_def *txs = nir_i2f32(&b, nir_get_texture_size(&b, tex));

            // Normalize coords for tex
            nir_ssa_def *scale = nir_frcp(&b, txs);
            nir_ssa_def *comps[4];
            for (unsigned i = 0; i < coords->num_components; ++i) {
               comps[i] = nir_channel(&b, coords, i);
               if (tex->is_array && i == coords->num_components - 1) {
                  // Don't scale the array index, but do clamp it
                  comps[i] = nir_fround_even(&b, comps[i]);
                  comps[i] = nir_fmax(&b, comps[i], nir_imm_float(&b, 0.0f));
                  comps[i] = nir_fmin(&b, comps[i], nir_fsub(&b, nir_channel(&b, txs, i), nir_imm_float(&b, 1.0f)));
                  break;
               }

               // The CTS is pretty clear that this value has to be floored for nearest sampling
               // but must not be for linear sampling.
               if (!states[sampler->data.binding].is_linear_filtering)
                  comps[i] = nir_fadd_imm(&b, nir_ffloor(&b, comps[i]), 0.5f);
               comps[i] = nir_fmul(&b, comps[i], nir_channel(&b, scale, i));
            }
            nir_ssa_def *normalized_coords = nir_vec(&b, comps, coords->num_components);
            nir_instr_rewrite_src(&tex->instr,
                                  &tex->src[coords_idx].src,
                                  nir_src_for_ssa(normalized_coords));
         }
      }
   }
}

static void
clc_context_optimize(nir_shader *s)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS(progress, s, nir_split_var_copies);
      NIR_PASS(progress, s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, s, nir_lower_var_copies);
      NIR_PASS(progress, s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_if, true);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, nir_lower_undef_to_zero);
      NIR_PASS(progress, s, nir_opt_deref);
   } while (progress);
}

struct clc_context *
clc_context_new(const struct clc_logger *logger, const struct clc_context_options *options)
{
   struct clc_context *ctx = rzalloc(NULL, struct clc_context);
   if (!ctx) {
      clc_error(logger, "D3D12: failed to allocate a clc_context");
      return NULL;
   }

   const struct spirv_to_nir_options libclc_spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .create_library = true,
      .constant_addr_format = nir_address_format_32bit_index_offset_pack64,
      .global_addr_format = nir_address_format_32bit_index_offset_pack64,
      .shared_addr_format = nir_address_format_32bit_offset_as_64bit,
      .temp_addr_format = nir_address_format_32bit_offset_as_64bit,
      .float_controls_execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32,
      .caps = {
         .address = true,
         .float64 = true,
         .int8 = true,
         .int16 = true,
         .int64 = true,
         .kernel = true,
      },
   };
   const struct nir_shader_compiler_options *libclc_nir_options =
      dxil_get_nir_compiler_options();

   glsl_type_singleton_init_or_ref();
   nir_shader *s = nir_load_libclc_shader(64, NULL, &libclc_spirv_options, libclc_nir_options);
   if (!s) {
      clc_error(logger, "D3D12: spirv_to_nir failed on libclc blob");
      ralloc_free(ctx);
      return NULL;
   }

   if (options && options->optimize)
      clc_context_optimize(s);

   ralloc_steal(ctx, s);
   ctx->libclc_nir = s;

   return ctx;
}

void
clc_free_context(struct clc_context *ctx)
{
   ralloc_free(ctx);
   glsl_type_singleton_decref();
}

void clc_context_serialize(struct clc_context *context,
                           void **serialized,
                           size_t *serialized_size)
{
   struct blob tmp;
   blob_init(&tmp);
   nir_serialize(&tmp, context->libclc_nir, true);

   blob_finish_get_buffer(&tmp, serialized, serialized_size);
}

void clc_context_free_serialized(void *serialized)
{
   free(serialized);
}

struct clc_context *
clc_context_deserialize(const void *serialized, size_t serialized_size)
{
   struct clc_context *ctx = rzalloc(NULL, struct clc_context);
   if (!ctx) {
      return NULL;
   }
   const struct nir_shader_compiler_options *libclc_nir_options =
      dxil_get_nir_compiler_options();

   glsl_type_singleton_init_or_ref();

   struct blob_reader tmp;
   blob_reader_init(&tmp, serialized, serialized_size);

   nir_shader *s = nir_deserialize(NULL, libclc_nir_options, &tmp);
   if (!s) {
      ralloc_free(ctx);
      return NULL;
   }

   ralloc_steal(ctx, s);
   ctx->libclc_nir = s;

   return ctx;
}

struct clc_object *
clc_compile(struct clc_context *ctx,
            const struct clc_compile_args *args,
            const struct clc_logger *logger)
{
   struct clc_object *obj;
   int ret;

   obj = calloc(1, sizeof(*obj));
   if (!obj) {
      clc_error(logger, "D3D12: failed to allocate a clc_object");
      return NULL;
   }

   ret = clc_to_spirv(args, &obj->spvbin, logger);
   if (ret < 0) {
      free(obj);
      return NULL;
   }

   if (debug_get_option_debug_clc() & CLC_DEBUG_DUMP_SPIRV)
      clc_dump_spirv(&obj->spvbin, stdout);

   return obj;
}

struct clc_object *
clc_link(struct clc_context *ctx,
         const struct clc_linker_args *args,
         const struct clc_logger *logger)
{
   struct clc_object *out_obj;
   int ret;

   out_obj = malloc(sizeof(*out_obj));
   if (!out_obj) {
      clc_error(logger, "failed to allocate a clc_object");
      return NULL;
   }

   ret = clc_link_spirv_binaries(args, &out_obj->spvbin, logger);
   if (ret < 0) {
      free(out_obj);
      return NULL;
   }

   if (debug_get_option_debug_clc() & CLC_DEBUG_DUMP_SPIRV)
      clc_dump_spirv(&out_obj->spvbin, stdout);

   out_obj->kernels = clc_spirv_get_kernels_info(&out_obj->spvbin,
                                                 &out_obj->num_kernels);

   if (debug_get_option_debug_clc() & CLC_DEBUG_VERBOSE)
      clc_print_kernels_info(out_obj);

   return out_obj;
}

void clc_free_object(struct clc_object *obj)
{
   clc_free_kernels_info(obj->kernels, obj->num_kernels);
   clc_free_spirv_binary(&obj->spvbin);
   free(obj);
}

static nir_variable *
add_kernel_inputs_var(struct clc_dxil_object *dxil, nir_shader *nir,
                      unsigned *cbv_id)
{
   if (!dxil->kernel->num_args)
      return NULL;

   struct clc_dxil_metadata *metadata = &dxil->metadata;
   unsigned size = 0;

   nir_foreach_variable_with_modes(var, nir, nir_var_uniform)
      size = MAX2(size,
                  var->data.driver_location +
                  glsl_get_cl_size(var->type));

   size = align(size, 4);

   const struct glsl_type *array_type = glsl_array_type(glsl_uint_type(), size / 4, 4);
   const struct glsl_struct_field field = { array_type, "arr" };
   nir_variable *var =
      nir_variable_create(nir, nir_var_mem_ubo,
                          glsl_struct_type(&field, 1, "kernel_inputs", false),
                          "kernel_inputs");
   var->data.binding = (*cbv_id)++;
   var->data.how_declared = nir_var_hidden;
   return var;
}

static nir_variable *
add_work_properties_var(struct clc_dxil_object *dxil,
                        struct nir_shader *nir, unsigned *cbv_id)
{
   struct clc_dxil_metadata *metadata = &dxil->metadata;
   const struct glsl_type *array_type =
      glsl_array_type(glsl_uint_type(),
                      sizeof(struct clc_work_properties_data) / sizeof(unsigned),
                      sizeof(unsigned));
   const struct glsl_struct_field field = { array_type, "arr" };
   nir_variable *var =
      nir_variable_create(nir, nir_var_mem_ubo,
                          glsl_struct_type(&field, 1, "kernel_work_properties", false),
                          "kernel_work_properties");
   var->data.binding = (*cbv_id)++;
   var->data.how_declared = nir_var_hidden;
   return var;
}

static void
clc_lower_constant_to_ssbo(nir_shader *nir,
                           const struct clc_kernel_info *kerninfo, unsigned *uav_id)
{
   /* Update UBO vars and assign them a binding. */
   nir_foreach_variable_with_modes(var, nir, nir_var_mem_constant) {
      var->data.mode = nir_var_mem_ssbo;
      var->data.binding = (*uav_id)++;
   }

   /* And finally patch all the derefs referencing the constant
    * variables/pointers.
    */
   nir_foreach_function(func, nir) {
      if (!func->is_entrypoint)
         continue;

      assert(func->impl);

      nir_builder b;
      nir_builder_init(&b, func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;

            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (deref->modes != nir_var_mem_constant)
               continue;

            deref->modes = nir_var_mem_ssbo;
         }
      }
   }
}

static void
clc_lower_global_to_ssbo(nir_shader *nir)
{
   nir_foreach_function(func, nir) {
      if (!func->is_entrypoint)
         continue;

      assert(func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_deref)
               continue;

            nir_deref_instr *deref = nir_instr_as_deref(instr);

            if (deref->modes != nir_var_mem_global)
               continue;

            deref->modes = nir_var_mem_ssbo;
         }
      }
   }
}

static void
copy_const_initializer(const nir_constant *constant, const struct glsl_type *type,
                       uint8_t *data)
{
   unsigned size = glsl_get_cl_size(type);

   if (glsl_type_is_array(type)) {
      const struct glsl_type *elm_type = glsl_get_array_element(type);
      unsigned step_size = glsl_get_explicit_stride(type);

      for (unsigned i = 0; i < constant->num_elements; i++) {
         copy_const_initializer(constant->elements[i], elm_type,
                                data + (i * step_size));
      }
   } else if (glsl_type_is_struct(type)) {
      for (unsigned i = 0; i < constant->num_elements; i++) {
         const struct glsl_type *elm_type = glsl_get_struct_field(type, i);
         int offset = glsl_get_struct_field_offset(type, i);
         copy_const_initializer(constant->elements[i], elm_type, data + offset);
      }
   } else {
      assert(glsl_type_is_vector_or_scalar(type));

      for (unsigned i = 0; i < glsl_get_components(type); i++) {
         switch (glsl_get_bit_size(type)) {
         case 64:
            *((uint64_t *)data) = constant->values[i].u64;
            break;
         case 32:
            *((uint32_t *)data) = constant->values[i].u32;
            break;
         case 16:
            *((uint16_t *)data) = constant->values[i].u16;
            break;
         case 8:
            *((uint8_t *)data) = constant->values[i].u8;
            break;
         default:
            unreachable("Invalid base type");
         }

         data += glsl_get_bit_size(type) / 8;
      }
   }
}

static const struct glsl_type *
get_cast_type(unsigned bit_size)
{
   switch (bit_size) {
   case 64:
      return glsl_int64_t_type();
   case 32:
      return glsl_int_type();
   case 16:
      return glsl_int16_t_type();
   case 8:
      return glsl_int8_t_type();
   }
   unreachable("Invalid bit_size");
}

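/* Helpers for split_unaligned_loads_stores() below: an insufficiently aligned
 * vector load/store is rewritten as a series of smaller loads/stores through a
 * deref cast to an integer type matching the known alignment, with the pieces
 * reassembled (or extracted) via nir_extract_bits.
 */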
static void
split_unaligned_load(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment)
{
   enum gl_access_qualifier access = nir_intrinsic_access(intrin);
   nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS * NIR_MAX_VEC_COMPONENTS * sizeof(int64_t) / 8];
   unsigned comp_size = intrin->dest.ssa.bit_size / 8;
   unsigned num_comps = intrin->dest.ssa.num_components;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]);

   const struct glsl_type *cast_type = get_cast_type(alignment * 8);
   nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment);

   unsigned num_loads = DIV_ROUND_UP(comp_size * num_comps, alignment);
   for (unsigned i = 0; i < num_loads; ++i) {
      nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size));
      srcs[i] = nir_load_deref_with_access(b, elem, access);
   }

   nir_ssa_def *new_dest = nir_extract_bits(b, srcs, num_loads, 0, num_comps, intrin->dest.ssa.bit_size);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest);
   nir_instr_remove(&intrin->instr);
}

static void
split_unaligned_store(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment)
{
   enum gl_access_qualifier access = nir_intrinsic_access(intrin);

   assert(intrin->src[1].is_ssa);
   nir_ssa_def *value = intrin->src[1].ssa;
   unsigned comp_size = value->bit_size / 8;
   unsigned num_comps = value->num_components;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]);

   const struct glsl_type *cast_type = get_cast_type(alignment * 8);
   nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment);

   unsigned num_stores = DIV_ROUND_UP(comp_size * num_comps, alignment);
   for (unsigned i = 0; i < num_stores; ++i) {
      nir_ssa_def *substore_val = nir_extract_bits(b, &value, 1, i * alignment * 8, 1, alignment * 8);
      nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size));
      nir_store_deref_with_access(b, elem, substore_val, ~0, access);
   }

   nir_instr_remove(&intrin->instr);
}

static bool
split_unaligned_loads_stores(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_builder b;
      nir_builder_init(&b, function->impl);

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref &&
                intrin->intrinsic != nir_intrinsic_store_deref)
               continue;
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);

            unsigned align_mul = 0, align_offset = 0;
            nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset);

            unsigned alignment = align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;

            /* We can load anything at 4-byte alignment, except for
             * UBOs (AKA CBs where the granularity is 16 bytes).
             */
            if (alignment >= (deref->modes == nir_var_mem_ubo ? 16 : 4))
               continue;

            nir_ssa_def *val;
            if (intrin->intrinsic == nir_intrinsic_load_deref) {
               assert(intrin->dest.is_ssa);
               val = &intrin->dest.ssa;
            } else {
               assert(intrin->src[1].is_ssa);
               val = intrin->src[1].ssa;
            }

            unsigned natural_alignment =
               val->bit_size / 8 *
               (val->num_components == 3 ? 4 : val->num_components);

            if (alignment >= natural_alignment)
               continue;

            if (intrin->intrinsic == nir_intrinsic_load_deref)
               split_unaligned_load(&b, intrin, alignment);
            else
               split_unaligned_store(&b, intrin, alignment);
            progress = true;
         }
      }
   }

   return progress;
}

static enum pipe_tex_wrap
wrap_from_cl_addressing(unsigned addressing_mode)
{
   switch (addressing_mode)
   {
   default:
   case SAMPLER_ADDRESSING_MODE_NONE:
   case SAMPLER_ADDRESSING_MODE_CLAMP:
      // Since OpenCL's only border color is 0's and D3D specs out-of-bounds loads to return 0, don't apply any wrap mode
      return (enum pipe_tex_wrap)-1;
   case SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE: return PIPE_TEX_WRAP_CLAMP_TO_EDGE;
   case SAMPLER_ADDRESSING_MODE_REPEAT: return PIPE_TEX_WRAP_REPEAT;
   case SAMPLER_ADDRESSING_MODE_REPEAT_MIRRORED: return PIPE_TEX_WRAP_MIRROR_REPEAT;
   }
}

static bool shader_has_double(nir_shader *nir)
{
   bool progress = false;

   foreach_list_typed(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         continue;

      assert(func->impl);

      nir_foreach_block(block, func->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_alu)
               continue;

            nir_alu_instr *alu = nir_instr_as_alu(instr);
            const nir_op_info *info = &nir_op_infos[alu->op];

            if (info->output_type & nir_type_float &&
                nir_dest_bit_size(alu->dest.dest) == 64)
               return true;
         }
      }
   }

   return false;
}

static bool
scale_fdiv(nir_shader *nir)
{
   bool progress = false;
   nir_foreach_function(func, nir) {
      if (!func->impl)
         continue;
      nir_builder b;
      nir_builder_init(&b, func->impl);
      nir_foreach_block(block, func->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_alu)
               continue;
            nir_alu_instr *alu = nir_instr_as_alu(instr);
            if (alu->op != nir_op_fdiv || alu->src[0].src.ssa->bit_size != 32)
               continue;

            b.cursor = nir_before_instr(instr);
            nir_ssa_def *fabs = nir_fabs(&b, alu->src[1].src.ssa);
            nir_ssa_def *big = nir_flt(&b, nir_imm_int(&b, 0x7e800000), fabs);
            nir_ssa_def *small = nir_flt(&b, fabs, nir_imm_int(&b, 0x00800000));

            nir_ssa_def *scaled_down_a = nir_fmul_imm(&b, alu->src[0].src.ssa, 0.25);
            nir_ssa_def *scaled_down_b = nir_fmul_imm(&b, alu->src[1].src.ssa, 0.25);
            nir_ssa_def *scaled_up_a = nir_fmul_imm(&b, alu->src[0].src.ssa, 16777216.0);
            nir_ssa_def *scaled_up_b = nir_fmul_imm(&b, alu->src[1].src.ssa, 16777216.0);

            nir_ssa_def *final_a =
               nir_bcsel(&b, big, scaled_down_a,
                         (nir_bcsel(&b, small, scaled_up_a, alu->src[0].src.ssa)));
            nir_ssa_def *final_b =
               nir_bcsel(&b, big, scaled_down_b,
                         (nir_bcsel(&b, small, scaled_up_b, alu->src[1].src.ssa)));

            nir_instr_rewrite_src(instr, &alu->src[0].src, nir_src_for_ssa(final_a));
            nir_instr_rewrite_src(instr, &alu->src[1].src, nir_src_for_ssa(final_b));
            progress = true;
         }
      }
   }
   return progress;
}

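/* Main compilation entry point: translate the SPIR-V in `obj` for the given
 * entrypoint into DXIL, running the NIR lowering/optimization pipeline built
 * from the passes above and filling out dxil->metadata (argument offsets and
 * bindings, sampler states, printf info, shared-memory layout) along the way.
 */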
struct clc_dxil_object *
clc_to_dxil(struct clc_context *ctx,
            const struct clc_object *obj,
            const char *entrypoint,
            const struct clc_runtime_kernel_conf *conf,
            const struct clc_logger *logger)
{
   struct clc_dxil_object *dxil;
   struct nir_shader *nir;

   dxil = calloc(1, sizeof(*dxil));
   if (!dxil) {
      clc_error(logger, "failed to allocate the dxil object");
      return NULL;
   }

   for (unsigned i = 0; i < obj->num_kernels; i++) {
      if (!strcmp(obj->kernels[i].name, entrypoint)) {
         dxil->kernel = &obj->kernels[i];
         break;
      }
   }

   if (!dxil->kernel) {
      clc_error(logger, "no '%s' kernel found", entrypoint);
      goto err_free_dxil;
   }

   const struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .clc_shader = ctx->libclc_nir,
      .constant_addr_format = nir_address_format_32bit_index_offset_pack64,
      .global_addr_format = nir_address_format_32bit_index_offset_pack64,
      .shared_addr_format = nir_address_format_32bit_offset_as_64bit,
      .temp_addr_format = nir_address_format_32bit_offset_as_64bit,
      .float_controls_execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32,
      .caps = {
         .address = true,
         .float64 = true,
         .int8 = true,
         .int16 = true,
         .int64 = true,
         .kernel = true,
         .kernel_image = true,
         .literal_sampler = true,
         .printf = true,
      },
   };
   nir_shader_compiler_options nir_options =
      *dxil_get_nir_compiler_options();

   if (conf && conf->lower_bit_size & 64) {
      nir_options.lower_pack_64_2x32_split = false;
      nir_options.lower_unpack_64_2x32_split = false;
      nir_options.lower_int64_options = ~0;
   }

   if (conf && conf->lower_bit_size & 16)
      nir_options.support_16bit_alu = true;

   glsl_type_singleton_init_or_ref();

   nir = spirv_to_nir(obj->spvbin.data, obj->spvbin.size / 4,
                      NULL, 0,
                      MESA_SHADER_KERNEL, entrypoint,
                      &spirv_options,
                      &nir_options);
   if (!nir) {
      clc_error(logger, "spirv_to_nir() failed");
      goto err_free_dxil;
   }
   nir->info.workgroup_size_variable = true;

   NIR_PASS_V(nir, nir_lower_goto_ifs);
   NIR_PASS_V(nir, nir_opt_dead_cf);

   struct clc_dxil_metadata *metadata = &dxil->metadata;

   metadata->args = calloc(dxil->kernel->num_args,
                           sizeof(*metadata->args));
   if (!metadata->args) {
      clc_error(logger, "failed to allocate arg positions");
      goto err_free_dxil;
   }

   {
      bool progress;
      do
      {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
      } while (progress);
   }

   // Inline all functions first, according to the comment on nir_inline_functions.
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_lower_libclc, ctx->libclc_nir);
   NIR_PASS_V(nir, nir_inline_functions);

   // Pick off the single entrypoint that we want.
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);

   {
      bool progress;
      do
      {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_split_var_copies);
         NIR_PASS(progress, nir, nir_lower_var_copies);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
         NIR_PASS(progress, nir, nir_opt_if, true);
         NIR_PASS(progress, nir, nir_opt_dead_cf);
         NIR_PASS(progress, nir, nir_opt_remove_phis);
         NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
         NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform);
      } while (progress);
   }

   NIR_PASS_V(nir, scale_fdiv);

   dxil_wrap_sampler_state int_sampler_states[PIPE_MAX_SHADER_SAMPLER_VIEWS] = { {{0}} };
   unsigned sampler_id = 0;

   struct exec_list inline_samplers_list;
   exec_list_make_empty(&inline_samplers_list);

   // Move inline samplers to the end of the uniforms list
   nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
      if (glsl_type_is_sampler(var->type) && var->data.sampler.is_inline_sampler) {
         exec_node_remove(&var->node);
         exec_list_push_tail(&inline_samplers_list, &var->node);
      }
   }
   exec_node_insert_list_after(exec_list_get_tail(&nir->variables), &inline_samplers_list);

   NIR_PASS_V(nir, nir_lower_variable_initializers, ~(nir_var_function_temp | nir_var_shader_temp));

   // Lower memcpy
   NIR_PASS_V(nir, dxil_nir_lower_memcpy_deref);

   // Ensure the printf struct has explicit types, but we'll throw away the scratch size, because we haven't
   // necessarily removed all temp variables (e.g. the printf struct itself) at this point, so we'll rerun this later
   assert(nir->scratch_size == 0);
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align);

   nir_lower_printf_options printf_options = {
      .treat_doubles_as_floats = true,
      .max_buffer_size = 1024 * 1024
   };
   NIR_PASS_V(nir, nir_lower_printf, &printf_options);

   metadata->printf.info_count = nir->printf_info_count;
   metadata->printf.infos = calloc(nir->printf_info_count, sizeof(struct clc_printf_info));
   for (unsigned i = 0; i < nir->printf_info_count; i++) {
      metadata->printf.infos[i].str = malloc(nir->printf_info[i].string_size);
      memcpy(metadata->printf.infos[i].str, nir->printf_info[i].strings, nir->printf_info[i].string_size);
      metadata->printf.infos[i].num_args = nir->printf_info[i].num_args;
      metadata->printf.infos[i].arg_sizes = malloc(nir->printf_info[i].num_args * sizeof(unsigned));
      memcpy(metadata->printf.infos[i].arg_sizes, nir->printf_info[i].arg_sizes, nir->printf_info[i].num_args * sizeof(unsigned));
   }

   // copy propagate to prepare for lower_explicit_io
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_opt_copy_prop_vars);
   NIR_PASS_V(nir, nir_lower_var_copies);
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_lower_alu);
   NIR_PASS_V(nir, nir_opt_dce);
   NIR_PASS_V(nir, nir_opt_deref);

   // For uniforms (kernel inputs), run this before adjusting variable list via image/sampler lowering
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_uniform, glsl_get_cl_type_size_align);

   // Calculate input offsets/metadata.
   unsigned uav_id = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      int i = var->data.location;
      if (i < 0)
         continue;

      unsigned size = glsl_get_cl_size(var->type);

      metadata->args[i].offset = var->data.driver_location;
      metadata->args[i].size = size;
      metadata->kernel_inputs_buf_size = MAX2(metadata->kernel_inputs_buf_size,
                                              var->data.driver_location + size);
      if ((dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_GLOBAL ||
           dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_CONSTANT) &&
          // Ignore images during this pass - global memory buffers need to have contiguous bindings
          !glsl_type_is_image(var->type)) {
         metadata->args[i].globconstptr.buf_id = uav_id++;
      } else if (glsl_type_is_sampler(var->type)) {
         unsigned address_mode = conf ? conf->args[i].sampler.addressing_mode : 0u;
         int_sampler_states[sampler_id].wrap[0] =
            int_sampler_states[sampler_id].wrap[1] =
            int_sampler_states[sampler_id].wrap[2] = wrap_from_cl_addressing(address_mode);
         int_sampler_states[sampler_id].is_nonnormalized_coords =
            conf ? !conf->args[i].sampler.normalized_coords : 0;
         int_sampler_states[sampler_id].is_linear_filtering =
            conf ? conf->args[i].sampler.linear_filtering : 0;
         metadata->args[i].sampler.sampler_id = var->data.binding = sampler_id++;
      }
   }

   unsigned num_global_inputs = uav_id;

   // Second pass over inputs to calculate image bindings
   unsigned srv_id = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      int i = var->data.location;
      if (i < 0)
         continue;

      if (glsl_type_is_image(var->type)) {
         if (var->data.access == ACCESS_NON_WRITEABLE) {
            metadata->args[i].image.buf_ids[0] = srv_id++;
         } else {
            // Write or read-write are UAVs
            metadata->args[i].image.buf_ids[0] = uav_id++;
         }

         metadata->args[i].image.num_buf_ids = 1;
         var->data.binding = metadata->args[i].image.buf_ids[0];
      }
   }

   // Before removing dead uniforms, dedupe constant samplers to make more dead uniforms
   NIR_PASS_V(nir, clc_nir_dedupe_const_samplers);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo | nir_var_mem_constant | nir_var_function_temp, NULL);

   // Fill out inline sampler metadata, now that they've been deduped and dead ones removed
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      if (glsl_type_is_sampler(var->type) && var->data.sampler.is_inline_sampler) {
         int_sampler_states[sampler_id].wrap[0] =
            int_sampler_states[sampler_id].wrap[1] =
            int_sampler_states[sampler_id].wrap[2] =
            wrap_from_cl_addressing(var->data.sampler.addressing_mode);
         int_sampler_states[sampler_id].is_nonnormalized_coords =
            !var->data.sampler.normalized_coordinates;
         int_sampler_states[sampler_id].is_linear_filtering =
            var->data.sampler.filter_mode == SAMPLER_FILTER_MODE_LINEAR;
         var->data.binding = sampler_id++;

         assert(metadata->num_const_samplers < CLC_MAX_SAMPLERS);
         metadata->const_samplers[metadata->num_const_samplers].sampler_id = var->data.binding;
         metadata->const_samplers[metadata->num_const_samplers].addressing_mode = var->data.sampler.addressing_mode;
         metadata->const_samplers[metadata->num_const_samplers].normalized_coords = var->data.sampler.normalized_coordinates;
         metadata->const_samplers[metadata->num_const_samplers].filter_mode = var->data.sampler.filter_mode;
         metadata->num_const_samplers++;
      }
   }

   // Needs to come before lower_explicit_io
   NIR_PASS_V(nir, nir_lower_readonly_images_to_tex, false);
   struct clc_image_lower_context image_lower_context = { metadata, &srv_id, &uav_id };
   NIR_PASS_V(nir, clc_lower_images, &image_lower_context);
   NIR_PASS_V(nir, clc_lower_nonnormalized_samplers, int_sampler_states);
   NIR_PASS_V(nir, nir_lower_samplers);
   NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
              int_sampler_states, NULL, 14.0f);

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_shared | nir_var_function_temp, NULL);

   nir->scratch_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_mem_shared | nir_var_function_temp | nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);

   NIR_PASS_V(nir, dxil_nir_lower_ubo_to_temp);
   NIR_PASS_V(nir, clc_lower_constant_to_ssbo, dxil->kernel, &uav_id);
   NIR_PASS_V(nir, clc_lower_global_to_ssbo);

   bool has_printf = false;
   NIR_PASS(has_printf, nir, clc_lower_printf_base, uav_id);
   metadata->printf.uav_id = has_printf ? uav_id++ : -1;

   NIR_PASS_V(nir, dxil_nir_lower_deref_ssbo);

   NIR_PASS_V(nir, split_unaligned_loads_stores);

   assert(nir->info.cs.ptr_size == 64);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
              nir_address_format_32bit_index_offset_pack64);
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_shared | nir_var_function_temp | nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_system_values);

   nir_lower_compute_system_values_options compute_options = {
      .has_base_global_invocation_id = (conf && conf->support_global_work_id_offsets),
      .has_base_workgroup_id = (conf && conf->support_workgroup_id_offsets),
   };
   NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_options);

   NIR_PASS_V(nir, clc_lower_64bit_semantics);

   NIR_PASS_V(nir, nir_opt_deref);
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);

   unsigned cbv_id = 0;

   nir_variable *inputs_var =
      add_kernel_inputs_var(dxil, nir, &cbv_id);
   nir_variable *work_properties_var =
      add_work_properties_var(dxil, nir, &cbv_id);

   memcpy(metadata->local_size, nir->info.workgroup_size,
          sizeof(metadata->local_size));
   memcpy(metadata->local_size_hint, nir->info.cs.workgroup_size_hint,
          sizeof(metadata->local_size));

   // Patch the local size before calling clc_nir_lower_system_values().
   if (conf) {
      for (unsigned i = 0; i < ARRAY_SIZE(nir->info.workgroup_size); i++) {
         if (!conf->local_size[i] ||
             conf->local_size[i] == nir->info.workgroup_size[i])
            continue;

         if (nir->info.workgroup_size[i] &&
             nir->info.workgroup_size[i] != conf->local_size[i]) {
            debug_printf("D3D12: runtime local size does not match reqd_work_group_size() values\n");
            goto err_free_dxil;
         }

         nir->info.workgroup_size[i] = conf->local_size[i];
      }
      memcpy(metadata->local_size, nir->info.workgroup_size,
             sizeof(metadata->local_size));
   } else {
      /* Make sure there's at least one thread that's set to run */
      for (unsigned i = 0; i < ARRAY_SIZE(nir->info.workgroup_size); i++) {
         if (nir->info.workgroup_size[i] == 0)
            nir->info.workgroup_size[i] = 1;
      }
   }

   NIR_PASS_V(nir, clc_nir_lower_kernel_input_loads, inputs_var);
   NIR_PASS_V(nir, split_unaligned_loads_stores);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
              nir_address_format_32bit_index_offset);
   NIR_PASS_V(nir, clc_nir_lower_system_values, work_properties_var);
   NIR_PASS_V(nir, dxil_nir_lower_loads_stores_to_dxil);
   NIR_PASS_V(nir, dxil_nir_opt_alu_deref_srcs);
   NIR_PASS_V(nir, dxil_nir_lower_atomics_to_dxil);
   NIR_PASS_V(nir, nir_lower_fp16_casts);
   NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL);

   // Convert pack to pack_split
   NIR_PASS_V(nir, nir_lower_pack);
   // Lower pack_split to bit math
   NIR_PASS_V(nir, nir_opt_algebraic);

   NIR_PASS_V(nir, nir_opt_dce);

   nir_validate_shader(nir, "Validate before feeding NIR to the DXIL compiler");
   struct nir_to_dxil_options opts = {
      .interpolate_at_vertex = false,
      .lower_int16 = (conf && (conf->lower_bit_size & 16) != 0),
      .ubo_binding_offset = 0,
      .disable_math_refactoring = true,
      .num_kernel_globals = num_global_inputs,
   };

   for (unsigned i = 0; i < dxil->kernel->num_args; i++) {
      if (dxil->kernel->args[i].address_qualifier != CLC_KERNEL_ARG_ADDRESS_LOCAL)
         continue;

      /* If we don't have the runtime conf yet, we just create a dummy variable.
       * This will be adjusted when clc_to_dxil() is called with a conf
       * argument.
       */
      unsigned size = 4;
      if (conf && conf->args)
         size = conf->args[i].localptr.size;

      /* The alignment required for the pointee type is not easy to get from
       * here, so let's base our logic on the size itself. Anything bigger than
       * the maximum alignment constraint (which is 128 bytes, since ulong16 or
       * double16 size are the biggest base types) should be aligned on this
       * maximum alignment constraint. For smaller types, we use the size
       * itself to calculate the alignment.
       */
      unsigned alignment = size < 128 ? (1 << (ffs(size) - 1)) : 128;

      nir->info.shared_size = align(nir->info.shared_size, alignment);
      metadata->args[i].localptr.sharedmem_offset = nir->info.shared_size;
      nir->info.shared_size += size;
   }

   metadata->local_mem_size = nir->info.shared_size;
   metadata->priv_mem_size = nir->scratch_size;

   /* DXIL double math is too limited compared to what NIR expects. Let's refuse
    * to compile a shader when it contains double operations until we have
    * double lowering hooked up.
    */
   if (shader_has_double(nir)) {
      clc_error(logger, "NIR shader contains doubles, which we don't support yet");
      goto err_free_dxil;
   }

   struct blob tmp;
   if (!nir_to_dxil(nir, &opts, &tmp)) {
      debug_printf("D3D12: nir_to_dxil failed\n");
      goto err_free_dxil;
   }

   nir_foreach_variable_with_modes(var, nir, nir_var_mem_ssbo) {
      if (var->constant_initializer) {
         if (glsl_type_is_array(var->type)) {
            int size = align(glsl_get_cl_size(var->type), 4);
            uint8_t *data = malloc(size);
            if (!data)
               goto err_free_dxil;

            copy_const_initializer(var->constant_initializer, var->type, data);
            metadata->consts[metadata->num_consts].data = data;
            metadata->consts[metadata->num_consts].size = size;
            metadata->consts[metadata->num_consts].uav_id = var->data.binding;
            metadata->num_consts++;
         } else
            unreachable("unexpected constant initializer");
      }
   }

   metadata->kernel_inputs_cbv_id = inputs_var ? inputs_var->data.binding : 0;
   metadata->work_properties_cbv_id = work_properties_var->data.binding;
   metadata->num_uavs = uav_id;
   metadata->num_srvs = srv_id;
   metadata->num_samplers = sampler_id;

   ralloc_free(nir);
   glsl_type_singleton_decref();

   blob_finish_get_buffer(&tmp, &dxil->binary.data,
                          &dxil->binary.size);
   return dxil;

err_free_dxil:
   clc_free_dxil_object(dxil);
   return NULL;
}

void clc_free_dxil_object(struct clc_dxil_object *dxil)
{
   for (unsigned i = 0; i < dxil->metadata.num_consts; i++)
      free(dxil->metadata.consts[i].data);

   for (unsigned i = 0; i < dxil->metadata.printf.info_count; i++) {
      free(dxil->metadata.printf.infos[i].arg_sizes);
      free(dxil->metadata.printf.infos[i].str);
   }
   free(dxil->metadata.printf.infos);

   free(dxil->binary.data);
   free(dxil);
}

uint64_t clc_compiler_get_version()
{
   const char sha1[] = MESA_GIT_SHA1;
   const char* dash = strchr(sha1, '-');
   if (dash) {
      return strtoull(dash + 1, NULL, 16);
   }
   return 0;
}