GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/auxiliary/nir/nir_to_tgsi.c
1
/*
2
* Copyright © 2014-2015 Broadcom
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*/
23
24
#include "compiler/nir/nir.h"
25
#include "compiler/nir/nir_deref.h"
26
#include "nir/nir_to_tgsi.h"
27
#include "pipe/p_screen.h"
28
#include "pipe/p_state.h"
29
#include "tgsi/tgsi_dump.h"
30
#include "tgsi/tgsi_from_mesa.h"
31
#include "tgsi/tgsi_info.h"
32
#include "tgsi/tgsi_ureg.h"
33
#include "util/debug.h"
34
#include "util/u_math.h"
35
#include "util/u_memory.h"
36
37
struct ntt_compile {
38
nir_shader *s;
39
nir_function_impl *impl;
40
struct pipe_screen *screen;
41
struct ureg_program *ureg;
42
43
bool needs_texcoord_semantic;
44
bool any_reg_as_address;
45
bool native_integers;
46
bool has_txf_lz;
47
48
int next_addr_reg;
49
bool addr_declared[2];
50
struct ureg_dst addr_reg[2];
51
52
/* if condition set up at the end of a block, for ntt_emit_if(). */
53
struct ureg_src if_cond;
54
55
/* TGSI temps for our NIR SSA and register values. */
56
struct ureg_dst *reg_temp;
57
struct ureg_dst *ssa_temp;
58
59
nir_instr_liveness *liveness;
60
61
/* Mappings from driver_location to TGSI input/output number.
62
*
63
* We'll be declaring TGSI input/outputs in an arbitrary order, and they get
64
* their numbers assigned incrementally, unlike inputs or constants.
65
*/
66
struct ureg_src *input_index_map;
67
uint64_t centroid_inputs;
68
69
struct ureg_src images[PIPE_MAX_SHADER_IMAGES];
70
};
71
72
static void ntt_emit_cf_list(struct ntt_compile *c, struct exec_list *list);
73
74
/**
75
* Interprets a nir_load_const used as a NIR src as a uint.
76
*
77
* For non-native-integers drivers, nir_load_const_instrs used by an integer ALU
78
* instruction (or in a phi-web used by an integer ALU instruction) were
79
* converted to floats and the ALU instruction swapped to the float equivalent.
80
* However, this means that integer load_consts used by intrinsics (which don't
81
* normally get that conversion) may have been reformatted to be floats. Given
82
* that all of our intrinsic nir_src_as_uint() calls are expected to be small,
83
* we can just look and see if they look like floats and convert them back to
84
* ints.
85
*/
86
static uint32_t
87
ntt_src_as_uint(struct ntt_compile *c, nir_src src)
88
{
89
uint32_t val = nir_src_as_uint(src);
90
if (!c->native_integers && val >= fui(1.0))
91
val = (uint32_t)uif(val);
92
return val;
93
}
94
95
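/* Expands a write mask over 64-bit channels into the corresponding mask over
* the 32-bit channel pairs: .x -> .xy and .y -> .zw (e.g. 0x3 becomes 0xf).
*/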
static unsigned
96
ntt_64bit_write_mask(unsigned write_mask)
97
{
98
return ((write_mask & 1) ? 0x3 : 0) | ((write_mask & 2) ? 0xc : 0);
99
}
100
101
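/* Returns an immediate holding the double-precision 1.0 bit pattern
* (0x3ff0000000000000) in both 64-bit channels.
*/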
static struct ureg_src
102
ntt_64bit_1f(struct ntt_compile *c)
103
{
104
return ureg_imm4u(c->ureg,
105
0x00000000, 0x3ff00000,
106
0x00000000, 0x3ff00000);
107
}
108
109
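/* Returns the type used for input slot counting: for GS/TCS/TES, the
* per-vertex array level is stripped so only the element type is counted.
*/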
static const struct glsl_type *
110
ntt_shader_input_type(struct ntt_compile *c,
111
struct nir_variable *var)
112
{
113
switch (c->s->info.stage) {
114
case MESA_SHADER_GEOMETRY:
115
case MESA_SHADER_TESS_EVAL:
116
case MESA_SHADER_TESS_CTRL:
117
if (glsl_type_is_array(var->type))
118
return glsl_get_array_element(var->type);
119
else
120
return var->type;
121
default:
122
return var->type;
123
}
124
}
125
126
static void
127
ntt_get_gl_varying_semantic(struct ntt_compile *c, unsigned location,
128
unsigned *semantic_name, unsigned *semantic_index)
129
{
130
/* We want to use most of tgsi_get_gl_varying_semantic(), but the
131
* !texcoord shifting has already been applied, so avoid that.
132
*/
133
if (!c->needs_texcoord_semantic &&
134
(location >= VARYING_SLOT_VAR0 && location < VARYING_SLOT_PATCH0)) {
135
*semantic_name = TGSI_SEMANTIC_GENERIC;
136
*semantic_index = location - VARYING_SLOT_VAR0;
137
return;
138
}
139
140
tgsi_get_gl_varying_semantic(location, true,
141
semantic_name, semantic_index);
142
}
143
144
/* TGSI varying declarations have a component usage mask associated (used by
145
* r600 and svga).
146
*/
147
static uint32_t
148
ntt_tgsi_usage_mask(unsigned start_component, unsigned num_components,
149
bool is_64)
150
{
151
uint32_t usage_mask =
152
u_bit_consecutive(start_component, num_components);
153
154
if (is_64) {
155
if (start_component >= 2)
156
usage_mask >>= 2;
157
158
uint32_t tgsi_usage_mask = 0;
159
160
if (usage_mask & TGSI_WRITEMASK_X)
161
tgsi_usage_mask |= TGSI_WRITEMASK_XY;
162
if (usage_mask & TGSI_WRITEMASK_Y)
163
tgsi_usage_mask |= TGSI_WRITEMASK_ZW;
164
165
return tgsi_usage_mask;
166
} else {
167
return usage_mask;
168
}
169
}
170
171
/* TGSI varying declarations have a component usage mask associated (used by
172
* r600 and svga).
173
*/
174
static uint32_t
175
ntt_tgsi_var_usage_mask(const struct nir_variable *var)
176
{
177
const struct glsl_type *type_without_array =
178
glsl_without_array(var->type);
179
unsigned num_components = glsl_get_vector_elements(type_without_array);
180
if (num_components == 0) /* structs */
181
num_components = 4;
182
183
return ntt_tgsi_usage_mask(var->data.location_frac, num_components,
184
glsl_type_is_64bit(type_without_array));
185
}
186
187
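/* Declares the TGSI output for a store_output intrinsic (FS color/depth/stencil
* or a varying) and returns it with the writemask already shifted for the
* starting component and 64-bit-ness. *frac is set to the starting component.
*/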
static struct ureg_dst
188
ntt_store_output_decl(struct ntt_compile *c, nir_intrinsic_instr *instr, uint32_t *frac)
189
{
190
nir_io_semantics semantics = nir_intrinsic_io_semantics(instr);
191
int base = nir_intrinsic_base(instr);
192
*frac = nir_intrinsic_component(instr);
193
bool is_64 = nir_src_bit_size(instr->src[0]) == 64;
194
195
struct ureg_dst out;
196
if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
197
if (semantics.location == FRAG_RESULT_COLOR)
198
ureg_property(c->ureg, TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS, 1);
199
200
unsigned semantic_name, semantic_index;
201
tgsi_get_gl_frag_result_semantic(semantics.location,
202
&semantic_name, &semantic_index);
203
semantic_index += semantics.dual_source_blend_index;
204
205
switch (semantics.location) {
206
case FRAG_RESULT_DEPTH:
207
*frac = 2; /* z write is to the .z channel in TGSI */
208
break;
209
case FRAG_RESULT_STENCIL:
210
*frac = 1;
211
break;
212
default:
213
break;
214
}
215
216
out = ureg_DECL_output(c->ureg, semantic_name, semantic_index);
217
} else {
218
unsigned semantic_name, semantic_index;
219
220
ntt_get_gl_varying_semantic(c, semantics.location,
221
&semantic_name, &semantic_index);
222
223
uint32_t usage_mask = ntt_tgsi_usage_mask(*frac,
224
instr->num_components,
225
is_64);
226
uint32_t gs_streams = semantics.gs_streams;
227
for (int i = 0; i < 4; i++) {
228
if (!(usage_mask & (1 << i)))
229
gs_streams &= ~(0x3 << 2 * i);
230
}
231
232
/* No driver appears to use array_id of outputs. */
233
unsigned array_id = 0;
234
235
/* This bit is lost in the i/o semantics, but it's unused in in-tree
236
* drivers.
237
*/
238
bool invariant = false;
239
240
out = ureg_DECL_output_layout(c->ureg,
241
semantic_name, semantic_index,
242
gs_streams,
243
base,
244
usage_mask,
245
array_id,
246
semantics.num_slots,
247
invariant);
248
}
249
250
unsigned write_mask = nir_intrinsic_write_mask(instr);
251
252
if (is_64) {
253
write_mask = ntt_64bit_write_mask(write_mask);
254
if (*frac >= 2)
255
write_mask = write_mask << 2;
256
} else {
257
write_mask = write_mask << *frac;
258
}
259
return ureg_writemask(out, write_mask);
260
}
261
262
/* If this reg or SSA def is used only for storing an output, then in the simple
263
* cases we can write directly to the TGSI output instead of having store_output
264
* emit its own MOV.
265
*/
266
static bool
267
ntt_try_store_in_tgsi_output(struct ntt_compile *c, struct ureg_dst *dst,
268
struct list_head *uses, struct list_head *if_uses)
269
{
270
*dst = ureg_dst_undef();
271
272
switch (c->s->info.stage) {
273
case MESA_SHADER_FRAGMENT:
274
case MESA_SHADER_VERTEX:
275
break;
276
default:
277
/* tgsi_exec (at least) requires that output stores happen per vertex
278
* emitted, you don't get to reuse a previous output value for the next
279
* vertex.
280
*/
281
return false;
282
}
283
284
if (!list_is_empty(if_uses) || !list_is_singular(uses))
285
return false;
286
287
nir_src *src = list_first_entry(uses, nir_src, use_link);
288
289
if (src->parent_instr->type != nir_instr_type_intrinsic)
290
return false;
291
292
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src->parent_instr);
293
if (intr->intrinsic != nir_intrinsic_store_output ||
294
!nir_src_is_const(intr->src[1])) {
295
return false;
296
}
297
298
uint32_t frac;
299
*dst = ntt_store_output_decl(c, intr, &frac);
300
dst->Index += ntt_src_as_uint(c, intr->src[1]);
301
302
return frac == 0;
303
}
304
305
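/* Declares the fragment shader inputs up front and fills in the
* driver_location -> TGSI input mapping, interpolation modes, and centroid
* tracking. Other stages declare their inputs in ntt_emit_load_input().
*/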
static void
306
ntt_setup_inputs(struct ntt_compile *c)
307
{
308
if (c->s->info.stage != MESA_SHADER_FRAGMENT)
309
return;
310
311
unsigned num_inputs = 0;
312
int num_input_arrays = 0;
313
314
nir_foreach_shader_in_variable(var, c->s) {
315
const struct glsl_type *type = ntt_shader_input_type(c, var);
316
unsigned array_len =
317
glsl_count_attribute_slots(type, false);
318
319
num_inputs = MAX2(num_inputs, var->data.driver_location + array_len);
320
}
321
322
c->input_index_map = ralloc_array(c, struct ureg_src, num_inputs);
323
324
nir_foreach_shader_in_variable(var, c->s) {
325
const struct glsl_type *type = ntt_shader_input_type(c, var);
326
unsigned array_len =
327
glsl_count_attribute_slots(type, false);
328
329
unsigned interpolation = TGSI_INTERPOLATE_CONSTANT;
330
unsigned sample_loc;
331
struct ureg_src decl;
332
333
if (c->s->info.stage == MESA_SHADER_FRAGMENT) {
334
interpolation =
335
tgsi_get_interp_mode(var->data.interpolation,
336
var->data.location == VARYING_SLOT_COL0 ||
337
var->data.location == VARYING_SLOT_COL1);
338
339
if (var->data.location == VARYING_SLOT_POS)
340
interpolation = TGSI_INTERPOLATE_LINEAR;
341
}
342
343
unsigned semantic_name, semantic_index;
344
ntt_get_gl_varying_semantic(c, var->data.location,
345
&semantic_name, &semantic_index);
346
347
if (var->data.sample) {
348
sample_loc = TGSI_INTERPOLATE_LOC_SAMPLE;
349
} else if (var->data.centroid) {
350
sample_loc = TGSI_INTERPOLATE_LOC_CENTROID;
351
c->centroid_inputs |= (BITSET_MASK(array_len) <<
352
var->data.driver_location);
353
} else {
354
sample_loc = TGSI_INTERPOLATE_LOC_CENTER;
355
}
356
357
unsigned array_id = 0;
358
if (glsl_type_is_array(type))
359
array_id = ++num_input_arrays;
360
361
uint32_t usage_mask = ntt_tgsi_var_usage_mask(var);
362
363
decl = ureg_DECL_fs_input_cyl_centroid_layout(c->ureg,
364
semantic_name,
365
semantic_index,
366
interpolation,
367
0,
368
sample_loc,
369
var->data.driver_location,
370
usage_mask,
371
array_id, array_len);
372
373
if (semantic_name == TGSI_SEMANTIC_FACE) {
374
struct ureg_dst temp = ureg_DECL_temporary(c->ureg);
375
/* NIR is ~0 front and 0 back, while TGSI is +1 front */
376
ureg_SGE(c->ureg, temp, decl, ureg_imm1f(c->ureg, 0));
377
decl = ureg_src(temp);
378
}
379
380
for (unsigned i = 0; i < array_len; i++) {
381
c->input_index_map[var->data.driver_location + i] = decl;
382
c->input_index_map[var->data.driver_location + i].Index += i;
383
}
384
}
385
}
386
387
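/* Declares the constant file (packed or vec4-sized), images, UBOs, SSBOs, and
* samplers used by the shader.
*/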
static void
388
ntt_setup_uniforms(struct ntt_compile *c)
389
{
390
struct pipe_screen *screen = c->screen;
391
bool packed = screen->get_param(screen, PIPE_CAP_PACKED_UNIFORMS);
392
393
nir_foreach_uniform_variable(var, c->s) {
394
if (glsl_type_is_image(var->type)) {
395
c->images[var->data.binding] = ureg_DECL_image(c->ureg,
396
var->data.binding,
397
TGSI_TEXTURE_2D,
398
var->data.image.format,
399
!var->data.read_only,
400
false);
401
} else {
402
unsigned size;
403
if (packed) {
404
size = DIV_ROUND_UP(glsl_count_dword_slots(var->type,
405
var->data.bindless), 4);
406
} else {
407
size = glsl_count_vec4_slots(var->type, false, var->data.bindless);
408
}
409
410
for (unsigned i = 0; i < size; i++)
411
ureg_DECL_constant(c->ureg, var->data.driver_location + i);
412
}
413
}
414
415
nir_foreach_variable_with_modes(var, c->s, nir_var_mem_ubo) {
416
ureg_DECL_constant2D(c->ureg, 0, 0, var->data.driver_location);
417
}
418
419
nir_foreach_variable_with_modes(var, c->s, nir_var_mem_ssbo) {
420
/* XXX: nv50 uses the atomic flag to set caching for (lowered) atomic
421
* counters
422
*/
423
bool atomic = false;
424
ureg_DECL_buffer(c->ureg, var->data.binding, atomic);
425
}
426
427
for (int i = 0; i < PIPE_MAX_SAMPLERS; i++) {
428
if (BITSET_TEST(c->s->info.textures_used, i))
429
ureg_DECL_sampler(c->ureg, i);
430
}
431
}
432
433
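/* Declares TGSI temporaries (or array temporaries) for NIR registers, storing
* straight to a TGSI output instead when ntt_try_store_in_tgsi_output() allows.
*/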
static void
434
ntt_setup_registers(struct ntt_compile *c, struct exec_list *list)
435
{
436
foreach_list_typed(nir_register, nir_reg, node, list) {
437
struct ureg_dst decl;
438
if (nir_reg->num_array_elems == 0) {
439
uint32_t write_mask = BITFIELD_MASK(nir_reg->num_components);
440
if (!ntt_try_store_in_tgsi_output(c, &decl, &nir_reg->uses, &nir_reg->if_uses)) {
441
if (nir_reg->bit_size == 64) {
442
if (nir_reg->num_components > 2) {
443
fprintf(stderr, "NIR-to-TGSI: error: %d-component NIR r%d\n",
444
nir_reg->num_components, nir_reg->index);
445
}
446
447
write_mask = ntt_64bit_write_mask(write_mask);
448
}
449
450
decl = ureg_writemask(ureg_DECL_temporary(c->ureg), write_mask);
451
}
452
} else {
453
decl = ureg_DECL_array_temporary(c->ureg, nir_reg->num_array_elems,
454
true);
455
}
456
c->reg_temp[nir_reg->index] = decl;
457
}
458
}
459
460
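/* Declares a TGSI immediate for a load_const: float-typed for
* non-native-integer drivers, otherwise uint-typed with 64-bit values split
* into low/high 32-bit words.
*/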
static struct ureg_src
461
ntt_get_load_const_src(struct ntt_compile *c, nir_load_const_instr *instr)
462
{
463
int num_components = instr->def.num_components;
464
465
if (!c->native_integers) {
466
float values[4];
467
assert(instr->def.bit_size == 32);
468
for (int i = 0; i < num_components; i++)
469
values[i] = uif(instr->value[i].u32);
470
471
return ureg_DECL_immediate(c->ureg, values, num_components);
472
} else {
473
uint32_t values[4];
474
475
if (instr->def.bit_size == 32) {
476
for (int i = 0; i < num_components; i++)
477
values[i] = instr->value[i].u32;
478
} else {
479
assert(num_components <= 2);
480
for (int i = 0; i < num_components; i++) {
481
values[i * 2 + 0] = instr->value[i].u64 & 0xffffffff;
482
values[i * 2 + 1] = instr->value[i].u64 >> 32;
483
}
484
num_components *= 2;
485
}
486
487
return ureg_DECL_immediate_uint(c->ureg, values, num_components);
488
}
489
}
490
491
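/* Returns an indirect-addressing source for the given value: the value itself
* on any_reg_as_address drivers, otherwise the result of ARL/UARL into one of
* the two ADDR registers.
*/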
static struct ureg_src
492
ntt_reladdr(struct ntt_compile *c, struct ureg_src addr)
493
{
494
if (c->any_reg_as_address) {
495
/* Make sure we're getting the refcounting right even on any_reg
496
* drivers.
497
*/
498
c->next_addr_reg++;
499
500
return ureg_scalar(addr, 0);
501
}
502
503
assert(c->next_addr_reg < ARRAY_SIZE(c->addr_reg));
504
505
if (!c->addr_declared[c->next_addr_reg]) {
506
c->addr_reg[c->next_addr_reg] = ureg_writemask(ureg_DECL_address(c->ureg),
507
TGSI_WRITEMASK_X);
508
c->addr_declared[c->next_addr_reg] = true;
509
}
510
511
if (c->native_integers)
512
ureg_UARL(c->ureg, c->addr_reg[c->next_addr_reg], addr);
513
else
514
ureg_ARL(c->ureg, c->addr_reg[c->next_addr_reg], addr);
515
return ureg_scalar(ureg_src(c->addr_reg[c->next_addr_reg++]), 0);
516
}
517
518
static void
519
ntt_put_reladdr(struct ntt_compile *c)
520
{
521
c->next_addr_reg--;
522
assert(c->next_addr_reg >= 0);
523
}
524
525
static void
526
ntt_reladdr_dst_put(struct ntt_compile *c, struct ureg_dst dst)
527
{
528
if (c->any_reg_as_address)
529
return;
530
531
if (dst.Indirect)
532
ntt_put_reladdr(c);
533
if (dst.DimIndirect)
534
ntt_put_reladdr(c);
535
}
536
537
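/* Returns the ureg_src for a NIR src: an immediate for load_const SSA values,
* the SSA temp, or the register temp with any base offset and indirect applied.
*/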
static struct ureg_src
538
ntt_get_src(struct ntt_compile *c, nir_src src)
539
{
540
if (src.is_ssa) {
541
if (src.ssa->parent_instr->type == nir_instr_type_load_const)
542
return ntt_get_load_const_src(c, nir_instr_as_load_const(src.ssa->parent_instr));
543
544
return ureg_src(c->ssa_temp[src.ssa->index]);
545
} else {
546
nir_register *reg = src.reg.reg;
547
struct ureg_dst reg_temp = c->reg_temp[reg->index];
548
reg_temp.Index += src.reg.base_offset;
549
550
if (src.reg.indirect) {
551
struct ureg_src offset = ntt_get_src(c, *src.reg.indirect);
552
return ureg_src_indirect(ureg_src(reg_temp),
553
ntt_reladdr(c, offset));
554
} else {
555
return ureg_src(reg_temp);
556
}
557
}
558
}
559
560
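/* Returns an ALU source with its swizzle, abs, and negate applied. 64-bit
* sources are swizzled as 32-bit channel pairs picked from the write mask.
*/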
static struct ureg_src
561
ntt_get_alu_src(struct ntt_compile *c, nir_alu_instr *instr, int i)
562
{
563
nir_alu_src src = instr->src[i];
564
struct ureg_src usrc = ntt_get_src(c, src.src);
565
566
if (nir_src_bit_size(src.src) == 64) {
567
int chan0 = 0, chan1 = 1;
568
if (nir_op_infos[instr->op].input_sizes[i] == 0) {
569
chan0 = ffs(instr->dest.write_mask) - 1;
570
chan1 = ffs(instr->dest.write_mask & ~(1 << chan0)) - 1;
571
if (chan1 == -1)
572
chan1 = chan0;
573
}
574
usrc = ureg_swizzle(usrc,
575
src.swizzle[chan0] * 2,
576
src.swizzle[chan0] * 2 + 1,
577
src.swizzle[chan1] * 2,
578
src.swizzle[chan1] * 2 + 1);
579
} else {
580
usrc = ureg_swizzle(usrc,
581
src.swizzle[0],
582
src.swizzle[1],
583
src.swizzle[2],
584
src.swizzle[3]);
585
}
586
587
if (src.abs)
588
usrc = ureg_abs(usrc);
589
if (src.negate)
590
usrc = ureg_negate(usrc);
591
592
return usrc;
593
}
594
595
/* Reswizzles a source so that the unset channels in the write mask still refer
596
* to one of the channels present in the write mask.
597
*/
598
static struct ureg_src
599
ntt_swizzle_for_write_mask(struct ureg_src src, uint32_t write_mask)
600
{
601
assert(write_mask);
602
int first_chan = ffs(write_mask) - 1;
603
return ureg_swizzle(src,
604
(write_mask & TGSI_WRITEMASK_X) ? TGSI_SWIZZLE_X : first_chan,
605
(write_mask & TGSI_WRITEMASK_Y) ? TGSI_SWIZZLE_Y : first_chan,
606
(write_mask & TGSI_WRITEMASK_Z) ? TGSI_SWIZZLE_Z : first_chan,
607
(write_mask & TGSI_WRITEMASK_W) ? TGSI_SWIZZLE_W : first_chan);
608
}
609
610
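/* Allocates the destination for an SSA def (a temp, or a TGSI output when
* possible) with the writemask sized to its components and bit size.
*/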
static struct ureg_dst *
611
ntt_get_ssa_def_decl(struct ntt_compile *c, nir_ssa_def *ssa)
612
{
613
uint32_t writemask = BITSET_MASK(ssa->num_components);
614
if (ssa->bit_size == 64)
615
writemask = ntt_64bit_write_mask(writemask);
616
617
struct ureg_dst dst;
618
if (!ntt_try_store_in_tgsi_output(c, &dst, &ssa->uses, &ssa->if_uses))
619
dst = ureg_DECL_temporary(c->ureg);
620
621
c->ssa_temp[ssa->index] = ureg_writemask(dst, writemask);
622
623
return &c->ssa_temp[ssa->index];
624
}
625
626
static struct ureg_dst *
627
ntt_get_dest_decl(struct ntt_compile *c, nir_dest *dest)
628
{
629
if (dest->is_ssa)
630
return ntt_get_ssa_def_decl(c, &dest->ssa);
631
else
632
return &c->reg_temp[dest->reg.reg->index];
633
}
634
635
static struct ureg_dst
636
ntt_get_dest(struct ntt_compile *c, nir_dest *dest)
637
{
638
struct ureg_dst dst = *ntt_get_dest_decl(c, dest);
639
640
if (!dest->is_ssa) {
641
dst.Index += dest->reg.base_offset;
642
643
if (dest->reg.indirect) {
644
struct ureg_src offset = ntt_get_src(c, *dest->reg.indirect);
645
dst = ureg_dst_indirect(dst, ntt_reladdr(c, offset));
646
}
647
}
648
649
return dst;
650
}
651
652
/* For an SSA dest being populated by a constant src, replace the storage with
653
* a copy of the ureg_src.
654
*/
655
static void
656
ntt_store_def(struct ntt_compile *c, nir_ssa_def *def, struct ureg_src src)
657
{
658
if (!src.Negate && !src.Absolute && !src.Indirect && !src.DimIndirect &&
659
src.SwizzleX == TGSI_SWIZZLE_X &&
660
(src.SwizzleY == TGSI_SWIZZLE_Y || def->num_components < 2) &&
661
(src.SwizzleZ == TGSI_SWIZZLE_Z || def->num_components < 3) &&
662
(src.SwizzleW == TGSI_SWIZZLE_W || def->num_components < 4)) {
663
switch (src.File) {
664
case TGSI_FILE_IMMEDIATE:
665
case TGSI_FILE_INPUT:
666
case TGSI_FILE_CONSTANT:
667
case TGSI_FILE_SYSTEM_VALUE:
668
c->ssa_temp[def->index] = ureg_dst(src);
669
return;
670
}
671
}
672
673
ureg_MOV(c->ureg, *ntt_get_ssa_def_decl(c, def), src);
674
}
675
676
static void
677
ntt_store(struct ntt_compile *c, nir_dest *dest, struct ureg_src src)
678
{
679
if (dest->is_ssa)
680
ntt_store_def(c, &dest->ssa, src);
681
else {
682
struct ureg_dst dst = ntt_get_dest(c, dest);
683
ureg_MOV(c->ureg, dst, src);
684
}
685
}
686
687
static void
688
ntt_emit_scalar(struct ntt_compile *c, unsigned tgsi_op,
689
struct ureg_dst dst,
690
struct ureg_src src0,
691
struct ureg_src src1)
692
{
693
unsigned i;
694
int num_src;
695
696
/* POW is the only 2-operand scalar op. */
697
if (tgsi_op == TGSI_OPCODE_POW) {
698
num_src = 2;
699
} else {
700
num_src = 1;
701
src1 = src0;
702
}
703
704
for (i = 0; i < 4; i++) {
705
if (dst.WriteMask & (1 << i)) {
706
struct ureg_dst this_dst = dst;
707
struct ureg_src srcs[2] = {
708
ureg_scalar(src0, i),
709
ureg_scalar(src1, i),
710
};
711
this_dst.WriteMask = (1 << i);
712
713
ureg_insn(c->ureg, tgsi_op, &this_dst, 1, srcs, num_src, false);
714
}
715
}
716
}
717
718
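/* Translates a NIR ALU instruction to TGSI: the op_map table covers the common
* 32- and 64-bit cases, and the switch below handles booleans, conversions,
* packing, and scalar-only ops, plus the fixup MOVs for TGSI's odd
* 64-to-32-bit channel layouts.
*/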
static void
719
ntt_emit_alu(struct ntt_compile *c, nir_alu_instr *instr)
720
{
721
struct ureg_src src[4];
722
struct ureg_dst dst;
723
unsigned i;
724
int dst_64 = nir_dest_bit_size(instr->dest.dest) == 64;
725
int src_64 = nir_src_bit_size(instr->src[0].src) == 64;
726
int num_srcs = nir_op_infos[instr->op].num_inputs;
727
728
assert(num_srcs <= ARRAY_SIZE(src));
729
for (i = 0; i < num_srcs; i++)
730
src[i] = ntt_get_alu_src(c, instr, i);
731
dst = ntt_get_dest(c, &instr->dest.dest);
732
733
if (instr->dest.saturate)
734
dst.Saturate = true;
735
736
if (dst_64)
737
dst = ureg_writemask(dst, ntt_64bit_write_mask(instr->dest.write_mask));
738
else
739
dst = ureg_writemask(dst, instr->dest.write_mask);
740
741
static enum tgsi_opcode op_map[][2] = {
742
[nir_op_mov] = { TGSI_OPCODE_MOV, TGSI_OPCODE_MOV },
743
744
/* fabs/fneg 32-bit are special-cased below. */
745
[nir_op_fabs] = { 0, TGSI_OPCODE_DABS },
746
[nir_op_fneg] = { 0, TGSI_OPCODE_DNEG },
747
748
[nir_op_fdot2] = { TGSI_OPCODE_DP2 },
749
[nir_op_fdot3] = { TGSI_OPCODE_DP3 },
750
[nir_op_fdot4] = { TGSI_OPCODE_DP4 },
751
[nir_op_ffloor] = { TGSI_OPCODE_FLR, TGSI_OPCODE_DFLR },
752
[nir_op_ffract] = { TGSI_OPCODE_FRC, TGSI_OPCODE_DFRAC },
753
[nir_op_fceil] = { TGSI_OPCODE_CEIL, TGSI_OPCODE_DCEIL },
754
[nir_op_fround_even] = { TGSI_OPCODE_ROUND, TGSI_OPCODE_DROUND },
755
[nir_op_fdiv] = { TGSI_OPCODE_DIV, TGSI_OPCODE_DDIV },
756
[nir_op_idiv] = { TGSI_OPCODE_IDIV, TGSI_OPCODE_I64DIV },
757
[nir_op_udiv] = { TGSI_OPCODE_UDIV, TGSI_OPCODE_U64DIV },
758
759
[nir_op_frcp] = { 0, TGSI_OPCODE_DRCP },
760
[nir_op_frsq] = { 0, TGSI_OPCODE_DRSQ },
761
[nir_op_fsqrt] = { 0, TGSI_OPCODE_DSQRT },
762
763
/* The conversions will have one combination of src and dst bitsize. */
764
[nir_op_f2f32] = { 0, TGSI_OPCODE_D2F },
765
[nir_op_f2f64] = { TGSI_OPCODE_F2D },
766
[nir_op_i2i64] = { TGSI_OPCODE_I2I64 },
767
768
[nir_op_f2i32] = { TGSI_OPCODE_F2I, TGSI_OPCODE_D2I },
769
[nir_op_f2i64] = { TGSI_OPCODE_F2I64, TGSI_OPCODE_D2I64 },
770
[nir_op_f2u32] = { TGSI_OPCODE_F2U, TGSI_OPCODE_D2U },
771
[nir_op_f2u64] = { TGSI_OPCODE_F2U64, TGSI_OPCODE_D2U64 },
772
[nir_op_i2f32] = { TGSI_OPCODE_I2F, TGSI_OPCODE_I642F },
773
[nir_op_i2f64] = { TGSI_OPCODE_I2D, TGSI_OPCODE_I642D },
774
[nir_op_u2f32] = { TGSI_OPCODE_U2F, TGSI_OPCODE_U642F },
775
[nir_op_u2f64] = { TGSI_OPCODE_U2D, TGSI_OPCODE_U642D },
776
777
[nir_op_slt] = { TGSI_OPCODE_SLT },
778
[nir_op_sge] = { TGSI_OPCODE_SGE },
779
[nir_op_seq] = { TGSI_OPCODE_SEQ },
780
[nir_op_sne] = { TGSI_OPCODE_SNE },
781
782
[nir_op_flt32] = { TGSI_OPCODE_FSLT, TGSI_OPCODE_DSLT },
783
[nir_op_fge32] = { TGSI_OPCODE_FSGE, TGSI_OPCODE_DSGE },
784
[nir_op_feq32] = { TGSI_OPCODE_FSEQ, TGSI_OPCODE_DSEQ },
785
[nir_op_fneu32] = { TGSI_OPCODE_FSNE, TGSI_OPCODE_DSNE },
786
787
[nir_op_ilt32] = { TGSI_OPCODE_ISLT, TGSI_OPCODE_I64SLT },
788
[nir_op_ige32] = { TGSI_OPCODE_ISGE, TGSI_OPCODE_I64SGE },
789
[nir_op_ieq32] = { TGSI_OPCODE_USEQ, TGSI_OPCODE_U64SEQ },
790
[nir_op_ine32] = { TGSI_OPCODE_USNE, TGSI_OPCODE_U64SNE },
791
792
[nir_op_ult32] = { TGSI_OPCODE_USLT, TGSI_OPCODE_U64SLT },
793
[nir_op_uge32] = { TGSI_OPCODE_USGE, TGSI_OPCODE_U64SGE },
794
795
[nir_op_iabs] = { TGSI_OPCODE_IABS, TGSI_OPCODE_I64ABS },
796
[nir_op_ineg] = { TGSI_OPCODE_INEG, TGSI_OPCODE_I64NEG },
797
[nir_op_fsign] = { TGSI_OPCODE_SSG },
798
[nir_op_isign] = { TGSI_OPCODE_ISSG },
799
[nir_op_ftrunc] = { TGSI_OPCODE_TRUNC, TGSI_OPCODE_DTRUNC },
800
[nir_op_fddx] = { TGSI_OPCODE_DDX },
801
[nir_op_fddy] = { TGSI_OPCODE_DDY },
802
[nir_op_fddx_coarse] = { TGSI_OPCODE_DDX },
803
[nir_op_fddy_coarse] = { TGSI_OPCODE_DDY },
804
[nir_op_fddx_fine] = { TGSI_OPCODE_DDX_FINE },
805
[nir_op_fddy_fine] = { TGSI_OPCODE_DDY_FINE },
806
[nir_op_pack_half_2x16] = { TGSI_OPCODE_PK2H },
807
[nir_op_unpack_half_2x16] = { TGSI_OPCODE_UP2H },
808
[nir_op_ibitfield_extract] = { TGSI_OPCODE_IBFE },
809
[nir_op_ubitfield_extract] = { TGSI_OPCODE_UBFE },
810
[nir_op_bitfield_insert] = { TGSI_OPCODE_BFI },
811
[nir_op_bitfield_reverse] = { TGSI_OPCODE_BREV },
812
[nir_op_bit_count] = { TGSI_OPCODE_POPC },
813
[nir_op_ifind_msb] = { TGSI_OPCODE_IMSB },
814
[nir_op_ufind_msb] = { TGSI_OPCODE_UMSB },
815
[nir_op_find_lsb] = { TGSI_OPCODE_LSB },
816
[nir_op_fadd] = { TGSI_OPCODE_ADD, TGSI_OPCODE_DADD },
817
[nir_op_iadd] = { TGSI_OPCODE_UADD, TGSI_OPCODE_U64ADD },
818
[nir_op_fmul] = { TGSI_OPCODE_MUL, TGSI_OPCODE_DMUL },
819
[nir_op_imul] = { TGSI_OPCODE_UMUL, TGSI_OPCODE_U64MUL },
820
[nir_op_imod] = { TGSI_OPCODE_MOD, TGSI_OPCODE_I64MOD },
821
[nir_op_umod] = { TGSI_OPCODE_UMOD, TGSI_OPCODE_U64MOD },
822
[nir_op_imul_high] = { TGSI_OPCODE_IMUL_HI },
823
[nir_op_umul_high] = { TGSI_OPCODE_UMUL_HI },
824
[nir_op_ishl] = { TGSI_OPCODE_SHL, TGSI_OPCODE_U64SHL },
825
[nir_op_ishr] = { TGSI_OPCODE_ISHR, TGSI_OPCODE_I64SHR },
826
[nir_op_ushr] = { TGSI_OPCODE_USHR, TGSI_OPCODE_U64SHR },
827
828
/* These bitwise ops don't care about 32 vs 64 types, so they have the
829
* same TGSI op.
830
*/
831
[nir_op_inot] = { TGSI_OPCODE_NOT, TGSI_OPCODE_NOT },
832
[nir_op_iand] = { TGSI_OPCODE_AND, TGSI_OPCODE_AND },
833
[nir_op_ior] = { TGSI_OPCODE_OR, TGSI_OPCODE_OR },
834
[nir_op_ixor] = { TGSI_OPCODE_XOR, TGSI_OPCODE_XOR },
835
836
[nir_op_fmin] = { TGSI_OPCODE_MIN, TGSI_OPCODE_DMIN },
837
[nir_op_imin] = { TGSI_OPCODE_IMIN, TGSI_OPCODE_I64MIN },
838
[nir_op_umin] = { TGSI_OPCODE_UMIN, TGSI_OPCODE_U64MIN },
839
[nir_op_fmax] = { TGSI_OPCODE_MAX, TGSI_OPCODE_DMAX },
840
[nir_op_imax] = { TGSI_OPCODE_IMAX, TGSI_OPCODE_I64MAX },
841
[nir_op_umax] = { TGSI_OPCODE_UMAX, TGSI_OPCODE_U64MAX },
842
[nir_op_ffma] = { TGSI_OPCODE_MAD, TGSI_OPCODE_DMAD },
843
[nir_op_ldexp] = { TGSI_OPCODE_LDEXP, 0 },
844
};
845
846
/* TGSI's 64 bit compares storing to 32-bit are weird and write .xz instead
847
* of .xy. Store to a temp and move it to the real dst.
848
*/
849
bool tgsi_64bit_compare = src_64 && !dst_64 &&
850
(num_srcs == 2 ||
851
nir_op_infos[instr->op].output_type == nir_type_bool32) &&
852
(dst.WriteMask != TGSI_WRITEMASK_X);
853
854
/* TGSI 64bit-to-32-bit conversions only generate results in the .xy
855
* channels and will need to get fixed up.
856
*/
857
bool tgsi_64bit_downconvert = (src_64 && !dst_64 &&
858
num_srcs == 1 && !tgsi_64bit_compare &&
859
(dst.WriteMask & ~TGSI_WRITEMASK_XY));
860
861
struct ureg_dst real_dst = ureg_dst_undef();
862
if (tgsi_64bit_compare || tgsi_64bit_downconvert) {
863
real_dst = dst;
864
dst = ureg_DECL_temporary(c->ureg);
865
}
866
867
bool table_op64 = src_64;
868
if (instr->op < ARRAY_SIZE(op_map) && op_map[instr->op][table_op64] != 0) {
869
/* The normal path for NIR to TGSI ALU op translation */
870
ureg_insn(c->ureg, op_map[instr->op][table_op64],
871
&dst, 1, src, num_srcs, false);
872
} else {
873
/* Special cases for NIR to TGSI ALU op translation. */
874
875
/* TODO: Use something like the ntt_store() path for the MOV calls so we
876
* don't emit extra MOVs for swizzles/srcmods of inputs/const/imm.
877
*/
878
879
switch (instr->op) {
880
case nir_op_u2u64:
881
ureg_AND(c->ureg, dst, ureg_swizzle(src[0],
882
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
883
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
884
ureg_imm4u(c->ureg, ~0, 0, ~0, 0));
885
break;
886
887
case nir_op_i2i32:
888
case nir_op_u2u32:
889
assert(src_64);
890
ureg_MOV(c->ureg, dst, ureg_swizzle(src[0],
891
TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z,
892
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X));
893
break;
894
895
case nir_op_fabs:
896
ureg_MOV(c->ureg, dst, ureg_abs(src[0]));
897
break;
898
899
case nir_op_fsat:
900
if (dst_64) {
901
ureg_MIN(c->ureg, dst, src[0], ntt_64bit_1f(c));
902
ureg_MAX(c->ureg, dst, ureg_src(dst), ureg_imm1u(c->ureg, 0));
903
} else {
904
ureg_MOV(c->ureg, ureg_saturate(dst), src[0]);
905
}
906
break;
907
908
case nir_op_fneg:
909
ureg_MOV(c->ureg, dst, ureg_negate(src[0]));
910
break;
911
912
/* NOTE: TGSI 32-bit math ops have the old "one source channel
913
* replicated to all dst channels" behavior, while 64 is normal mapping
914
* of src channels to dst.
915
*/
916
case nir_op_frcp:
917
assert(!dst_64);
918
ntt_emit_scalar(c, TGSI_OPCODE_RCP, dst, src[0], src[1]);
919
break;
920
921
case nir_op_frsq:
922
assert(!dst_64);
923
ntt_emit_scalar(c, TGSI_OPCODE_RSQ, dst, src[0], src[1]);
924
break;
925
926
case nir_op_fsqrt:
927
assert(!dst_64);
928
ntt_emit_scalar(c, TGSI_OPCODE_SQRT, dst, src[0], src[1]);
929
break;
930
931
case nir_op_fexp2:
932
assert(!dst_64);
933
ntt_emit_scalar(c, TGSI_OPCODE_EX2, dst, src[0], src[1]);
934
break;
935
936
case nir_op_flog2:
937
assert(!dst_64);
938
ntt_emit_scalar(c, TGSI_OPCODE_LG2, dst, src[0], src[1]);
939
break;
940
941
case nir_op_b2f32:
942
ureg_AND(c->ureg, dst, src[0], ureg_imm1f(c->ureg, 1.0));
943
break;
944
945
case nir_op_b2f64:
946
ureg_AND(c->ureg, dst,
947
ureg_swizzle(src[0],
948
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
949
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
950
ntt_64bit_1f(c));
951
break;
952
953
case nir_op_f2b32:
954
if (src_64)
955
ureg_DSNE(c->ureg, dst, src[0], ureg_imm1f(c->ureg, 0));
956
else
957
ureg_FSNE(c->ureg, dst, src[0], ureg_imm1f(c->ureg, 0));
958
break;
959
960
case nir_op_i2b32:
961
if (src_64) {
962
ureg_U64SNE(c->ureg, dst, src[0], ureg_imm1u(c->ureg, 0));
963
} else
964
ureg_USNE(c->ureg, dst, src[0], ureg_imm1u(c->ureg, 0));
965
break;
966
967
case nir_op_b2i32:
968
ureg_AND(c->ureg, dst, src[0], ureg_imm1u(c->ureg, 1));
969
break;
970
971
case nir_op_b2i64:
972
ureg_AND(c->ureg, dst,
973
ureg_swizzle(src[0],
974
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
975
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
976
ureg_imm4u(c->ureg, 1, 0, 1, 0));
977
break;
978
979
case nir_op_fsin:
980
ntt_emit_scalar(c, TGSI_OPCODE_SIN, dst, src[0], src[1]);
981
break;
982
983
case nir_op_fcos:
984
ntt_emit_scalar(c, TGSI_OPCODE_COS, dst, src[0], src[1]);
985
break;
986
987
case nir_op_fsub:
988
assert(!dst_64);
989
ureg_ADD(c->ureg, dst, src[0], ureg_negate(src[1]));
990
break;
991
992
case nir_op_isub:
993
assert(!dst_64);
994
ureg_UADD(c->ureg, dst, src[0], ureg_negate(src[1]));
995
break;
996
997
case nir_op_fmod:
998
unreachable("should be handled by .lower_fmod = true");
999
break;
1000
1001
case nir_op_fpow:
1002
ntt_emit_scalar(c, TGSI_OPCODE_POW, dst, src[0], src[1]);
1003
break;
1004
1005
case nir_op_flrp:
1006
ureg_LRP(c->ureg, dst, src[2], src[1], src[0]);
1007
break;
1008
1009
case nir_op_pack_64_2x32_split:
1010
ureg_MOV(c->ureg, ureg_writemask(dst, TGSI_WRITEMASK_XZ),
1011
ureg_swizzle(src[0],
1012
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
1013
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
1014
ureg_MOV(c->ureg, ureg_writemask(dst, TGSI_WRITEMASK_YW),
1015
ureg_swizzle(src[1],
1016
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
1017
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
1018
break;
1019
1020
case nir_op_unpack_64_2x32_split_x:
1021
ureg_MOV(c->ureg, dst, ureg_swizzle(src[0],
1022
TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z,
1023
TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z));
1024
break;
1025
1026
case nir_op_unpack_64_2x32_split_y:
1027
ureg_MOV(c->ureg, dst, ureg_swizzle(src[0],
1028
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W,
1029
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W));
1030
break;
1031
1032
case nir_op_b32csel:
1033
if (nir_src_bit_size(instr->src[1].src) == 64) {
1034
ureg_UCMP(c->ureg, dst, ureg_swizzle(src[0],
1035
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
1036
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y),
1037
src[1], src[2]);
1038
} else {
1039
ureg_UCMP(c->ureg, dst, src[0], src[1], src[2]);
1040
}
1041
break;
1042
1043
case nir_op_fcsel:
1044
/* NIR is src0 != 0 ? src1 : src2.
1045
* TGSI is src0 < 0 ? src1 : src2.
1046
*
1047
* However, fcsel so far as I can find only appears on
1048
* bools-as-floats (1.0 or 0.0), so we can negate it for the TGSI op.
1049
*/
1050
ureg_CMP(c->ureg, dst, ureg_negate(ureg_abs(src[0])), src[1], src[2]);
1051
break;
1052
1053
/* It would be nice if we could get this left as scalar in NIR, since
1054
* the TGSI op is scalar.
1055
*/
1056
case nir_op_frexp_sig:
1057
case nir_op_frexp_exp: {
1058
assert(src_64);
1059
struct ureg_dst temp = ureg_DECL_temporary(c->ureg);
1060
1061
for (int chan = 0; chan < 2; chan++) {
1062
int wm = 1 << chan;
1063
1064
if (!(instr->dest.write_mask & wm))
1065
continue;
1066
1067
struct ureg_dst dsts[2] = { temp, temp };
1068
if (instr->op == nir_op_frexp_sig) {
1069
dsts[0] = ureg_writemask(dst, ntt_64bit_write_mask(wm));
1070
} else {
1071
dsts[1] = ureg_writemask(dst, wm);
1072
}
1073
1074
struct ureg_src chan_src = ureg_swizzle(src[0],
1075
chan * 2, chan * 2 + 1,
1076
chan * 2, chan * 2 + 1);
1077
1078
ureg_insn(c->ureg, TGSI_OPCODE_DFRACEXP,
1079
dsts, 2,
1080
&chan_src, 1, false);
1081
}
1082
1083
ureg_release_temporary(c->ureg, temp);
1084
break;
1085
}
1086
1087
case nir_op_ldexp:
1088
assert(dst_64); /* 32bit handled in table. */
1089
ureg_DLDEXP(c->ureg, dst, src[0],
1090
ureg_swizzle(src[1],
1091
TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
1092
TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y));
1093
break;
1094
1095
case nir_op_vec4:
1096
case nir_op_vec3:
1097
case nir_op_vec2:
1098
unreachable("covered by nir_lower_vec_to_movs()");
1099
1100
default:
1101
fprintf(stderr, "Unknown NIR opcode: %s\n", nir_op_infos[instr->op].name);
1102
unreachable("Unknown NIR opcode");
1103
}
1104
}
1105
1106
/* 64-bit op fixup movs */
1107
if (!ureg_dst_is_undef(real_dst)) {
1108
if (tgsi_64bit_compare) {
1109
ureg_MOV(c->ureg, real_dst,
1110
ureg_swizzle(ureg_src(dst), 0, 2, 0, 2));
1111
} else {
1112
assert(tgsi_64bit_downconvert);
1113
uint8_t swizzle[] = {0, 0, 0, 0};
1114
uint32_t second_bit = real_dst.WriteMask & ~(1 << (ffs(real_dst.WriteMask) - 1));
1115
if (second_bit)
1116
swizzle[ffs(second_bit) - 1] = 1;
1117
ureg_MOV(c->ureg, real_dst, ureg_swizzle(ureg_src(dst),
1118
swizzle[0],
1119
swizzle[1],
1120
swizzle[2],
1121
swizzle[3]));
1122
}
1123
ureg_release_temporary(c->ureg, dst);
1124
}
1125
}
1126
1127
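/* The following helpers apply a NIR offset source to a ureg operand (or its
* dimension): folded into the index when constant, otherwise applied through
* an address register.
*/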
static struct ureg_src
1128
ntt_ureg_src_indirect(struct ntt_compile *c, struct ureg_src usrc,
1129
nir_src src)
1130
{
1131
if (nir_src_is_const(src)) {
1132
usrc.Index += ntt_src_as_uint(c, src);
1133
return usrc;
1134
} else {
1135
return ureg_src_indirect(usrc, ntt_reladdr(c, ntt_get_src(c, src)));
1136
}
1137
}
1138
1139
static struct ureg_dst
1140
ntt_ureg_dst_indirect(struct ntt_compile *c, struct ureg_dst dst,
1141
nir_src src)
1142
{
1143
if (nir_src_is_const(src)) {
1144
dst.Index += ntt_src_as_uint(c, src);
1145
return dst;
1146
} else {
1147
return ureg_dst_indirect(dst, ntt_reladdr(c, ntt_get_src(c, src)));
1148
}
1149
}
1150
1151
static struct ureg_src
1152
ntt_ureg_src_dimension_indirect(struct ntt_compile *c, struct ureg_src usrc,
1153
nir_src src)
1154
{
1155
if (nir_src_is_const(src)) {
1156
return ureg_src_dimension(usrc, ntt_src_as_uint(c, src));
1157
}
1158
else
1159
{
1160
return ureg_src_dimension_indirect(usrc,
1161
ntt_reladdr(c, ntt_get_src(c, src)),
1162
0);
1163
}
1164
}
1165
1166
static struct ureg_dst
1167
ntt_ureg_dst_dimension_indirect(struct ntt_compile *c, struct ureg_dst udst,
1168
nir_src src)
1169
{
1170
if (nir_src_is_const(src)) {
1171
return ureg_dst_dimension(udst, ntt_src_as_uint(c, src));
1172
} else {
1173
return ureg_dst_dimension_indirect(udst,
1174
ntt_reladdr(c, ntt_get_src(c, src)),
1175
0);
1176
}
1177
}
1178
/* Some load operations in NIR will have a fractional offset that we need to
1179
* swizzle down before storing to the result register.
1180
*/
1181
static struct ureg_src
1182
ntt_shift_by_frac(struct ureg_src src, unsigned frac, unsigned num_components)
1183
{
1184
return ureg_swizzle(src,
1185
frac,
1186
frac + MIN2(num_components - 1, 1),
1187
frac + MIN2(num_components - 1, 2),
1188
frac + MIN2(num_components - 1, 3));
1189
}
1190
1191
1192
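/* Emits a UBO load: load_ubo_vec4 becomes a plain reference into the 2D
* constant file, while load_ubo (PIPE_CAP_LOAD_CONSTBUF) becomes a
* TGSI_OPCODE_LOAD from the constant file with an offset source.
*/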
static void
1193
ntt_emit_load_ubo(struct ntt_compile *c, nir_intrinsic_instr *instr)
1194
{
1195
int bit_size = nir_dest_bit_size(instr->dest);
1196
assert(bit_size == 32 || instr->num_components <= 2);
1197
1198
struct ureg_src src = ureg_src_register(TGSI_FILE_CONSTANT, 0);
1199
1200
src = ntt_ureg_src_dimension_indirect(c, src, instr->src[0]);
1201
1202
if (instr->intrinsic == nir_intrinsic_load_ubo_vec4) {
1203
/* !PIPE_CAP_LOAD_CONSTBUF: Just emit it as a vec4 reference to the const
1204
* file.
1205
*/
1206
1207
if (nir_src_is_const(instr->src[1])) {
1208
src.Index += ntt_src_as_uint(c, instr->src[1]);
1209
} else {
1210
src = ureg_src_indirect(src, ntt_reladdr(c, ntt_get_src(c, instr->src[1])));
1211
}
1212
1213
int start_component = nir_intrinsic_component(instr);
1214
if (bit_size == 64)
1215
start_component *= 2;
1216
1217
src = ntt_shift_by_frac(src, start_component,
1218
instr->num_components * bit_size / 32);
1219
1220
ntt_store(c, &instr->dest, src);
1221
} else {
1222
/* PIPE_CAP_LOAD_CONSTBUF: Not necessarily vec4 aligned, emit a
1223
* TGSI_OPCODE_LOAD instruction from the const file.
1224
*/
1225
struct ureg_dst dst = ntt_get_dest(c, &instr->dest);
1226
struct ureg_src srcs[2] = {
1227
src,
1228
ntt_get_src(c, instr->src[1]),
1229
};
1230
ureg_memory_insn(c->ureg, TGSI_OPCODE_LOAD,
1231
&dst, 1,
1232
srcs, ARRAY_SIZE(srcs),
1233
0 /* qualifier */,
1234
0 /* tex target */,
1235
0 /* format: unused */
1236
);
1237
}
1238
}
1239
1240
static unsigned
1241
ntt_get_access_qualifier(nir_intrinsic_instr *instr)
1242
{
1243
enum gl_access_qualifier access = nir_intrinsic_access(instr);
1244
unsigned qualifier = 0;
1245
1246
if (access & ACCESS_COHERENT)
1247
qualifier |= TGSI_MEMORY_COHERENT;
1248
if (access & ACCESS_VOLATILE)
1249
qualifier |= TGSI_MEMORY_VOLATILE;
1250
if (access & ACCESS_RESTRICT)
1251
qualifier |= TGSI_MEMORY_RESTRICT;
1252
1253
return qualifier;
1254
}
1255
1256
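/* Emits SSBO and shared-memory loads, stores, atomics, and RESQ as TGSI memory
* instructions against the BUFFER or MEMORY file.
*/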
static void
1257
ntt_emit_mem(struct ntt_compile *c, nir_intrinsic_instr *instr,
1258
nir_variable_mode mode)
1259
{
1260
bool is_store = (instr->intrinsic == nir_intrinsic_store_ssbo ||
1261
instr->intrinsic == nir_intrinsic_store_shared);
1262
bool is_load = (instr->intrinsic == nir_intrinsic_load_ssbo ||
1263
instr->intrinsic == nir_intrinsic_load_shared);
1264
unsigned opcode;
1265
struct ureg_src src[4];
1266
int num_src = 0;
1267
int nir_src;
1268
1269
struct ureg_src memory;
1270
switch (mode) {
1271
case nir_var_mem_ssbo:
1272
memory = ntt_ureg_src_indirect(c, ureg_src_register(TGSI_FILE_BUFFER, 0),
1273
instr->src[is_store ? 1 : 0]);
1274
nir_src = 1;
1275
break;
1276
case nir_var_mem_shared:
1277
memory = ureg_src_register(TGSI_FILE_MEMORY, 0);
1278
nir_src = 0;
1279
break;
1280
default:
1281
unreachable("unknown memory type");
1282
}
1283
1284
if (is_store) {
1285
src[num_src++] = ntt_get_src(c, instr->src[nir_src + 1]); /* offset */
1286
src[num_src++] = ntt_get_src(c, instr->src[0]); /* value */
1287
} else {
1288
src[num_src++] = memory;
1289
if (instr->intrinsic != nir_intrinsic_get_ssbo_size) {
1290
src[num_src++] = ntt_get_src(c, instr->src[nir_src++]); /* offset */
1291
if (!is_load)
1292
src[num_src++] = ntt_get_src(c, instr->src[nir_src++]); /* value */
1293
}
1294
}
1295
1296
1297
switch (instr->intrinsic) {
1298
case nir_intrinsic_ssbo_atomic_add:
1299
case nir_intrinsic_shared_atomic_add:
1300
opcode = TGSI_OPCODE_ATOMUADD;
1301
break;
1302
case nir_intrinsic_ssbo_atomic_fadd:
1303
case nir_intrinsic_shared_atomic_fadd:
1304
opcode = TGSI_OPCODE_ATOMFADD;
1305
break;
1306
case nir_intrinsic_ssbo_atomic_imin:
1307
case nir_intrinsic_shared_atomic_imin:
1308
opcode = TGSI_OPCODE_ATOMIMIN;
1309
break;
1310
case nir_intrinsic_ssbo_atomic_imax:
1311
case nir_intrinsic_shared_atomic_imax:
1312
opcode = TGSI_OPCODE_ATOMIMAX;
1313
break;
1314
case nir_intrinsic_ssbo_atomic_umin:
1315
case nir_intrinsic_shared_atomic_umin:
1316
opcode = TGSI_OPCODE_ATOMUMIN;
1317
break;
1318
case nir_intrinsic_ssbo_atomic_umax:
1319
case nir_intrinsic_shared_atomic_umax:
1320
opcode = TGSI_OPCODE_ATOMUMAX;
1321
break;
1322
case nir_intrinsic_ssbo_atomic_and:
1323
case nir_intrinsic_shared_atomic_and:
1324
opcode = TGSI_OPCODE_ATOMAND;
1325
break;
1326
case nir_intrinsic_ssbo_atomic_or:
1327
case nir_intrinsic_shared_atomic_or:
1328
opcode = TGSI_OPCODE_ATOMOR;
1329
break;
1330
case nir_intrinsic_ssbo_atomic_xor:
1331
case nir_intrinsic_shared_atomic_xor:
1332
opcode = TGSI_OPCODE_ATOMXOR;
1333
break;
1334
case nir_intrinsic_ssbo_atomic_exchange:
1335
case nir_intrinsic_shared_atomic_exchange:
1336
opcode = TGSI_OPCODE_ATOMXCHG;
1337
break;
1338
case nir_intrinsic_ssbo_atomic_comp_swap:
1339
case nir_intrinsic_shared_atomic_comp_swap:
1340
opcode = TGSI_OPCODE_ATOMCAS;
1341
src[num_src++] = ntt_get_src(c, instr->src[nir_src++]);
1342
break;
1343
case nir_intrinsic_load_ssbo:
1344
case nir_intrinsic_load_shared:
1345
opcode = TGSI_OPCODE_LOAD;
1346
break;
1347
case nir_intrinsic_store_ssbo:
1348
case nir_intrinsic_store_shared:
1349
opcode = TGSI_OPCODE_STORE;
1350
break;
1351
case nir_intrinsic_get_ssbo_size:
1352
opcode = TGSI_OPCODE_RESQ;
1353
break;
1354
default:
1355
unreachable("unknown memory op");
1356
}
1357
1358
unsigned qualifier = 0;
1359
if (mode == nir_var_mem_ssbo &&
1360
instr->intrinsic != nir_intrinsic_get_ssbo_size) {
1361
qualifier = ntt_get_access_qualifier(instr);
1362
}
1363
1364
struct ureg_dst dst;
1365
if (is_store) {
1366
dst = ureg_dst(memory);
1367
1368
unsigned write_mask = nir_intrinsic_write_mask(instr);
1369
if (nir_src_bit_size(instr->src[0]) == 64)
1370
write_mask = ntt_64bit_write_mask(write_mask);
1371
dst = ureg_writemask(dst, write_mask);
1372
} else {
1373
dst = ntt_get_dest(c, &instr->dest);
1374
}
1375
1376
ureg_memory_insn(c->ureg, opcode,
1377
&dst, 1,
1378
src, num_src,
1379
qualifier,
1380
TGSI_TEXTURE_BUFFER,
1381
0 /* format: unused */);
1382
}
1383
1384
static enum tgsi_texture_type
1385
tgsi_target_from_sampler_dim(enum glsl_sampler_dim dim, bool is_array)
1386
{
1387
switch (dim) {
1388
case GLSL_SAMPLER_DIM_1D:
1389
return is_array ? TGSI_TEXTURE_1D_ARRAY : TGSI_TEXTURE_1D;
1390
case GLSL_SAMPLER_DIM_2D:
1391
return is_array ? TGSI_TEXTURE_2D_ARRAY : TGSI_TEXTURE_2D;
1392
case GLSL_SAMPLER_DIM_3D:
1393
return TGSI_TEXTURE_3D;
1394
case GLSL_SAMPLER_DIM_CUBE:
1395
return is_array ? TGSI_TEXTURE_CUBE_ARRAY : TGSI_TEXTURE_CUBE;
1396
case GLSL_SAMPLER_DIM_RECT:
1397
return TGSI_TEXTURE_RECT;
1398
case GLSL_SAMPLER_DIM_BUF:
1399
return TGSI_TEXTURE_BUFFER;
1400
default:
1401
unreachable("unknown sampler dim");
1402
}
1403
}
1404
1405
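/* Emits image load/store/atomic/size intrinsics as TGSI memory instructions on
* the IMAGE file, folding the MSAA sample index into the coordinate's unused
* channel.
*/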
static void
1406
ntt_emit_image_load_store(struct ntt_compile *c, nir_intrinsic_instr *instr)
1407
{
1408
unsigned op;
1409
struct ureg_src srcs[4];
1410
int num_src = 0;
1411
enum glsl_sampler_dim dim = nir_intrinsic_image_dim(instr);
1412
bool is_array = nir_intrinsic_image_array(instr);
1413
1414
struct ureg_dst temp = ureg_dst_undef();
1415
1416
enum tgsi_texture_type target = tgsi_target_from_sampler_dim(dim, is_array);
1417
1418
struct ureg_src resource =
1419
ntt_ureg_src_indirect(c, ureg_src_register(TGSI_FILE_IMAGE, 0),
1420
instr->src[0]);
1421
1422
struct ureg_dst dst;
1423
if (instr->intrinsic == nir_intrinsic_image_store) {
1424
dst = ureg_dst(resource);
1425
} else {
1426
srcs[num_src++] = resource;
1427
dst = ntt_get_dest(c, &instr->dest);
1428
}
1429
1430
if (instr->intrinsic != nir_intrinsic_image_size) {
1431
struct ureg_src coord = ntt_get_src(c, instr->src[1]);
1432
1433
if (dim == GLSL_SAMPLER_DIM_MS) {
1434
temp = ureg_DECL_temporary(c->ureg);
1435
ureg_MOV(c->ureg, temp, coord);
1436
ureg_MOV(c->ureg, ureg_writemask(temp, 1 << (is_array ? 3 : 2)),
1437
ureg_scalar(ntt_get_src(c, instr->src[2]), TGSI_SWIZZLE_X));
1438
coord = ureg_src(temp);
1439
}
1440
srcs[num_src++] = coord;
1441
1442
if (instr->intrinsic != nir_intrinsic_image_load) {
1443
srcs[num_src++] = ntt_get_src(c, instr->src[3]); /* data */
1444
if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
1445
srcs[num_src++] = ntt_get_src(c, instr->src[4]); /* data2 */
1446
}
1447
}
1448
1449
switch (instr->intrinsic) {
1450
case nir_intrinsic_image_load:
1451
op = TGSI_OPCODE_LOAD;
1452
break;
1453
case nir_intrinsic_image_store:
1454
op = TGSI_OPCODE_STORE;
1455
break;
1456
case nir_intrinsic_image_size:
1457
op = TGSI_OPCODE_RESQ;
1458
break;
1459
case nir_intrinsic_image_atomic_add:
1460
op = TGSI_OPCODE_ATOMUADD;
1461
break;
1462
case nir_intrinsic_image_atomic_fadd:
1463
op = TGSI_OPCODE_ATOMFADD;
1464
break;
1465
case nir_intrinsic_image_atomic_imin:
1466
op = TGSI_OPCODE_ATOMIMIN;
1467
break;
1468
case nir_intrinsic_image_atomic_umin:
1469
op = TGSI_OPCODE_ATOMUMIN;
1470
break;
1471
case nir_intrinsic_image_atomic_imax:
1472
op = TGSI_OPCODE_ATOMIMAX;
1473
break;
1474
case nir_intrinsic_image_atomic_umax:
1475
op = TGSI_OPCODE_ATOMUMAX;
1476
break;
1477
case nir_intrinsic_image_atomic_and:
1478
op = TGSI_OPCODE_ATOMAND;
1479
break;
1480
case nir_intrinsic_image_atomic_or:
1481
op = TGSI_OPCODE_ATOMOR;
1482
break;
1483
case nir_intrinsic_image_atomic_xor:
1484
op = TGSI_OPCODE_ATOMXOR;
1485
break;
1486
case nir_intrinsic_image_atomic_exchange:
1487
op = TGSI_OPCODE_ATOMXCHG;
1488
break;
1489
case nir_intrinsic_image_atomic_comp_swap:
1490
op = TGSI_OPCODE_ATOMCAS;
1491
break;
1492
default:
1493
unreachable("bad op");
1494
}
1495
1496
ureg_memory_insn(c->ureg, op, &dst, 1, srcs, num_src,
1497
ntt_get_access_qualifier(instr),
1498
target,
1499
nir_intrinsic_format(instr));
1500
1501
if (!ureg_dst_is_undef(temp))
1502
ureg_release_temporary(c->ureg, temp);
1503
}
1504
1505
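/* Declares the input (VS attribute, FS varying from input_index_map, or
* other-stage input) and emits the move or interpolation instruction for a
* load_input-family intrinsic.
*/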
static void
1506
ntt_emit_load_input(struct ntt_compile *c, nir_intrinsic_instr *instr)
1507
{
1508
uint32_t frac = nir_intrinsic_component(instr);
1509
uint32_t num_components = instr->num_components;
1510
unsigned base = nir_intrinsic_base(instr);
1511
struct ureg_src input;
1512
nir_io_semantics semantics = nir_intrinsic_io_semantics(instr);
1513
bool is_64 = nir_dest_bit_size(instr->dest) == 64;
1514
1515
if (c->s->info.stage == MESA_SHADER_VERTEX) {
1516
input = ureg_DECL_vs_input(c->ureg, base);
1517
for (int i = 1; i < semantics.num_slots; i++)
1518
ureg_DECL_vs_input(c->ureg, base + i);
1519
} else if (c->s->info.stage != MESA_SHADER_FRAGMENT) {
1520
unsigned semantic_name, semantic_index;
1521
ntt_get_gl_varying_semantic(c, semantics.location,
1522
&semantic_name, &semantic_index);
1523
1524
/* XXX: ArrayID is used in r600 gs inputs */
1525
uint32_t array_id = 0;
1526
1527
input = ureg_DECL_input_layout(c->ureg,
1528
semantic_name,
1529
semantic_index,
1530
base,
1531
ntt_tgsi_usage_mask(frac,
1532
instr->num_components,
1533
is_64),
1534
array_id,
1535
semantics.num_slots);
1536
} else {
1537
input = c->input_index_map[base];
1538
}
1539
1540
if (is_64)
1541
num_components *= 2;
1542
1543
input = ntt_shift_by_frac(input, frac, num_components);
1544
1545
switch (instr->intrinsic) {
1546
case nir_intrinsic_load_input:
1547
input = ntt_ureg_src_indirect(c, input, instr->src[0]);
1548
ntt_store(c, &instr->dest, input);
1549
break;
1550
1551
case nir_intrinsic_load_per_vertex_input:
1552
input = ntt_ureg_src_indirect(c, input, instr->src[1]);
1553
input = ntt_ureg_src_dimension_indirect(c, input, instr->src[0]);
1554
ntt_store(c, &instr->dest, input);
1555
break;
1556
1557
case nir_intrinsic_load_interpolated_input: {
1558
input = ntt_ureg_src_indirect(c, input, instr->src[1]);
1559
1560
nir_intrinsic_instr *bary_instr =
1561
nir_instr_as_intrinsic(instr->src[0].ssa->parent_instr);
1562
1563
switch (bary_instr->intrinsic) {
1564
case nir_intrinsic_load_barycentric_pixel:
1565
ntt_store(c, &instr->dest, input);
1566
break;
1567
1568
case nir_intrinsic_load_barycentric_centroid:
1569
/* If the input was declared centroid, then there's no need to
1570
* emit the extra TGSI interp instruction, we can just read the
1571
* input.
1572
*/
1573
if (c->centroid_inputs & (1ull << nir_intrinsic_base(instr))) {
1574
ntt_store(c, &instr->dest, input);
1575
} else {
1576
ureg_INTERP_CENTROID(c->ureg, ntt_get_dest(c, &instr->dest),
1577
input);
1578
}
1579
break;
1580
1581
case nir_intrinsic_load_barycentric_at_sample:
1582
ureg_INTERP_SAMPLE(c->ureg, ntt_get_dest(c, &instr->dest), input,
1583
ureg_imm1u(c->ureg,
1584
ntt_src_as_uint(c, bary_instr->src[0])));
1585
break;
1586
1587
case nir_intrinsic_load_barycentric_at_offset:
1588
/* We stored the offset in the fake "bary" dest. */
1589
ureg_INTERP_OFFSET(c->ureg, ntt_get_dest(c, &instr->dest), input,
1590
ntt_get_src(c, instr->src[0]));
1591
break;
1592
1593
default:
1594
unreachable("bad barycentric interp intrinsic\n");
1595
}
1596
break;
1597
}
1598
1599
default:
1600
unreachable("bad load input intrinsic\n");
1601
}
1602
}
1603
1604
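/* Emits the MOV to a TGSI output for store_output, unless the generating
* instruction already wrote the output file directly.
*/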
static void
1605
ntt_emit_store_output(struct ntt_compile *c, nir_intrinsic_instr *instr)
1606
{
1607
struct ureg_src src = ntt_get_src(c, instr->src[0]);
1608
1609
if (src.File == TGSI_FILE_OUTPUT) {
1610
/* If our src is the output file, that's an indication that we were able
1611
* to emit the output stores in the generating instructions and we have
1612
* nothing to do here.
1613
*/
1614
return;
1615
}
1616
1617
uint32_t frac;
1618
struct ureg_dst out = ntt_store_output_decl(c, instr, &frac);
1619
1620
if (instr->intrinsic == nir_intrinsic_store_per_vertex_output) {
1621
out = ntt_ureg_dst_indirect(c, out, instr->src[2]);
1622
out = ntt_ureg_dst_dimension_indirect(c, out, instr->src[1]);
1623
} else {
1624
out = ntt_ureg_dst_indirect(c, out, instr->src[1]);
1625
}
1626
1627
uint8_t swizzle[4] = { 0, 0, 0, 0 };
1628
for (int i = frac; i <= 4; i++) {
1629
if (out.WriteMask & (1 << i))
1630
swizzle[i] = i - frac;
1631
}
1632
1633
src = ureg_swizzle(src, swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
1634
1635
ureg_MOV(c->ureg, out, src);
1636
ntt_reladdr_dst_put(c, out);
1637
}
1638
1639
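/* Loads a system value, masking off undeclared channels and converting to
* float where non-native-integer drivers expect it.
*/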
static void
1640
ntt_emit_load_sysval(struct ntt_compile *c, nir_intrinsic_instr *instr)
1641
{
1642
gl_system_value sysval = nir_system_value_from_intrinsic(instr->intrinsic);
1643
enum tgsi_semantic semantic = tgsi_get_sysval_semantic(sysval);
1644
struct ureg_src sv = ureg_DECL_system_value(c->ureg, semantic, 0);
1645
1646
/* virglrenderer doesn't like references to channels of the sysval that
1647
* aren't defined, even if they aren't really read. (GLSL compile fails on
1648
* gl_NumWorkGroups.w, for example).
1649
*/
1650
uint32_t write_mask = BITSET_MASK(nir_dest_num_components(instr->dest));
1651
sv = ntt_swizzle_for_write_mask(sv, write_mask);
1652
1653
/* TGSI and NIR define these intrinsics as always loading ints, but they can
1654
* still appear on hardware with non-native-integers fragment shaders using
1655
* the draw path (i915g). In that case, having called nir_lower_int_to_float
1656
* means that we actually want floats instead.
1657
*/
1658
if (!c->native_integers) {
1659
switch (instr->intrinsic) {
1660
case nir_intrinsic_load_vertex_id:
1661
case nir_intrinsic_load_instance_id:
1662
ureg_U2F(c->ureg, ntt_get_dest(c, &instr->dest), sv);
1663
return;
1664
1665
default:
1666
break;
1667
}
1668
}
1669
1670
ntt_store(c, &instr->dest, sv);
1671
}
1672
1673
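/* Dispatches a NIR intrinsic to the UBO, memory, image, input, output, and
* sysval helpers above, and handles discards, barriers, and GS vertex emission
* inline.
*/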
static void
1674
ntt_emit_intrinsic(struct ntt_compile *c, nir_intrinsic_instr *instr)
1675
{
1676
switch (instr->intrinsic) {
1677
case nir_intrinsic_load_ubo:
1678
case nir_intrinsic_load_ubo_vec4:
1679
ntt_emit_load_ubo(c, instr);
1680
break;
1681
1682
/* Vertex */
1683
case nir_intrinsic_load_vertex_id:
1684
case nir_intrinsic_load_vertex_id_zero_base:
1685
case nir_intrinsic_load_base_vertex:
1686
case nir_intrinsic_load_base_instance:
1687
case nir_intrinsic_load_instance_id:
1688
case nir_intrinsic_load_draw_id:
1689
case nir_intrinsic_load_invocation_id:
1690
case nir_intrinsic_load_frag_coord:
1691
case nir_intrinsic_load_point_coord:
1692
case nir_intrinsic_load_front_face:
1693
case nir_intrinsic_load_sample_id:
1694
case nir_intrinsic_load_sample_mask_in:
1695
case nir_intrinsic_load_helper_invocation:
1696
case nir_intrinsic_load_tess_coord:
1697
case nir_intrinsic_load_patch_vertices_in:
1698
case nir_intrinsic_load_primitive_id:
1699
case nir_intrinsic_load_tess_level_outer:
1700
case nir_intrinsic_load_tess_level_inner:
1701
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_workgroup_id:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
      ntt_emit_load_sysval(c, instr);
      break;

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_interpolated_input:
      ntt_emit_load_input(c, instr);
      break;

   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
      ntt_emit_store_output(c, instr);
      break;

   case nir_intrinsic_discard:
      ureg_KILL(c->ureg);
      break;

   case nir_intrinsic_discard_if: {
      struct ureg_src cond = ureg_scalar(ntt_get_src(c, instr->src[0]), 0);

      if (c->native_integers) {
         struct ureg_dst temp = ureg_writemask(ureg_DECL_temporary(c->ureg), 1);
         ureg_AND(c->ureg, temp, cond, ureg_imm1f(c->ureg, 1.0));
         ureg_KILL_IF(c->ureg, ureg_scalar(ureg_negate(ureg_src(temp)), 0));
         ureg_release_temporary(c->ureg, temp);
      } else {
         /* For !native_integers, the bool got lowered to 1.0 or 0.0. */
         ureg_KILL_IF(c->ureg, ureg_negate(cond));
      }
      break;
   }

   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_get_ssbo_size:
      ntt_emit_mem(c, instr, nir_var_mem_ssbo);
      break;

   case nir_intrinsic_load_shared:
   case nir_intrinsic_store_shared:
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_fadd:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
      ntt_emit_mem(c, instr, nir_var_mem_shared);
      break;

   case nir_intrinsic_image_load:
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_fadd:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_comp_swap:
      ntt_emit_image_load_store(c, instr);
      break;

   case nir_intrinsic_control_barrier:
      ureg_BARRIER(c->ureg);
      break;

   case nir_intrinsic_memory_barrier:
      ureg_MEMBAR(c->ureg, ureg_imm1u(c->ureg,
                                      TGSI_MEMBAR_SHADER_BUFFER |
                                      TGSI_MEMBAR_ATOMIC_BUFFER |
                                      TGSI_MEMBAR_SHADER_IMAGE |
                                      TGSI_MEMBAR_SHARED));
      break;

   case nir_intrinsic_memory_barrier_atomic_counter:
      ureg_MEMBAR(c->ureg, ureg_imm1u(c->ureg, TGSI_MEMBAR_ATOMIC_BUFFER));
      break;

   case nir_intrinsic_memory_barrier_buffer:
      ureg_MEMBAR(c->ureg, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHADER_BUFFER));
      break;

   case nir_intrinsic_memory_barrier_image:
      ureg_MEMBAR(c->ureg, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHADER_IMAGE));
      break;

   case nir_intrinsic_memory_barrier_shared:
      ureg_MEMBAR(c->ureg, ureg_imm1u(c->ureg, TGSI_MEMBAR_SHARED));
      break;

   case nir_intrinsic_group_memory_barrier:
      ureg_MEMBAR(c->ureg, ureg_imm1u(c->ureg,
                                      TGSI_MEMBAR_SHADER_BUFFER |
                                      TGSI_MEMBAR_ATOMIC_BUFFER |
                                      TGSI_MEMBAR_SHADER_IMAGE |
                                      TGSI_MEMBAR_SHARED |
                                      TGSI_MEMBAR_THREAD_GROUP));
      break;

   case nir_intrinsic_end_primitive:
      ureg_ENDPRIM(c->ureg, ureg_imm1u(c->ureg, nir_intrinsic_stream_id(instr)));
      break;

   case nir_intrinsic_emit_vertex:
      ureg_EMIT(c->ureg, ureg_imm1u(c->ureg, nir_intrinsic_stream_id(instr)));
      break;

   /* In TGSI we don't actually generate the barycentric coords, and emit
    * interp intrinsics later.  However, we do need to store the _at_offset
    * argument so that we can use it at that point.
    */
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_at_sample:
      break;

   case nir_intrinsic_load_barycentric_at_offset:
      ntt_store(c, &instr->dest, ntt_get_src(c, instr->src[0]));
      break;

   default:
      fprintf(stderr, "Unknown intrinsic: ");
      nir_print_instr(&instr->instr, stderr);
      fprintf(stderr, "\n");
      break;
   }
}

struct ntt_tex_operand_state {
   struct ureg_src srcs[4];
   unsigned i;
   unsigned chan;
   bool is_temp[4];
};

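/**
 * Packs one NIR texture source into the TGSI texture instruction's operand
 * list: fills the current vec4 operand channel by channel, spilling to the
 * next operand when the components won't fit, and MOVing already-emitted
 * components into a temporary when several sources share one operand.
 */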
static void
ntt_push_tex_arg(struct ntt_compile *c,
                 nir_tex_instr *instr,
                 nir_tex_src_type tex_src_type,
                 struct ntt_tex_operand_state *s)
{
   int tex_src = nir_tex_instr_src_index(instr, tex_src_type);
   if (tex_src < 0)
      return;

   struct ureg_src src = ntt_get_src(c, instr->src[tex_src].src);
   int num_components = nir_tex_instr_src_size(instr, tex_src);

   /* Find which src in the tex args we'll fit in. */
   if (s->chan + num_components > 4) {
      s->chan = 0;
      s->i++;
   }

   /* Would need to fix up swizzling up to the writemask channel here. */
   assert(num_components == 1 || s->chan == 0);
   if (num_components == 1)
      src = ureg_scalar(src, 0);

   if (ureg_src_is_undef(s->srcs[s->i])) {
      /* First emit of a tex operand's components, no need for a mov. */
      s->srcs[s->i] = src;
   } else {
      /* Otherwise, we need to have a temporary for all the components that go
       * in this operand.
       */
      if (!s->is_temp[s->i]) {
         struct ureg_src prev_src = s->srcs[s->i];
         s->srcs[s->i] = ureg_src(ureg_DECL_temporary(c->ureg));
         s->is_temp[s->i] = true;

         ureg_MOV(c->ureg,
                  ureg_writemask(ureg_dst(s->srcs[s->i]),
                                 BITFIELD_MASK(s->chan)), prev_src);
      }

      ureg_MOV(c->ureg,
               ureg_writemask(ureg_dst(s->srcs[s->i]),
                              BITFIELD_RANGE(s->chan, num_components)),
               src);
   }

   s->chan += num_components;
}

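/**
 * Emits a TGSI texture instruction for a NIR tex instruction: selects the
 * TGSI opcode and texture target, packs the coordinate/comparator/bias/LOD
 * sources with ntt_push_tex_arg(), and forwards any texel offset.
 */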
static void
ntt_emit_texture(struct ntt_compile *c, nir_tex_instr *instr)
{
   struct ureg_dst dst = ntt_get_dest(c, &instr->dest);
   unsigned target;
   unsigned tex_opcode;

   struct ureg_src sampler = ureg_DECL_sampler(c->ureg, instr->sampler_index);
   int sampler_src = nir_tex_instr_src_index(instr, nir_tex_src_sampler_offset);
   if (sampler_src >= 0) {
      struct ureg_src reladdr = ntt_get_src(c, instr->src[sampler_src].src);
      sampler = ureg_src_indirect(sampler, ntt_reladdr(c, reladdr));
   }

   switch (instr->op) {
   case nir_texop_tex:
      tex_opcode = TGSI_OPCODE_TEX;
      break;
   case nir_texop_txf:
   case nir_texop_txf_ms:
      tex_opcode = TGSI_OPCODE_TXF;

      if (c->has_txf_lz) {
         int lod_src = nir_tex_instr_src_index(instr, nir_tex_src_lod);
         if (lod_src >= 0 &&
             nir_src_is_const(instr->src[lod_src].src) &&
             ntt_src_as_uint(c, instr->src[lod_src].src) == 0) {
            tex_opcode = TGSI_OPCODE_TXF_LZ;
         }
      }
      break;
   case nir_texop_txl:
      tex_opcode = TGSI_OPCODE_TXL;
      break;
   case nir_texop_txb:
      tex_opcode = TGSI_OPCODE_TXB;
      break;
   case nir_texop_txd:
      tex_opcode = TGSI_OPCODE_TXD;
      break;
   case nir_texop_txs:
      tex_opcode = TGSI_OPCODE_TXQ;
      break;
   case nir_texop_tg4:
      tex_opcode = TGSI_OPCODE_TG4;
      break;
   case nir_texop_query_levels:
      tex_opcode = TGSI_OPCODE_TXQ;
      break;
   case nir_texop_lod:
      tex_opcode = TGSI_OPCODE_LODQ;
      break;
   case nir_texop_texture_samples:
      tex_opcode = TGSI_OPCODE_TXQS;
      break;
   default:
      unreachable("unsupported tex op");
   }

   struct ntt_tex_operand_state s = { .i = 0 };
   ntt_push_tex_arg(c, instr, nir_tex_src_coord, &s);
   /* We always have at least two slots for the coordinate, even on 1D. */
   s.chan = MAX2(s.chan, 2);

   ntt_push_tex_arg(c, instr, nir_tex_src_comparator, &s);
   s.chan = MAX2(s.chan, 3);

   ntt_push_tex_arg(c, instr, nir_tex_src_bias, &s);
   if (tex_opcode != TGSI_OPCODE_TXF_LZ)
      ntt_push_tex_arg(c, instr, nir_tex_src_lod, &s);

   /* End of packed src setup, everything that follows gets its own operand. */
   if (s.chan)
      s.i++;

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
      if (instr->is_array) {
         if (instr->is_shadow) {
            target = TGSI_TEXTURE_SHADOW1D_ARRAY;
         } else {
            target = TGSI_TEXTURE_1D_ARRAY;
         }
      } else {
         if (instr->is_shadow) {
            target = TGSI_TEXTURE_SHADOW1D;
         } else {
            target = TGSI_TEXTURE_1D;
         }
      }
      break;
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      if (instr->is_array) {
         if (instr->is_shadow) {
            target = TGSI_TEXTURE_SHADOW2D_ARRAY;
         } else {
            target = TGSI_TEXTURE_2D_ARRAY;
         }
      } else {
         if (instr->is_shadow) {
            target = TGSI_TEXTURE_SHADOW2D;
         } else {
            target = TGSI_TEXTURE_2D;
         }
      }
      break;
   case GLSL_SAMPLER_DIM_MS:
      if (instr->is_array) {
         target = TGSI_TEXTURE_2D_ARRAY_MSAA;
      } else {
         target = TGSI_TEXTURE_2D_MSAA;
      }
      break;
   case GLSL_SAMPLER_DIM_3D:
      assert(!instr->is_shadow);
      target = TGSI_TEXTURE_3D;
      break;
   case GLSL_SAMPLER_DIM_RECT:
      if (instr->is_shadow) {
         target = TGSI_TEXTURE_SHADOWRECT;
      } else {
         target = TGSI_TEXTURE_RECT;
      }
      break;
   case GLSL_SAMPLER_DIM_CUBE:
      if (instr->is_array) {
         if (instr->is_shadow) {
            target = TGSI_TEXTURE_SHADOWCUBE_ARRAY;
         } else {
            target = TGSI_TEXTURE_CUBE_ARRAY;
         }
      } else {
         if (instr->is_shadow) {
            target = TGSI_TEXTURE_SHADOWCUBE;
         } else {
            target = TGSI_TEXTURE_CUBE;
         }
      }
      break;
   case GLSL_SAMPLER_DIM_BUF:
      target = TGSI_TEXTURE_BUFFER;
      break;
   default:
      fprintf(stderr, "Unknown sampler dimensions: %d\n", instr->sampler_dim);
      abort();
   }

   if (s.i > 1) {
      if (tex_opcode == TGSI_OPCODE_TEX)
         tex_opcode = TGSI_OPCODE_TEX2;
      if (tex_opcode == TGSI_OPCODE_TXB)
         tex_opcode = TGSI_OPCODE_TXB2;
      if (tex_opcode == TGSI_OPCODE_TXL)
         tex_opcode = TGSI_OPCODE_TXL2;
   }

   if (instr->op == nir_texop_txd) {
      /* Derivs appear in their own src args */
      int ddx = nir_tex_instr_src_index(instr, nir_tex_src_ddx);
      int ddy = nir_tex_instr_src_index(instr, nir_tex_src_ddy);
      s.srcs[s.i++] = ntt_get_src(c, instr->src[ddx].src);
      s.srcs[s.i++] = ntt_get_src(c, instr->src[ddy].src);
   }

   if (instr->op == nir_texop_tg4 && target != TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
      if (c->screen->get_param(c->screen,
                               PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE)) {
         sampler = ureg_scalar(sampler, instr->component);
         s.srcs[s.i++] = ureg_src_undef();
      } else {
         s.srcs[s.i++] = ureg_imm1u(c->ureg, instr->component);
      }
   }

   s.srcs[s.i++] = sampler;

   enum tgsi_return_type tex_type;
   switch (instr->dest_type) {
   case nir_type_float32:
      tex_type = TGSI_RETURN_TYPE_FLOAT;
      break;
   case nir_type_int32:
      tex_type = TGSI_RETURN_TYPE_SINT;
      break;
   case nir_type_uint32:
      tex_type = TGSI_RETURN_TYPE_UINT;
      break;
   default:
      unreachable("unknown texture type");
   }

   struct tgsi_texture_offset tex_offsets[4];
   unsigned num_tex_offsets = 0;
   int tex_offset_src = nir_tex_instr_src_index(instr, nir_tex_src_offset);
   if (tex_offset_src >= 0) {
      struct ureg_src offset = ntt_get_src(c, instr->src[tex_offset_src].src);

      tex_offsets[0].File = offset.File;
      tex_offsets[0].Index = offset.Index;
      tex_offsets[0].SwizzleX = offset.SwizzleX;
      tex_offsets[0].SwizzleY = offset.SwizzleY;
      tex_offsets[0].SwizzleZ = offset.SwizzleZ;
      tex_offsets[0].Padding = 0;

      num_tex_offsets = 1;
   }

   struct ureg_dst tex_dst;
   if (instr->op == nir_texop_query_levels)
      tex_dst = ureg_writemask(ureg_DECL_temporary(c->ureg), TGSI_WRITEMASK_W);
   else
      tex_dst = dst;

   ureg_tex_insn(c->ureg, tex_opcode,
                 &tex_dst, 1,
                 target,
                 tex_type,
                 tex_offsets, num_tex_offsets,
                 s.srcs, s.i);

   if (instr->op == nir_texop_query_levels) {
      ureg_MOV(c->ureg, dst, ureg_scalar(ureg_src(tex_dst), 3));
      ureg_release_temporary(c->ureg, tex_dst);
   }

   for (int i = 0; i < s.i; i++) {
      if (s.is_temp[i])
         ureg_release_temporary(c->ureg, ureg_dst(s.srcs[i]));
   }
}

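/**
 * Emits TGSI BRK/CONT for NIR break/continue jumps; any other jump type is
 * unexpected here and aborts with a debug print.
 */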
static void
ntt_emit_jump(struct ntt_compile *c, nir_jump_instr *jump)
{
   switch (jump->type) {
   case nir_jump_break:
      ureg_BRK(c->ureg);
      break;

   case nir_jump_continue:
      ureg_CONT(c->ureg);
      break;

   default:
      fprintf(stderr, "Unknown jump instruction: ");
      nir_print_instr(&jump->instr, stderr);
      fprintf(stderr, "\n");
      abort();
   }
}

static void
ntt_emit_ssa_undef(struct ntt_compile *c, nir_ssa_undef_instr *instr)
{
   /* Nothing to do but make sure that we have some storage to deref. */
   (void)ntt_get_ssa_def_decl(c, &instr->def);
}

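/**
 * Dispatches a single NIR instruction to the appropriate emit helper,
 * resetting the address register allocation at the start of each instruction.
 */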
static void
ntt_emit_instr(struct ntt_compile *c, nir_instr *instr)
{
   /* There is no addr reg in use before we start emitting an instr. */
   c->next_addr_reg = 0;

   switch (instr->type) {
   case nir_instr_type_deref:
      /* ignored, will be walked by nir_intrinsic_image_*_deref. */
      break;

   case nir_instr_type_alu:
      ntt_emit_alu(c, nir_instr_as_alu(instr));
      break;

   case nir_instr_type_intrinsic:
      ntt_emit_intrinsic(c, nir_instr_as_intrinsic(instr));
      break;

   case nir_instr_type_load_const:
      /* Nothing to do here, as load consts are done directly from
       * ntt_get_src() (since many constant NIR srcs will often get folded
       * directly into a register file index instead of as a TGSI src).
       */
      break;

   case nir_instr_type_tex:
      ntt_emit_texture(c, nir_instr_as_tex(instr));
      break;

   case nir_instr_type_jump:
      ntt_emit_jump(c, nir_instr_as_jump(instr));
      break;

   case nir_instr_type_ssa_undef:
      ntt_emit_ssa_undef(c, nir_instr_as_ssa_undef(instr));
      break;

   default:
      fprintf(stderr, "Unknown NIR instr type: ");
      nir_print_instr(instr, stderr);
      fprintf(stderr, "\n");
      abort();
   }
}

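/**
 * Emits TGSI UIF/ELSE/ENDIF for a NIR if, using the condition stashed in
 * c->if_cond by ntt_emit_block() and fixing up branch target labels once the
 * instruction numbers are known.
 */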
static void
ntt_emit_if(struct ntt_compile *c, nir_if *if_stmt)
{
   unsigned label;
   ureg_UIF(c->ureg, c->if_cond, &label);
   ntt_emit_cf_list(c, &if_stmt->then_list);

   if (!nir_cf_list_is_empty_block(&if_stmt->else_list)) {
      ureg_fixup_label(c->ureg, label, ureg_get_instruction_number(c->ureg));
      ureg_ELSE(c->ureg, &label);
      ntt_emit_cf_list(c, &if_stmt->else_list);
   }

   ureg_fixup_label(c->ureg, label, ureg_get_instruction_number(c->ureg));
   ureg_ENDIF(c->ureg);
}

static void
ntt_emit_loop(struct ntt_compile *c, nir_loop *loop)
{
   /* GLSL-to-TGSI never set the begin/end labels to anything, even though nvfx
    * does reference BGNLOOP's.  Follow the former behavior unless something
    * comes up with a need.
    */
   unsigned begin_label;
   ureg_BGNLOOP(c->ureg, &begin_label);
   ntt_emit_cf_list(c, &loop->body);

   unsigned end_label;
   ureg_ENDLOOP(c->ureg, &end_label);
}

static void
ntt_free_ssa_temp_by_index(struct ntt_compile *c, int index)
{
   /* We do store CONST/IMM/INPUT/etc. in ssa_temp[] */
   if (c->ssa_temp[index].File != TGSI_FILE_TEMPORARY)
      return;

   ureg_release_temporary(c->ureg, c->ssa_temp[index]);
   memset(&c->ssa_temp[index], 0, sizeof(c->ssa_temp[index]));
}

/* Releases any temporaries for SSA defs with a live interval ending at this
 * instruction.
 */
static bool
ntt_src_live_interval_end_cb(nir_src *src, void *state)
{
   struct ntt_compile *c = state;

   if (src->is_ssa) {
      nir_ssa_def *def = src->ssa;

      if (c->liveness->defs[def->index].end == src->parent_instr->index)
         ntt_free_ssa_temp_by_index(c, def->index);
   }

   return true;
}

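/**
 * Emits the instructions of a NIR block, stashes the condition for any
 * following if before temps are released, and frees the TGSI temporaries
 * whose SSA live intervals end within this block.
 */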
static void
ntt_emit_block(struct ntt_compile *c, nir_block *block)
{
   nir_foreach_instr(instr, block) {
      ntt_emit_instr(c, instr);

      nir_foreach_src(instr, ntt_src_live_interval_end_cb, c);
   }

   /* Set up the if condition for ntt_emit_if(), which we have to do before
    * freeing up the temps (the "if" is treated as inside the block for
    * liveness purposes, despite not being an instruction)
    *
    * Note that, while IF and UIF are supposed to look at only .x, virglrenderer
    * looks at all of .xyzw.  No harm in working around the bug.
    */
   nir_if *nif = nir_block_get_following_if(block);
   if (nif)
      c->if_cond = ureg_scalar(ntt_get_src(c, nif->condition), TGSI_SWIZZLE_X);

   /* Free up any SSA temps that are unused at the end of the block. */
   unsigned index;
   BITSET_FOREACH_SET(index, block->live_out, BITSET_WORDS(c->impl->ssa_alloc)) {
      unsigned def_end_ip = c->liveness->defs[index].end;
      if (def_end_ip == block->end_ip)
         ntt_free_ssa_temp_by_index(c, index);
   }
}

static void
ntt_emit_cf_list(struct ntt_compile *c, struct exec_list *list)
{
   /* There is no addr reg in use before we start emitting any part of a CF
    * node (such as an if condition)
    */
   c->next_addr_reg = 0;

   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         ntt_emit_block(c, nir_cf_node_as_block(node));
         break;

      case nir_cf_node_if:
         ntt_emit_if(c, nir_cf_node_as_if(node));
         break;

      case nir_cf_node_loop:
         ntt_emit_loop(c, nir_cf_node_as_loop(node));
         break;

      default:
         unreachable("unknown CF type");
      }
   }
}

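/**
 * Emits a nir_function_impl: computes per-instruction SSA liveness (used to
 * release TGSI temporaries early), allocates the SSA/register temp arrays,
 * and walks the body's control-flow list.
 */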
static void
ntt_emit_impl(struct ntt_compile *c, nir_function_impl *impl)
{
   c->impl = impl;
   c->liveness = nir_live_ssa_defs_per_instr(impl);

   c->ssa_temp = rzalloc_array(c, struct ureg_dst, impl->ssa_alloc);
   c->reg_temp = rzalloc_array(c, struct ureg_dst, impl->reg_alloc);

   ntt_setup_registers(c, &impl->registers);
   ntt_emit_cf_list(c, &impl->body);

   ralloc_free(c->liveness);
   c->liveness = NULL;
}

static int
type_size(const struct glsl_type *type, bool bindless)
{
   return glsl_count_attribute_slots(type, false);
}

/* Allow vectorizing of ALU instructions, but avoid vectorizing past what we
 * can handle for 64-bit values in TGSI.
 */
static bool
ntt_should_vectorize_instr(const nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_alu)
      return false;

   nir_alu_instr *alu = nir_instr_as_alu(instr);

   switch (alu->op) {
   case nir_op_ibitfield_extract:
   case nir_op_ubitfield_extract:
   case nir_op_bitfield_insert:
      /* virglrenderer only looks at the .x channel of the offset/bits operands
       * when translating to GLSL.  tgsi.rst doesn't seem to require scalar
       * offset/bits operands.
       *
       * https://gitlab.freedesktop.org/virgl/virglrenderer/-/issues/195
       */
      return false;

   default:
      break;
   }

   unsigned num_components = alu->dest.dest.ssa.num_components;

   int src_bit_size = nir_src_bit_size(alu->src[0].src);
   int dst_bit_size = nir_dest_bit_size(alu->dest.dest);

   if (src_bit_size == 64 || dst_bit_size == 64) {
      if (num_components > 1)
         return false;
   }

   return true;
}

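/* Callback for nir_opt_load_store_vectorize(): only merge 32-bit,
 * dword-aligned accesses that can't wrap past the end of a TGSI vec4 slot.
 */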
static bool
ntt_should_vectorize_io(unsigned align, unsigned bit_size,
                        unsigned num_components, unsigned high_offset,
                        nir_intrinsic_instr *low, nir_intrinsic_instr *high,
                        void *data)
{
   if (bit_size != 32)
      return false;

   /* Our offset alignment should always be at least 4 bytes */
   if (align < 4)
      return false;

   /* No wrapping off the end of a TGSI reg.  We could do a bit better by
    * looking at low's actual offset.  XXX: With LOAD_CONSTBUF maybe we don't
    * need this restriction.
    */
   unsigned worst_start_component = align == 4 ? 3 : align / 4;
   if (worst_start_component + num_components > 4)
      return false;

   return true;
}

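/* Returns the NIR variable modes for which the screen lacks indirect
 * addressing support, so the corresponding derefs and loops get lowered or
 * unrolled instead.
 */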
static nir_variable_mode
ntt_no_indirects_mask(nir_shader *s, struct pipe_screen *screen)
{
   unsigned pipe_stage = pipe_shader_type_from_mesa(s->info.stage);
   unsigned indirect_mask = 0;

   if (!screen->get_shader_param(screen, pipe_stage,
                                 PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR)) {
      indirect_mask |= nir_var_shader_in;
   }

   if (!screen->get_shader_param(screen, pipe_stage,
                                 PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR)) {
      indirect_mask |= nir_var_shader_out;
   }

   if (!screen->get_shader_param(screen, pipe_stage,
                                 PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR)) {
      indirect_mask |= nir_var_function_temp;
   }

   return indirect_mask;
}

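/* Runs the NIR optimization loop until no pass makes progress.  The
 * peephole_select limit is ~0 (flatten everything) when the screen reports a
 * maximum control-flow depth of 0.
 */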
static void
ntt_optimize_nir(struct nir_shader *s, struct pipe_screen *screen)
{
   bool progress;
   nir_variable_mode no_indirects_mask = ntt_no_indirects_mask(s, screen);
   unsigned pipe_stage = pipe_shader_type_from_mesa(s->info.stage);
   unsigned control_flow_depth =
      screen->get_shader_param(screen, pipe_stage,
                               PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH);
   do {
      progress = false;

      NIR_PASS_V(s, nir_lower_vars_to_ssa);

      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_conditional_discard);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_find_array_copies);
      NIR_PASS(progress, s, nir_opt_if, true);
      NIR_PASS(progress, s, nir_opt_peephole_select,
               control_flow_depth == 0 ? ~0 : 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      nir_load_store_vectorize_options vectorize_opts = {
         .modes = nir_var_mem_ubo,
         .callback = ntt_should_vectorize_io,
         .robust_modes = 0,
      };
      NIR_PASS(progress, s, nir_opt_load_store_vectorize, &vectorize_opts);
      NIR_PASS(progress, s, nir_opt_shrink_vectors, true);
      NIR_PASS(progress, s, nir_opt_trivial_continues);
      NIR_PASS(progress, s, nir_opt_vectorize, ntt_should_vectorize_instr, NULL);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, nir_opt_loop_unroll, no_indirects_mask);

   } while (progress);
}

/* Scalarizes all 64-bit ALU ops.  Note that we only actually need to
 * scalarize vec3/vec4s, should probably fix that.
 */
static bool
scalarize_64bit(const nir_instr *instr, const void *data)
{
   const nir_alu_instr *alu = nir_instr_as_alu(instr);

   return (nir_dest_bit_size(alu->dest.dest) == 64 ||
           nir_src_bit_size(alu->src[0].src) == 64);
}

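/* Splits a wide (3/4-component) 64-bit load/store intrinsic into a vec2 half
 * and a remainder half, bumping the offset or IO slot of the second copy and
 * re-vectorizing the result for loads.
 */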
static bool
nir_to_tgsi_lower_64bit_intrinsic(nir_builder *b, nir_intrinsic_instr *instr)
{
   b->cursor = nir_after_instr(&instr->instr);

   switch (instr->intrinsic) {
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ubo_vec4:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_ssbo:
      break;
   default:
      return false;
   }

   if (instr->num_components <= 2)
      return false;

   bool has_dest = nir_intrinsic_infos[instr->intrinsic].has_dest;
   if (has_dest) {
      if (nir_dest_bit_size(instr->dest) != 64)
         return false;
   } else {
      if (nir_src_bit_size(instr->src[0]) != 64)
         return false;
   }

   nir_intrinsic_instr *first =
      nir_instr_as_intrinsic(nir_instr_clone(b->shader, &instr->instr));
   nir_intrinsic_instr *second =
      nir_instr_as_intrinsic(nir_instr_clone(b->shader, &instr->instr));

   switch (instr->intrinsic) {
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ubo_vec4:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_store_ssbo:
      break;

   default: {
      nir_io_semantics semantics = nir_intrinsic_io_semantics(second);
      semantics.location++;
      semantics.num_slots--;
      nir_intrinsic_set_io_semantics(second, semantics);

      nir_intrinsic_set_base(second, nir_intrinsic_base(second) + 1);
      break;
   }
   }

   first->num_components = 2;
   second->num_components -= 2;
   if (has_dest) {
      first->dest.ssa.num_components = 2;
      second->dest.ssa.num_components -= 2;
   }

   nir_builder_instr_insert(b, &first->instr);
   nir_builder_instr_insert(b, &second->instr);

   if (has_dest) {
      /* Merge the two loads' results back into a vector. */
      nir_ssa_def *channels[4] = {
         nir_channel(b, &first->dest.ssa, 0),
         nir_channel(b, &first->dest.ssa, 1),
         nir_channel(b, &second->dest.ssa, 0),
         second->num_components > 1 ? nir_channel(b, &second->dest.ssa, 1) : NULL,
      };
      nir_ssa_def *new = nir_vec(b, channels, instr->num_components);
      nir_ssa_def_rewrite_uses(&instr->dest.ssa, new);
   } else {
      /* Split the src value across the two stores. */
      b->cursor = nir_before_instr(&instr->instr);

      nir_ssa_def *src0 = instr->src[0].ssa;
      nir_ssa_def *channels[4] = { 0 };
      for (int i = 0; i < instr->num_components; i++)
         channels[i] = nir_channel(b, src0, i);

      nir_intrinsic_set_write_mask(first, nir_intrinsic_write_mask(instr) & 3);
      nir_intrinsic_set_write_mask(second, nir_intrinsic_write_mask(instr) >> 2);

      nir_instr_rewrite_src(&first->instr, &first->src[0],
                            nir_src_for_ssa(nir_vec(b, channels, 2)));
      nir_instr_rewrite_src(&second->instr, &second->src[0],
                            nir_src_for_ssa(nir_vec(b, &channels[2],
                                                    second->num_components)));
   }

   int offset_src = -1;
   uint32_t offset_amount = 16;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_ubo:
      offset_src = 1;
      break;
   case nir_intrinsic_load_ubo_vec4:
      offset_src = 1;
      offset_amount = 1;
      break;
   case nir_intrinsic_store_ssbo:
      offset_src = 2;
      break;
   default:
      break;
   }
   if (offset_src != -1) {
      b->cursor = nir_before_instr(&second->instr);
      nir_ssa_def *second_offset =
         nir_iadd_imm(b, second->src[offset_src].ssa, offset_amount);
      nir_instr_rewrite_src(&second->instr, &second->src[offset_src],
                            nir_src_for_ssa(second_offset));
   }

   /* DCE stores we generated with no writemask (nothing else does this
    * currently).
    */
   if (!has_dest) {
      if (nir_intrinsic_write_mask(first) == 0)
         nir_instr_remove(&first->instr);
      if (nir_intrinsic_write_mask(second) == 0)
         nir_instr_remove(&second->instr);
   }

   nir_instr_remove(&instr->instr);

   return true;
}

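/* Splits a 3/4-component 64-bit load_const into two load_consts of at most
 * vec2 each, then re-vectorizes the result for the original uses.
 */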
static bool
nir_to_tgsi_lower_64bit_load_const(nir_builder *b, nir_load_const_instr *instr)
{
   int num_components = instr->def.num_components;

   if (instr->def.bit_size != 64 || num_components <= 2)
      return false;

   b->cursor = nir_before_instr(&instr->instr);

   nir_load_const_instr *first =
      nir_load_const_instr_create(b->shader, 2, 64);
   nir_load_const_instr *second =
      nir_load_const_instr_create(b->shader, num_components - 2, 64);

   first->value[0] = instr->value[0];
   first->value[1] = instr->value[1];
   second->value[0] = instr->value[2];
   if (num_components == 4)
      second->value[1] = instr->value[3];

   nir_builder_instr_insert(b, &first->instr);
   nir_builder_instr_insert(b, &second->instr);

   nir_ssa_def *channels[4] = {
      nir_channel(b, &first->def, 0),
      nir_channel(b, &first->def, 1),
      nir_channel(b, &second->def, 0),
      num_components == 4 ? nir_channel(b, &second->def, 1) : NULL,
   };
   nir_ssa_def *new = nir_vec(b, channels, num_components);
   nir_ssa_def_rewrite_uses(&instr->def, new);
   nir_instr_remove(&instr->instr);

   return true;
}

static bool
nir_to_tgsi_lower_64bit_to_vec2_instr(nir_builder *b, nir_instr *instr,
                                      void *data)
{
   switch (instr->type) {
   case nir_instr_type_load_const:
      return nir_to_tgsi_lower_64bit_load_const(b, nir_instr_as_load_const(instr));

   case nir_instr_type_intrinsic:
      return nir_to_tgsi_lower_64bit_intrinsic(b, nir_instr_as_intrinsic(instr));
   default:
      return false;
   }
}

static bool
nir_to_tgsi_lower_64bit_to_vec2(nir_shader *s)
{
   return nir_shader_instructions_pass(s,
                                       nir_to_tgsi_lower_64bit_to_vec2_instr,
                                       nir_metadata_block_index |
                                       nir_metadata_dominance,
                                       NULL);
}

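/* Checks the driver-provided compiler options and, if any lowering flag that
 * nir-to-tgsi relies on is missing, swaps in a fixed-up copy of the options
 * on the shader.
 */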
static void
ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
{
   const struct nir_shader_compiler_options *options = s->options;
   bool lower_fsqrt =
      !screen->get_shader_param(screen, pipe_shader_type_from_mesa(s->info.stage),
                                PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);

   if (!options->lower_extract_byte ||
       !options->lower_extract_word ||
       !options->lower_insert_byte ||
       !options->lower_insert_word ||
       !options->lower_fdph ||
       !options->lower_flrp64 ||
       !options->lower_fmod ||
       !options->lower_rotate ||
       !options->lower_uniforms_to_ubo ||
       !options->lower_vector_cmp ||
       options->lower_fsqrt != lower_fsqrt) {
      nir_shader_compiler_options *new_options = ralloc(s, nir_shader_compiler_options);
      *new_options = *s->options;

      new_options->lower_extract_byte = true;
      new_options->lower_extract_word = true;
      new_options->lower_insert_byte = true;
      new_options->lower_insert_word = true;
      new_options->lower_fdph = true;
      new_options->lower_flrp64 = true;
      new_options->lower_fmod = true;
      new_options->lower_rotate = true;
      new_options->lower_uniforms_to_ubo = true;
      new_options->lower_vector_cmp = true;
      new_options->lower_fsqrt = lower_fsqrt;

      s->options = new_options;
   }
}

/**
 * Translates the NIR shader to TGSI.
 *
 * This requires some lowering of the NIR shader to prepare it for translation.
 * We take ownership of the NIR shader passed, returning a reference to the new
 * TGSI tokens instead.  If you need to keep the NIR, then pass us a clone.
 */
const void *
nir_to_tgsi(struct nir_shader *s,
            struct pipe_screen *screen)
{
   struct ntt_compile *c;
   const void *tgsi_tokens;
   bool debug = env_var_as_boolean("NIR_TO_TGSI_DEBUG", false);
   nir_variable_mode no_indirects_mask = ntt_no_indirects_mask(s, screen);
   bool native_integers = screen->get_shader_param(screen,
                                                   pipe_shader_type_from_mesa(s->info.stage),
                                                   PIPE_SHADER_CAP_INTEGERS);
   const struct nir_shader_compiler_options *original_options = s->options;

   ntt_fix_nir_options(screen, s);

   NIR_PASS_V(s, nir_lower_io, nir_var_shader_in | nir_var_shader_out,
              type_size, (nir_lower_io_options)0);
   NIR_PASS_V(s, nir_lower_regs_to_ssa);

   const nir_lower_tex_options lower_tex_options = {
      /* XXX: We could skip lowering of TXP for TEX with <= 3 coord_components.
       */
      .lower_txp = ~0,
   };
   NIR_PASS_V(s, nir_lower_tex, &lower_tex_options);

   if (!original_options->lower_uniforms_to_ubo) {
      NIR_PASS_V(s, nir_lower_uniforms_to_ubo,
                 screen->get_param(screen, PIPE_CAP_PACKED_UNIFORMS),
                 !native_integers);
   }

   /* Do lowering so we can directly translate f64/i64 NIR ALU ops to TGSI --
    * TGSI stores up to a vec2 in each slot, so to avoid a whole bunch of op
    * duplication logic we just make it so that we only see vec2s.
    */
   NIR_PASS_V(s, nir_lower_alu_to_scalar, scalarize_64bit, NULL);
   NIR_PASS_V(s, nir_to_tgsi_lower_64bit_to_vec2);

   if (!screen->get_param(screen, PIPE_CAP_LOAD_CONSTBUF))
      NIR_PASS_V(s, nir_lower_ubo_vec4);

   ntt_optimize_nir(s, screen);

   NIR_PASS_V(s, nir_lower_indirect_derefs, no_indirects_mask, UINT32_MAX);

   bool progress;
   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
      if (progress) {
         NIR_PASS_V(s, nir_copy_prop);
         NIR_PASS_V(s, nir_opt_dce);
         NIR_PASS_V(s, nir_opt_cse);
      }
   } while (progress);

   if (screen->get_shader_param(screen,
                                pipe_shader_type_from_mesa(s->info.stage),
                                PIPE_SHADER_CAP_INTEGERS)) {
      NIR_PASS_V(s, nir_lower_bool_to_int32);
   } else {
      NIR_PASS_V(s, nir_lower_int_to_float);
      NIR_PASS_V(s, nir_lower_bool_to_float);
      /* bool_to_float generates MOVs for b2f32 that we want to clean up. */
      NIR_PASS_V(s, nir_copy_prop);
      NIR_PASS_V(s, nir_opt_dce);
   }

   /* Only lower 32-bit floats.  The only other modifier type officially
    * supported by TGSI is 32-bit integer negates, but even those are broken on
    * virglrenderer, so skip lowering all integer and f64 float mods.
    */
   NIR_PASS_V(s, nir_lower_to_source_mods, nir_lower_float_source_mods);
   NIR_PASS_V(s, nir_convert_from_ssa, true);
   NIR_PASS_V(s, nir_lower_vec_to_movs, NULL, NULL);

   /* locals_to_regs will leave dead derefs that are good to clean up. */
   NIR_PASS_V(s, nir_lower_locals_to_regs);
   NIR_PASS_V(s, nir_opt_dce);

   if (debug) {
      fprintf(stderr, "NIR before translation to TGSI:\n");
      nir_print_shader(s, stderr);
   }

   c = rzalloc(NULL, struct ntt_compile);
   c->screen = screen;

   c->needs_texcoord_semantic =
      screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD);
   c->any_reg_as_address =
      screen->get_param(screen, PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS);
   c->has_txf_lz =
      screen->get_param(screen, PIPE_CAP_TGSI_TEX_TXF_LZ);

   c->s = s;
   c->native_integers = native_integers;
   c->ureg = ureg_create(pipe_shader_type_from_mesa(s->info.stage));
   ureg_setup_shader_info(c->ureg, &s->info);

   ntt_setup_inputs(c);
   ntt_setup_uniforms(c);

   if (s->info.stage == MESA_SHADER_FRAGMENT) {
      /* The draw module's polygon stipple layer doesn't respect the chosen
       * coordinate mode, so leave it as unspecified unless we're actually
       * reading the position in the shader already.  See
       * gl-2.1-polygon-stipple-fs on softpipe.
       */
      if ((s->info.inputs_read & VARYING_BIT_POS) ||
          BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD)) {
         ureg_property(c->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
                       s->info.fs.origin_upper_left ?
                       TGSI_FS_COORD_ORIGIN_UPPER_LEFT :
                       TGSI_FS_COORD_ORIGIN_LOWER_LEFT);

         ureg_property(c->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
                       s->info.fs.pixel_center_integer ?
                       TGSI_FS_COORD_PIXEL_CENTER_INTEGER :
                       TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
      }
   }
   /* Emit the main function */
   nir_function_impl *impl = nir_shader_get_entrypoint(c->s);
   ntt_emit_impl(c, impl);
   ureg_END(c->ureg);

   tgsi_tokens = ureg_get_tokens(c->ureg, NULL);

   if (debug) {
      fprintf(stderr, "TGSI after translation from NIR:\n");
      tgsi_dump(tgsi_tokens, 0);
   }

   ureg_destroy(c->ureg);

   ralloc_free(c);
   ralloc_free(s);

   return tgsi_tokens;
}

static const nir_shader_compiler_options nir_to_tgsi_compiler_options = {
   .fuse_ffma32 = true,
   .fuse_ffma64 = true,
   .lower_extract_byte = true,
   .lower_extract_word = true,
   .lower_insert_byte = true,
   .lower_insert_word = true,
   .lower_fdph = true,
   .lower_flrp64 = true,
   .lower_fmod = true,
   .lower_rotate = true,
   .lower_uniforms_to_ubo = true,
   .lower_vector_cmp = true,
   .use_interpolated_input_intrinsics = true,
};

/* Returns default compiler options for drivers with only nir-to-tgsi-based
 * NIR support.
 */
const void *
nir_to_tgsi_get_compiler_options(struct pipe_screen *pscreen,
                                 enum pipe_shader_ir ir,
                                 unsigned shader)
{
   assert(ir == PIPE_SHADER_IR_NIR);
   return &nir_to_tgsi_compiler_options;
}