GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/freedreno/ir3/ir3_context.c
/*
 * Copyright (C) 2015-2018 Rob Clark <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include "ir3_context.h"
#include "ir3_compiler.h"
#include "ir3_image.h"
#include "ir3_nir.h"
#include "ir3_shader.h"

struct ir3_context *
ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader_variant *so)
{
   struct ir3_context *ctx = rzalloc(NULL, struct ir3_context);

   if (compiler->gpu_id >= 400) {
      if (so->type == MESA_SHADER_VERTEX) {
         ctx->astc_srgb = so->key.vastc_srgb;
      } else if (so->type == MESA_SHADER_FRAGMENT) {
         ctx->astc_srgb = so->key.fastc_srgb;
      }

   } else {
      if (so->type == MESA_SHADER_VERTEX) {
         ctx->samples = so->key.vsamples;
      } else if (so->type == MESA_SHADER_FRAGMENT) {
         ctx->samples = so->key.fsamples;
      }
   }

   if (compiler->gpu_id >= 600) {
      ctx->funcs = &ir3_a6xx_funcs;
   } else if (compiler->gpu_id >= 400) {
      ctx->funcs = &ir3_a4xx_funcs;
   }

   ctx->compiler = compiler;
   ctx->so = so;
   ctx->def_ht =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->block_ht =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->continue_block_ht =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->sel_cond_conversions =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);

   /* TODO: maybe generate some sort of bitmask of what the key
    * lowers vs what the shader has (ie. no need to run texture
    * clamp lowering if there are no texture sample instrs)..
    * although this should be done further up the stack to avoid
    * creating duplicate variants..
    */

   ctx->s = nir_shader_clone(ctx, so->shader->nir);
   ir3_nir_lower_variant(so, ctx->s);

   /* this needs to be the last pass run, so do this here instead of
    * in ir3_optimize_nir():
    */
   bool progress = false;
   NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs);

   /* we may need cleanup after lower_locals_to_regs */
   while (progress) {
      progress = false;
      NIR_PASS(progress, ctx->s, nir_opt_algebraic);
      NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
   }

   /* We want to lower nir_op_imul as late as possible, to also catch
    * those generated by earlier passes (e.g., nir_lower_locals_to_regs).
    * However, we want a final swing of a few passes to have a chance
    * at optimizing the result.
    */
   progress = false;
   NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
   while (progress) {
      progress = false;
      NIR_PASS(progress, ctx->s, nir_opt_algebraic);
      NIR_PASS(progress, ctx->s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, ctx->s, nir_opt_dead_write_vars);
      NIR_PASS(progress, ctx->s, nir_opt_dce);
      NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
   }

   /* Enable the texture pre-fetch feature only from a4xx onwards, and
    * only on generations that have been tested:
    */
   if ((so->type == MESA_SHADER_FRAGMENT) && (compiler->gpu_id >= 600))
      NIR_PASS_V(ctx->s, ir3_nir_lower_tex_prefetch);

   NIR_PASS(progress, ctx->s, nir_lower_phis_to_scalar, true);

   /* Super crude heuristic to limit # of tex prefetch in small
    * shaders.  This completely ignores loops.. but that's really
    * not the worst of its problems.  (A frag shader that has
    * loops is probably going to be big enough to not trigger a
    * lower threshold.)
    *
    *   1) probably want to do this in terms of ir3 instructions
    *   2) probably really want to decide this after scheduling
    *      (or at least pre-RA sched) so we have a rough idea about
    *      nops, and don't count things that get cp'd away
    *   3) blob seems to use higher thresholds with a mix of more
    *      SFU instructions.  Which partly makes sense, more SFU
    *      instructions probably means you want to get the real
    *      shader started sooner, but that considers where in the
    *      shader the SFU instructions are, which blob doesn't seem
    *      to do.
    *
    * This uses more conservative thresholds assuming a more alu-
    * than sfu-heavy instruction mix.
    */
   if (so->type == MESA_SHADER_FRAGMENT) {
      nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->s);

      unsigned instruction_count = 0;
      nir_foreach_block (block, fxn) {
         instruction_count += exec_list_length(&block->instr_list);
      }

      if (instruction_count < 50) {
         ctx->prefetch_limit = 2;
      } else if (instruction_count < 70) {
         ctx->prefetch_limit = 3;
      } else {
         ctx->prefetch_limit = IR3_MAX_SAMPLER_PREFETCH;
      }
   }

   if (shader_debug_enabled(so->type)) {
      mesa_logi("NIR (final form) for %s shader %s:", ir3_shader_stage(so),
                so->shader->nir->info.name);
      nir_log_shaderi(ctx->s);
   }

   ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);

   return ctx;
}

void
ir3_context_free(struct ir3_context *ctx)
{
   ralloc_free(ctx);
}
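
/* An illustrative lifecycle sketch (not a definitive usage contract): the
 * NIR->ir3 emit code that drives translation is assumed to use the two
 * functions above roughly as follows; the emit step itself lives outside
 * this file:
 *
 *    struct ir3_context *ctx = ir3_context_init(compiler, so);
 *    ...emit ir3 instructions while walking ctx->s (the cloned, lowered NIR)...
 *    ir3_context_free(ctx);   // ctx is the ralloc parent of the hash tables etc.
 */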

/*
 * Misc helpers
 */

/* allocate an n-element value array (to be populated by the caller) and
 * insert it in def_ht
 */
struct ir3_instruction **
ir3_get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n)
{
   struct ir3_instruction **value =
      ralloc_array(ctx->def_ht, struct ir3_instruction *, n);
   _mesa_hash_table_insert(ctx->def_ht, dst, value);
   return value;
}

struct ir3_instruction **
ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n)
{
   struct ir3_instruction **value;

   if (dst->is_ssa) {
      value = ir3_get_dst_ssa(ctx, &dst->ssa, n);
   } else {
      value = ralloc_array(ctx, struct ir3_instruction *, n);
   }

   /* NOTE: in the non-ssa case we don't really need to store last_dst,
    * but this helps us catch cases where a put_dst() call is forgotten
    */
   compile_assert(ctx, !ctx->last_dst);
   ctx->last_dst = value;
   ctx->last_dst_n = n;

   return value;
}

struct ir3_instruction *const *
ir3_get_src(struct ir3_context *ctx, nir_src *src)
{
   if (src->is_ssa) {
      struct hash_entry *entry;
      entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
      compile_assert(ctx, entry);
      return entry->data;
   } else {
      nir_register *reg = src->reg.reg;
      struct ir3_array *arr = ir3_get_array(ctx, reg);
      unsigned num_components = arr->r->num_components;
      struct ir3_instruction *addr = NULL;
      struct ir3_instruction **value =
         ralloc_array(ctx, struct ir3_instruction *, num_components);

      if (src->reg.indirect)
         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, src->reg.indirect)[0],
                              reg->num_components);

      for (unsigned i = 0; i < num_components; i++) {
         unsigned n = src->reg.base_offset * reg->num_components + i;
         compile_assert(ctx, n < arr->length);
         value[i] = ir3_create_array_load(ctx, arr, n, addr);
      }

      return value;
   }
}

void
ir3_put_dst(struct ir3_context *ctx, nir_dest *dst)
{
   unsigned bit_size = nir_dest_bit_size(*dst);

   /* add an extra mov if the dst value is a shared reg.. not all
    * instructions can read from shared regs; in cases where they can,
    * ir3_cp will clean up the extra mov:
    */
   for (unsigned i = 0; i < ctx->last_dst_n; i++) {
      if (!ctx->last_dst[i])
         continue;
      if (ctx->last_dst[i]->dsts[0]->flags & IR3_REG_SHARED) {
         ctx->last_dst[i] = ir3_MOV(ctx->block, ctx->last_dst[i], TYPE_U32);
      }
   }

   /* Note: 1-bit bools are stored in 32-bit regs */
   if (bit_size == 16) {
      for (unsigned i = 0; i < ctx->last_dst_n; i++) {
         struct ir3_instruction *dst = ctx->last_dst[i];
         ir3_set_dst_type(dst, true);
         ir3_fixup_src_type(dst);
         if (dst->opc == OPC_META_SPLIT) {
            ir3_set_dst_type(ssa(dst->srcs[0]), true);
            ir3_fixup_src_type(ssa(dst->srcs[0]));
            dst->srcs[0]->flags |= IR3_REG_HALF;
         }
      }
   }

   if (!dst->is_ssa) {
      nir_register *reg = dst->reg.reg;
      struct ir3_array *arr = ir3_get_array(ctx, reg);
      unsigned num_components = ctx->last_dst_n;
      struct ir3_instruction *addr = NULL;

      if (dst->reg.indirect)
         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, dst->reg.indirect)[0],
                              reg->num_components);

      for (unsigned i = 0; i < num_components; i++) {
         unsigned n = dst->reg.base_offset * reg->num_components + i;
         compile_assert(ctx, n < arr->length);
         if (!ctx->last_dst[i])
            continue;
         ir3_create_array_store(ctx, arr, n, ctx->last_dst[i], addr);
      }

      ralloc_free(ctx->last_dst);
   }

   ctx->last_dst = NULL;
   ctx->last_dst_n = 0;
}
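
/* A hedged usage sketch of the ir3_get_dst()/ir3_put_dst() pairing (the
 * variable names below are made up; the real call sites live in the
 * NIR->ir3 emit code, not in this file):
 *
 *    struct ir3_instruction **dst = ir3_get_dst(ctx, &nir_instr_dest, ncomp);
 *    for (unsigned i = 0; i < ncomp; i++)
 *       dst[i] = ...newly emitted ir3 instruction for component i...;
 *    ir3_put_dst(ctx, &nir_instr_dest);
 *
 * ir3_put_dst() performs the 16-bit/shared-reg fixups and the array stores
 * for the non-ssa case, so every ir3_get_dst() must be paired with it; a
 * forgotten put_dst() is caught by the compile_assert() on ctx->last_dst
 * the next time ir3_get_dst() runs.
 */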

static unsigned
dest_flags(struct ir3_instruction *instr)
{
   return instr->dsts[0]->flags & (IR3_REG_HALF | IR3_REG_SHARED);
}

struct ir3_instruction *
ir3_create_collect(struct ir3_context *ctx, struct ir3_instruction *const *arr,
                   unsigned arrsz)
{
   struct ir3_block *block = ctx->block;
   struct ir3_instruction *collect;

   if (arrsz == 0)
      return NULL;

   unsigned flags = dest_flags(arr[0]);

   collect = ir3_instr_create(block, OPC_META_COLLECT, 1, arrsz);
   __ssa_dst(collect)->flags |= flags;
   for (unsigned i = 0; i < arrsz; i++) {
      struct ir3_instruction *elem = arr[i];

      /* Since arrays are pre-colored in RA, we can't assume that
       * things will end up in the right place.  (Ie. if a collect
       * joins elements from two different arrays.)  So insert an
       * extra mov.
       *
       * We could possibly skip this if all the collected elements
       * are contiguous elements in a single array.. not sure how
       * likely that is to happen.
       *
       * Fixes a problem with glamor shaders, which in effect do
       * something like:
       *
       *    if (foo)
       *       texcoord = ..
       *    else
       *       texcoord = ..
       *    color = texture2D(tex, texcoord);
       *
       * In this case, texcoord will end up as nir registers (which
       * translate to ir3 arrays of length 1), and we can't assume
       * the two (or more) arrays will get allocated in consecutive
       * scalar registers.
       */
      if (elem->dsts[0]->flags & IR3_REG_ARRAY) {
         type_t type = (flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
         elem = ir3_MOV(block, elem, type);
      }

      compile_assert(ctx, dest_flags(elem) == flags);
      __ssa_src(collect, elem, flags);
   }

   collect->dsts[0]->wrmask = MASK(arrsz);

   return collect;
}
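
/* Note: OPC_META_COLLECT is the "gather scalars into one contiguous vector
 * value" meta instruction; its sources must end up in consecutive scalar
 * registers after RA, which is why array-backed elements get the extra mov
 * above.  ir3_split_dest() below is roughly the inverse, pulling individual
 * scalar components back out of a multi-component value.
 */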

/* helper for instructions that produce multiple consecutive scalar
 * outputs which need to have a split meta instruction inserted
 */
void
ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
               struct ir3_instruction *src, unsigned base, unsigned n)
{
   if ((n == 1) && (src->dsts[0]->wrmask == 0x1) &&
       /* setup_input needs ir3_split_dest to generate a SPLIT instruction */
       src->opc != OPC_META_INPUT) {
      dst[0] = src;
      return;
   }

   if (src->opc == OPC_META_COLLECT) {
      debug_assert((base + n) <= src->srcs_count);

      for (int i = 0; i < n; i++) {
         dst[i] = ssa(src->srcs[i + base]);
      }

      return;
   }

   unsigned flags = dest_flags(src);

   for (int i = 0, j = 0; i < n; i++) {
      struct ir3_instruction *split =
         ir3_instr_create(block, OPC_META_SPLIT, 1, 1);
      __ssa_dst(split)->flags |= flags;
      __ssa_src(split, src, flags);
      split->split.off = i + base;

      if (src->dsts[0]->wrmask & (1 << (i + base)))
         dst[j++] = split;
   }
}
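
/* Note on the loop above: a SPLIT is created for every component index, but
 * only components actually present in src->dsts[0]->wrmask are written to
 * the dst[] array (j advances only for those), so callers see a densely
 * packed result.
 */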

NORETURN void
ir3_context_error(struct ir3_context *ctx, const char *format, ...)
{
   struct hash_table *errors = NULL;
   va_list ap;
   va_start(ap, format);
   if (ctx->cur_instr) {
      errors = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                       _mesa_key_pointer_equal);
      char *msg = ralloc_vasprintf(errors, format, ap);
      _mesa_hash_table_insert(errors, ctx->cur_instr, msg);
   } else {
      mesa_loge_v(format, ap);
   }
   va_end(ap);
   nir_log_shader_annotated(ctx->s, errors);
   ralloc_free(errors);
   ctx->error = true;
   unreachable("");
}

static struct ir3_instruction *
create_addr0(struct ir3_block *block, struct ir3_instruction *src, int align)
{
   struct ir3_instruction *instr, *immed;

   instr = ir3_COV(block, src, TYPE_U32, TYPE_S16);

   switch (align) {
   case 1:
      /* src *= 1: */
      break;
   case 2:
      /* src *= 2 => src <<= 1: */
      immed = create_immed_typed(block, 1, TYPE_S16);
      instr = ir3_SHL_B(block, instr, 0, immed, 0);
      break;
   case 3:
      /* src *= 3: */
      immed = create_immed_typed(block, 3, TYPE_S16);
      instr = ir3_MULL_U(block, instr, 0, immed, 0);
      break;
   case 4:
      /* src *= 4 => src <<= 2: */
      immed = create_immed_typed(block, 2, TYPE_S16);
      instr = ir3_SHL_B(block, instr, 0, immed, 0);
      break;
   default:
      unreachable("bad align");
      return NULL;
   }

   instr->dsts[0]->flags |= IR3_REG_HALF;

   instr = ir3_MOV(block, instr, TYPE_S16);
   instr->dsts[0]->num = regid(REG_A0, 0);

   return instr;
}
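
/* Note: align here is the per-element stride in scalar components (an index
 * into a vec4-backed array needs a0.x = index * 4), which is why only the
 * values 1..4 are handled; the callers in this file pass reg->num_components
 * (see ir3_get_src() / ir3_put_dst() above).
 */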

static struct ir3_instruction *
create_addr1(struct ir3_block *block, unsigned const_val)
{
   struct ir3_instruction *immed =
      create_immed_typed(block, const_val, TYPE_U16);
   struct ir3_instruction *instr = ir3_MOV(block, immed, TYPE_U16);
   instr->dsts[0]->num = regid(REG_A0, 1);
   return instr;
}

/* caches addr values to avoid generating multiple cov/shl/mova
 * sequences for each use of a given NIR-level src as an address
 */
struct ir3_instruction *
ir3_get_addr0(struct ir3_context *ctx, struct ir3_instruction *src, int align)
{
   struct ir3_instruction *addr;
   unsigned idx = align - 1;

   compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr0_ht));

   if (!ctx->addr0_ht[idx]) {
      ctx->addr0_ht[idx] = _mesa_hash_table_create(ctx, _mesa_hash_pointer,
                                                   _mesa_key_pointer_equal);
   } else {
      struct hash_entry *entry;
      entry = _mesa_hash_table_search(ctx->addr0_ht[idx], src);
      if (entry)
         return entry->data;
   }

   addr = create_addr0(ctx->block, src, align);
   _mesa_hash_table_insert(ctx->addr0_ht[idx], src, addr);

   return addr;
}

/* Similar to ir3_get_addr0, but for a1.x. */
struct ir3_instruction *
ir3_get_addr1(struct ir3_context *ctx, unsigned const_val)
{
   struct ir3_instruction *addr;

   if (!ctx->addr1_ht) {
      ctx->addr1_ht = _mesa_hash_table_u64_create(ctx);
   } else {
      addr = _mesa_hash_table_u64_search(ctx->addr1_ht, const_val);
      if (addr)
         return addr;
   }

   addr = create_addr1(ctx->block, const_val);
   _mesa_hash_table_u64_insert(ctx->addr1_ht, const_val, addr);

   return addr;
}

struct ir3_instruction *
ir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *cond;

   /* NOTE: only cmps.*.* can write p0.x: */
   cond = ir3_CMPS_S(b, src, 0, create_immed(b, 0), 0);
   cond->cat2.condition = IR3_COND_NE;

   /* condition always goes in predicate register: */
   cond->dsts[0]->num = regid(REG_P0, 0);
   cond->dsts[0]->flags &= ~IR3_REG_SSA;

   return cond;
}
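
/* Note: the cmps.s.ne against zero above is what turns an ordinary 32-bit
 * boolean value into p0.x, the predicate register consumed by predicated
 * instructions such as branches and kills; clearing IR3_REG_SSA reflects
 * that p0.x is a fixed physical register rather than an SSA value.
 */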

/*
 * Array helpers
 */

void
ir3_declare_array(struct ir3_context *ctx, nir_register *reg)
{
   struct ir3_array *arr = rzalloc(ctx, struct ir3_array);
   arr->id = ++ctx->num_arrays;
   /* NOTE: sometimes we get non-array regs, for example for arrays of
    * length 1.  See fs-const-array-of-struct-of-array.shader_test.  So
    * treat a non-array as if it were an array of length 1.
    *
    * It would be nice if there were a nir pass to convert arrays of
    * length 1 to ssa.
    */
   arr->length = reg->num_components * MAX2(1, reg->num_array_elems);
   compile_assert(ctx, arr->length > 0);
   arr->r = reg;
   arr->half = reg->bit_size <= 16;
   // HACK one-bit bools still end up as 32b:
   if (reg->bit_size == 1)
      arr->half = false;
   list_addtail(&arr->node, &ctx->ir->array_list);
}
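
/* Note: arr->length is measured in scalar components (num_components times
 * the number of array elements), which matches how ir3_get_src() and
 * ir3_put_dst() above compute offsets as base_offset * num_components + i
 * before asserting against arr->length.
 */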

struct ir3_array *
ir3_get_array(struct ir3_context *ctx, nir_register *reg)
{
   foreach_array (arr, &ctx->ir->array_list) {
      if (arr->r == reg)
         return arr;
   }
   ir3_context_error(ctx, "bogus reg: r%d\n", reg->index);
   return NULL;
}

/* relative (indirect) if address!=NULL */
struct ir3_instruction *
ir3_create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n,
                      struct ir3_instruction *address)
{
   struct ir3_block *block = ctx->block;
   struct ir3_instruction *mov;
   struct ir3_register *src;
   unsigned flags = 0;

   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
   if (arr->half) {
      mov->cat1.src_type = TYPE_U16;
      mov->cat1.dst_type = TYPE_U16;
      flags |= IR3_REG_HALF;
   } else {
      mov->cat1.src_type = TYPE_U32;
      mov->cat1.dst_type = TYPE_U32;
   }

   mov->barrier_class = IR3_BARRIER_ARRAY_R;
   mov->barrier_conflict = IR3_BARRIER_ARRAY_W;
   __ssa_dst(mov)->flags |= flags;
   src = ir3_src_create(mov, 0,
                        IR3_REG_ARRAY | COND(address, IR3_REG_RELATIV) | flags);
   src->def = (arr->last_write && arr->last_write->instr->block == block)
                 ? arr->last_write
                 : NULL;
   src->size = arr->length;
   src->array.id = arr->id;
   src->array.offset = n;
   src->array.base = INVALID_REG;

   if (address)
      ir3_instr_set_address(mov, address);

   return mov;
}
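
/* Note: because array accesses are not in SSA form, the src->def link to
 * arr->last_write (when the last write is in the same block) plus the
 * barrier_class/barrier_conflict fields are what give later passes an
 * explicit ordering between array reads and writes, instead of the usual
 * SSA use/def chains.
 */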

/* relative (indirect) if address!=NULL */
void
ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
                       struct ir3_instruction *src,
                       struct ir3_instruction *address)
{
   struct ir3_block *block = ctx->block;
   struct ir3_instruction *mov;
   struct ir3_register *dst;
   unsigned flags = 0;

   /* if not a relative store, don't create an extra mov, since that
    * ends up being difficult for cp to remove.
    *
    * Also, don't skip the mov if the src is meta (like fanout/split),
    * since that creates a situation that RA can't really handle properly.
    */
   if (!address && !is_meta(src)) {
      dst = src->dsts[0];

      src->barrier_class |= IR3_BARRIER_ARRAY_W;
      src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;

      dst->flags |= IR3_REG_ARRAY;
      dst->size = arr->length;
      dst->array.id = arr->id;
      dst->array.offset = n;
      dst->array.base = INVALID_REG;

      if (arr->last_write && arr->last_write->instr->block == src->block)
         ir3_reg_set_last_array(src, dst, arr->last_write);

      arr->last_write = dst;

      array_insert(block, block->keeps, src);

      return;
   }

   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
   if (arr->half) {
      mov->cat1.src_type = TYPE_U16;
      mov->cat1.dst_type = TYPE_U16;
      flags |= IR3_REG_HALF;
   } else {
      mov->cat1.src_type = TYPE_U32;
      mov->cat1.dst_type = TYPE_U32;
   }
   mov->barrier_class = IR3_BARRIER_ARRAY_W;
   mov->barrier_conflict = IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;
   dst = ir3_dst_create(
      mov, 0,
      IR3_REG_SSA | IR3_REG_ARRAY | flags | COND(address, IR3_REG_RELATIV));
   dst->instr = mov;
   dst->size = arr->length;
   dst->array.id = arr->id;
   dst->array.offset = n;
   dst->array.base = INVALID_REG;
   ir3_src_create(mov, 0, IR3_REG_SSA | flags)->def = src->dsts[0];

   if (arr->last_write && arr->last_write->instr->block == block)
      ir3_reg_set_last_array(mov, dst, arr->last_write);

   if (address)
      ir3_instr_set_address(mov, address);

   arr->last_write = dst;

   /* the array store may only matter to something in an earlier
    * block (ie. loops), but since arrays are not in SSA, the depth
    * pass won't know this.. so keep all array stores:
    */
   array_insert(block, block->keeps, mov);
}