Path: blob/21.2-virgl/src/freedreno/ir3/ir3_context.c
/*
 * Copyright (C) 2015-2018 Rob Clark <[email protected]>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include "ir3_context.h"
#include "ir3_compiler.h"
#include "ir3_image.h"
#include "ir3_nir.h"
#include "ir3_shader.h"

struct ir3_context *
ir3_context_init(struct ir3_compiler *compiler, struct ir3_shader_variant *so)
{
   struct ir3_context *ctx = rzalloc(NULL, struct ir3_context);

   if (compiler->gpu_id >= 400) {
      if (so->type == MESA_SHADER_VERTEX) {
         ctx->astc_srgb = so->key.vastc_srgb;
      } else if (so->type == MESA_SHADER_FRAGMENT) {
         ctx->astc_srgb = so->key.fastc_srgb;
      }
   } else {
      if (so->type == MESA_SHADER_VERTEX) {
         ctx->samples = so->key.vsamples;
      } else if (so->type == MESA_SHADER_FRAGMENT) {
         ctx->samples = so->key.fsamples;
      }
   }

   if (compiler->gpu_id >= 600) {
      ctx->funcs = &ir3_a6xx_funcs;
   } else if (compiler->gpu_id >= 400) {
      ctx->funcs = &ir3_a4xx_funcs;
   }

   ctx->compiler = compiler;
   ctx->so = so;
   ctx->def_ht =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->block_ht =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->continue_block_ht =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx->sel_cond_conversions =
      _mesa_hash_table_create(ctx, _mesa_hash_pointer, _mesa_key_pointer_equal);
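
   /* Descriptive note (added, not in the original source): def_ht maps
    * a nir_ssa_def to the ralloc'd array of per-component
    * ir3_instruction pointers built by ir3_get_dst_ssa() and consumed
    * by ir3_get_src() below; the other tables are likewise keyed by
    * pointer identity via _mesa_hash_pointer.
    */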

   /* TODO: maybe generate some sort of bitmask of what key
    * lowers vs what shader has (ie. no need to lower
    * texture clamp lowering if no texture sample instrs)..
    * although should be done further up the stack to avoid
    * creating duplicate variants..
    */

   ctx->s = nir_shader_clone(ctx, so->shader->nir);
   ir3_nir_lower_variant(so, ctx->s);

   /* this needs to be the last pass run, so do this here instead of
    * in ir3_optimize_nir():
    */
   bool progress = false;
   NIR_PASS(progress, ctx->s, nir_lower_locals_to_regs);

   /* we may need cleanup after lower_locals_to_regs */
   while (progress) {
      progress = false;
      NIR_PASS(progress, ctx->s, nir_opt_algebraic);
      NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
   }

   /* We want to lower nir_op_imul as late as possible, to catch also
    * those generated by earlier passes (e.g., nir_lower_locals_to_regs).
    * However, we want a final swing of a few passes to have a chance
    * at optimizing the result.
    */
   progress = false;
   NIR_PASS(progress, ctx->s, ir3_nir_lower_imul);
   while (progress) {
      progress = false;
      NIR_PASS(progress, ctx->s, nir_opt_algebraic);
      NIR_PASS(progress, ctx->s, nir_opt_copy_prop_vars);
      NIR_PASS(progress, ctx->s, nir_opt_dead_write_vars);
      NIR_PASS(progress, ctx->s, nir_opt_dce);
      NIR_PASS(progress, ctx->s, nir_opt_constant_folding);
   }

   /* Enable the texture pre-fetch feature only on a4xx onwards.  But
    * only enable it on generations that have been tested:
    */
   if ((so->type == MESA_SHADER_FRAGMENT) && (compiler->gpu_id >= 600))
      NIR_PASS_V(ctx->s, ir3_nir_lower_tex_prefetch);

   NIR_PASS(progress, ctx->s, nir_lower_phis_to_scalar, true);

   /* Super crude heuristic to limit # of tex prefetch in small
    * shaders.  This completely ignores loops.. but that's really
    * not the worst of its problems.  (A frag shader that has
    * loops is probably going to be big enough to not trigger a
    * lower threshold.)
    *
    *   1) probably want to do this in terms of ir3 instructions
    *   2) probably really want to decide this after scheduling
    *      (or at least pre-RA sched) so we have a rough idea about
    *      nops, and don't count things that get cp'd away
    *   3) blob seems to use higher thresholds with a mix of more
    *      SFU instructions.  Which partly makes sense, more SFU
    *      instructions probably means you want to get the real
    *      shader started sooner, but that considers where in the
    *      shader the SFU instructions are, which blob doesn't seem
    *      to do.
    *
    * This uses more conservative thresholds assuming a more alu
    * than sfu heavy instruction mix.
    */
   if (so->type == MESA_SHADER_FRAGMENT) {
      nir_function_impl *fxn = nir_shader_get_entrypoint(ctx->s);

      unsigned instruction_count = 0;
      nir_foreach_block (block, fxn) {
         instruction_count += exec_list_length(&block->instr_list);
      }

      if (instruction_count < 50) {
         ctx->prefetch_limit = 2;
      } else if (instruction_count < 70) {
         ctx->prefetch_limit = 3;
      } else {
         ctx->prefetch_limit = IR3_MAX_SAMPLER_PREFETCH;
      }
   }
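
   /* Worked instance of the thresholds above (illustrative note, not in
    * the original): a 40-instruction fragment shader may issue at most
    * 2 prefetches, a 60-instruction one 3, and anything with 70 or more
    * instructions gets the hardware maximum, IR3_MAX_SAMPLER_PREFETCH.
    */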

   if (shader_debug_enabled(so->type)) {
      mesa_logi("NIR (final form) for %s shader %s:", ir3_shader_stage(so),
                so->shader->nir->info.name);
      nir_log_shaderi(ctx->s);
   }

   ir3_ibo_mapping_init(&so->image_mapping, ctx->s->info.num_textures);

   return ctx;
}

void
ir3_context_free(struct ir3_context *ctx)
{
   ralloc_free(ctx);
}

/*
 * Misc helpers
 */

/* allocate an n-element value array (to be populated by caller) and
 * insert in def_ht
 */
struct ir3_instruction **
ir3_get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n)
{
   struct ir3_instruction **value =
      ralloc_array(ctx->def_ht, struct ir3_instruction *, n);
   _mesa_hash_table_insert(ctx->def_ht, dst, value);
   return value;
}

struct ir3_instruction **
ir3_get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n)
{
   struct ir3_instruction **value;

   if (dst->is_ssa) {
      value = ir3_get_dst_ssa(ctx, &dst->ssa, n);
   } else {
      value = ralloc_array(ctx, struct ir3_instruction *, n);
   }

   /* NOTE: in non-ssa case, we don't really need to store last_dst
    * but this helps us catch cases where put_dst() call is forgotten
    */
   compile_assert(ctx, !ctx->last_dst);
   ctx->last_dst = value;
   ctx->last_dst_n = n;

   return value;
}
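
/* Sketch of the intended get/put protocol (illustrative, not lifted
 * from this tree -- emit_component() is a made-up helper): an
 * instruction emitter grabs the dst array, fills in one instruction per
 * component, then hands it back:
 *
 *    struct ir3_instruction **dst = ir3_get_dst(ctx, &intr->dest, n);
 *    for (unsigned i = 0; i < n; i++)
 *       dst[i] = emit_component(ctx, intr, i);   // hypothetical
 *    ir3_put_dst(ctx, &intr->dest);
 *
 * Forgetting the ir3_put_dst() leaves last_dst set and trips the
 * compile_assert() above on the next ir3_get_dst() call.
 */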

struct ir3_instruction *const *
ir3_get_src(struct ir3_context *ctx, nir_src *src)
{
   if (src->is_ssa) {
      struct hash_entry *entry;
      entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
      compile_assert(ctx, entry);
      return entry->data;
   } else {
      nir_register *reg = src->reg.reg;
      struct ir3_array *arr = ir3_get_array(ctx, reg);
      unsigned num_components = arr->r->num_components;
      struct ir3_instruction *addr = NULL;
      struct ir3_instruction **value =
         ralloc_array(ctx, struct ir3_instruction *, num_components);

      if (src->reg.indirect)
         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, src->reg.indirect)[0],
                              reg->num_components);

      for (unsigned i = 0; i < num_components; i++) {
         unsigned n = src->reg.base_offset * reg->num_components + i;
         compile_assert(ctx, n < arr->length);
         value[i] = ir3_create_array_load(ctx, arr, n, addr);
      }

      return value;
   }
}

void
ir3_put_dst(struct ir3_context *ctx, nir_dest *dst)
{
   unsigned bit_size = nir_dest_bit_size(*dst);

   /* add an extra mov if the dst value is a shared reg.. in some cases
    * not all instructions can read from shared regs, and in cases where
    * they can, ir3_cp will clean up the extra mov:
    */
   for (unsigned i = 0; i < ctx->last_dst_n; i++) {
      if (!ctx->last_dst[i])
         continue;
      if (ctx->last_dst[i]->dsts[0]->flags & IR3_REG_SHARED) {
         ctx->last_dst[i] = ir3_MOV(ctx->block, ctx->last_dst[i], TYPE_U32);
      }
   }

   /* Note: 1-bit bools are stored in 32-bit regs */
   if (bit_size == 16) {
      for (unsigned i = 0; i < ctx->last_dst_n; i++) {
         struct ir3_instruction *dst = ctx->last_dst[i];
         ir3_set_dst_type(dst, true);
         ir3_fixup_src_type(dst);
         if (dst->opc == OPC_META_SPLIT) {
            ir3_set_dst_type(ssa(dst->srcs[0]), true);
            ir3_fixup_src_type(ssa(dst->srcs[0]));
            dst->srcs[0]->flags |= IR3_REG_HALF;
         }
      }
   }

   if (!dst->is_ssa) {
      nir_register *reg = dst->reg.reg;
      struct ir3_array *arr = ir3_get_array(ctx, reg);
      unsigned num_components = ctx->last_dst_n;
      struct ir3_instruction *addr = NULL;

      if (dst->reg.indirect)
         addr = ir3_get_addr0(ctx, ir3_get_src(ctx, dst->reg.indirect)[0],
                              reg->num_components);

      for (unsigned i = 0; i < num_components; i++) {
         unsigned n = dst->reg.base_offset * reg->num_components + i;
         compile_assert(ctx, n < arr->length);
         if (!ctx->last_dst[i])
            continue;
         ir3_create_array_store(ctx, arr, n, ctx->last_dst[i], addr);
      }

      ralloc_free(ctx->last_dst);
   }

   ctx->last_dst = NULL;
   ctx->last_dst_n = 0;
}

static unsigned
dest_flags(struct ir3_instruction *instr)
{
   return instr->dsts[0]->flags & (IR3_REG_HALF | IR3_REG_SHARED);
}

struct ir3_instruction *
ir3_create_collect(struct ir3_context *ctx, struct ir3_instruction *const *arr,
                   unsigned arrsz)
{
   struct ir3_block *block = ctx->block;
   struct ir3_instruction *collect;

   if (arrsz == 0)
      return NULL;

   unsigned flags = dest_flags(arr[0]);

   collect = ir3_instr_create(block, OPC_META_COLLECT, 1, arrsz);
   __ssa_dst(collect)->flags |= flags;
   for (unsigned i = 0; i < arrsz; i++) {
      struct ir3_instruction *elem = arr[i];

      /* Since arrays are pre-colored in RA, we can't assume that
       * things will end up in the right place.  (Ie. if a collect
       * joins elements from two different arrays.)  So insert an
       * extra mov.
       *
       * We could possibly skip this if all the collected elements
       * are contiguous elements in a single array.. not sure how
       * likely that is to happen.
       *
       * Fixes a problem with glamor shaders, that in effect do
       * something like:
       *
       *    if (foo)
       *       texcoord = ..
       *    else
       *       texcoord = ..
       *    color = texture2D(tex, texcoord);
       *
       * In this case, texcoord will end up as nir registers (which
       * translate to ir3 arrays of length 1), and we can't assume
       * the two (or more) arrays will get allocated in consecutive
       * scalar registers.
       */
      if (elem->dsts[0]->flags & IR3_REG_ARRAY) {
         type_t type = (flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
         elem = ir3_MOV(block, elem, type);
      }

      compile_assert(ctx, dest_flags(elem) == flags);
      __ssa_src(collect, elem, flags);
   }

   collect->dsts[0]->wrmask = MASK(arrsz);

   return collect;
}
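
/* Illustrative use of the collect meta instruction (a sketch; the
 * variables are made up): gathering two scalar values into one
 * contiguous vec2 source, e.g. for a texture fetch:
 *
 *    struct ir3_instruction *coords[2] = { coord_x, coord_y };
 *    struct ir3_instruction *vec = ir3_create_collect(ctx, coords, 2);
 *
 * The resulting OPC_META_COLLECT tells RA that its sources must land in
 * consecutive scalar registers, so `vec` can be used wherever the hw
 * expects a vector operand.
 */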

/* helper for instructions that produce multiple consecutive scalar
 * outputs which need to have a split meta instruction inserted
 */
void
ir3_split_dest(struct ir3_block *block, struct ir3_instruction **dst,
               struct ir3_instruction *src, unsigned base, unsigned n)
{
   if ((n == 1) && (src->dsts[0]->wrmask == 0x1) &&
       /* setup_input needs ir3_split_dest to generate a SPLIT instruction */
       src->opc != OPC_META_INPUT) {
      dst[0] = src;
      return;
   }

   if (src->opc == OPC_META_COLLECT) {
      debug_assert((base + n) <= src->srcs_count);

      for (int i = 0; i < n; i++) {
         dst[i] = ssa(src->srcs[i + base]);
      }

      return;
   }

   unsigned flags = dest_flags(src);

   for (int i = 0, j = 0; i < n; i++) {
      struct ir3_instruction *split =
         ir3_instr_create(block, OPC_META_SPLIT, 1, 1);
      __ssa_dst(split)->flags |= flags;
      __ssa_src(split, src, flags);
      split->split.off = i + base;

      if (src->dsts[0]->wrmask & (1 << (i + base)))
         dst[j++] = split;
   }
}
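
/* Illustrative use (a sketch; `sam` stands for some vec4-producing
 * instruction): fanning a 4-component result out into scalars:
 *
 *    struct ir3_instruction *dst[4];
 *    ir3_split_dest(block, dst, sam, 0, 4);
 *
 * Each dst[i] becomes an OPC_META_SPLIT with split.off == i, selecting
 * one scalar from the consecutive registers written by `sam`;
 * components masked out of the wrmask get no split.
 */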

NORETURN void
ir3_context_error(struct ir3_context *ctx, const char *format, ...)
{
   struct hash_table *errors = NULL;
   va_list ap;
   va_start(ap, format);
   if (ctx->cur_instr) {
      errors = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                       _mesa_key_pointer_equal);
      char *msg = ralloc_vasprintf(errors, format, ap);
      _mesa_hash_table_insert(errors, ctx->cur_instr, msg);
   } else {
      mesa_loge_v(format, ap);
   }
   va_end(ap);
   nir_log_shader_annotated(ctx->s, errors);
   ralloc_free(errors);
   ctx->error = true;
   unreachable("");
}

static struct ir3_instruction *
create_addr0(struct ir3_block *block, struct ir3_instruction *src, int align)
{
   struct ir3_instruction *instr, *immed;

   instr = ir3_COV(block, src, TYPE_U32, TYPE_S16);

   switch (align) {
   case 1:
      /* src *= 1: */
      break;
   case 2:
      /* src *= 2 => src <<= 1: */
      immed = create_immed_typed(block, 1, TYPE_S16);
      instr = ir3_SHL_B(block, instr, 0, immed, 0);
      break;
   case 3:
      /* src *= 3: */
      immed = create_immed_typed(block, 3, TYPE_S16);
      instr = ir3_MULL_U(block, instr, 0, immed, 0);
      break;
   case 4:
      /* src *= 4 => src <<= 2: */
      immed = create_immed_typed(block, 2, TYPE_S16);
      instr = ir3_SHL_B(block, instr, 0, immed, 0);
      break;
   default:
      unreachable("bad align");
      return NULL;
   }

   instr->dsts[0]->flags |= IR3_REG_HALF;

   instr = ir3_MOV(block, instr, TYPE_S16);
   instr->dsts[0]->num = regid(REG_A0, 0);

   return instr;
}

static struct ir3_instruction *
create_addr1(struct ir3_block *block, unsigned const_val)
{
   struct ir3_instruction *immed =
      create_immed_typed(block, const_val, TYPE_U16);
   struct ir3_instruction *instr = ir3_MOV(block, immed, TYPE_U16);
   instr->dsts[0]->num = regid(REG_A0, 1);
   return instr;
}

/* caches addr values to avoid generating multiple cov/shl/mova
 * sequences for each use of a given NIR-level src as address
 */
struct ir3_instruction *
ir3_get_addr0(struct ir3_context *ctx, struct ir3_instruction *src, int align)
{
   struct ir3_instruction *addr;
   unsigned idx = align - 1;

   compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr0_ht));

   if (!ctx->addr0_ht[idx]) {
      ctx->addr0_ht[idx] = _mesa_hash_table_create(ctx, _mesa_hash_pointer,
                                                   _mesa_key_pointer_equal);
   } else {
      struct hash_entry *entry;
      entry = _mesa_hash_table_search(ctx->addr0_ht[idx], src);
      if (entry)
         return entry->data;
   }

   addr = create_addr0(ctx->block, src, align);
   _mesa_hash_table_insert(ctx->addr0_ht[idx], src, addr);

   return addr;
}
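
/* Rough shape of what create_addr0() emits for align=4 (an
 * illustration, not actual disassembly):
 *
 *    cov.u32s16 hrN, src       ; narrow the index to 16b
 *    shl.b      hrN, hrN, 2    ; src *= 4
 *    mov.s16s16 a0.x, hrN      ; write the address register
 *
 * The per-align hash tables above mean a second indirect access off the
 * same NIR-level src (with the same align) reuses the cached a0.x write
 * instead of emitting this sequence again.
 */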

/* Similar to ir3_get_addr0, but for a1.x. */
struct ir3_instruction *
ir3_get_addr1(struct ir3_context *ctx, unsigned const_val)
{
   struct ir3_instruction *addr;

   if (!ctx->addr1_ht) {
      ctx->addr1_ht = _mesa_hash_table_u64_create(ctx);
   } else {
      addr = _mesa_hash_table_u64_search(ctx->addr1_ht, const_val);
      if (addr)
         return addr;
   }

   addr = create_addr1(ctx->block, const_val);
   _mesa_hash_table_u64_insert(ctx->addr1_ht, const_val, addr);

   return addr;
}

struct ir3_instruction *
ir3_get_predicate(struct ir3_context *ctx, struct ir3_instruction *src)
{
   struct ir3_block *b = ctx->block;
   struct ir3_instruction *cond;

   /* NOTE: only cmps.*.* can write p0.x: */
   cond = ir3_CMPS_S(b, src, 0, create_immed(b, 0), 0);
   cond->cat2.condition = IR3_COND_NE;

   /* condition always goes in predicate register: */
   cond->dsts[0]->num = regid(REG_P0, 0);
   cond->dsts[0]->flags &= ~IR3_REG_SSA;

   return cond;
}

/*
 * Array helpers
 */

void
ir3_declare_array(struct ir3_context *ctx, nir_register *reg)
{
   struct ir3_array *arr = rzalloc(ctx, struct ir3_array);
   arr->id = ++ctx->num_arrays;
   /* NOTE: sometimes we get non-array regs, for example for arrays of
    * length 1.  See fs-const-array-of-struct-of-array.shader_test.  So
    * treat a non-array as if it was an array of length 1.
    *
    * It would be nice if there was a nir pass to convert arrays of
    * length 1 to ssa.
    */
   arr->length = reg->num_components * MAX2(1, reg->num_array_elems);
   compile_assert(ctx, arr->length > 0);
   arr->r = reg;
   arr->half = reg->bit_size <= 16;
   // HACK one-bit bools still end up as 32b:
   if (reg->bit_size == 1)
      arr->half = false;
   list_addtail(&arr->node, &ctx->ir->array_list);
}

struct ir3_array *
ir3_get_array(struct ir3_context *ctx, nir_register *reg)
{
   foreach_array (arr, &ctx->ir->array_list) {
      if (arr->r == reg)
         return arr;
   }
   ir3_context_error(ctx, "bogus reg: r%d\n", reg->index);
   return NULL;
}

/* relative (indirect) if address!=NULL */
struct ir3_instruction *
ir3_create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n,
                      struct ir3_instruction *address)
{
   struct ir3_block *block = ctx->block;
   struct ir3_instruction *mov;
   struct ir3_register *src;
   unsigned flags = 0;

   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
   if (arr->half) {
      mov->cat1.src_type = TYPE_U16;
      mov->cat1.dst_type = TYPE_U16;
      flags |= IR3_REG_HALF;
   } else {
      mov->cat1.src_type = TYPE_U32;
      mov->cat1.dst_type = TYPE_U32;
   }

   mov->barrier_class = IR3_BARRIER_ARRAY_R;
   mov->barrier_conflict = IR3_BARRIER_ARRAY_W;
   __ssa_dst(mov)->flags |= flags;
   src = ir3_src_create(mov, 0,
                        IR3_REG_ARRAY | COND(address, IR3_REG_RELATIV) | flags);
   src->def = (arr->last_write && arr->last_write->instr->block == block)
                 ? arr->last_write
                 : NULL;
   src->size = arr->length;
   src->array.id = arr->id;
   src->array.offset = n;
   src->array.base = INVALID_REG;

   if (address)
      ir3_instr_set_address(mov, address);

   return mov;
}

/* relative (indirect) if address!=NULL */
void
ir3_create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
                       struct ir3_instruction *src,
                       struct ir3_instruction *address)
{
   struct ir3_block *block = ctx->block;
   struct ir3_instruction *mov;
   struct ir3_register *dst;
   unsigned flags = 0;

   /* if not a relative store, don't create an extra mov, since that
    * ends up being difficult for cp to remove.
    *
    * Also, don't skip the mov if the src is meta (like fanout/split),
    * since that creates a situation that RA can't really handle properly.
    */
   if (!address && !is_meta(src)) {
      dst = src->dsts[0];

      src->barrier_class |= IR3_BARRIER_ARRAY_W;
      src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;

      dst->flags |= IR3_REG_ARRAY;
      dst->size = arr->length;
      dst->array.id = arr->id;
      dst->array.offset = n;
      dst->array.base = INVALID_REG;

      if (arr->last_write && arr->last_write->instr->block == src->block)
         ir3_reg_set_last_array(src, dst, arr->last_write);

      arr->last_write = dst;

      array_insert(block, block->keeps, src);

      return;
   }

   mov = ir3_instr_create(block, OPC_MOV, 1, 1);
   if (arr->half) {
      mov->cat1.src_type = TYPE_U16;
      mov->cat1.dst_type = TYPE_U16;
      flags |= IR3_REG_HALF;
   } else {
      mov->cat1.src_type = TYPE_U32;
      mov->cat1.dst_type = TYPE_U32;
   }
   mov->barrier_class = IR3_BARRIER_ARRAY_W;
   mov->barrier_conflict = IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;
   dst = ir3_dst_create(
      mov, 0,
      IR3_REG_SSA | IR3_REG_ARRAY | flags | COND(address, IR3_REG_RELATIV));
   dst->instr = mov;
   dst->size = arr->length;
   dst->array.id = arr->id;
   dst->array.offset = n;
   dst->array.base = INVALID_REG;
   ir3_src_create(mov, 0, IR3_REG_SSA | flags)->def = src->dsts[0];

   if (arr->last_write && arr->last_write->instr->block == block)
      ir3_reg_set_last_array(mov, dst, arr->last_write);

   if (address)
      ir3_instr_set_address(mov, address);

   arr->last_write = dst;

   /* the array store may only matter to something in an earlier
    * block (ie. loops), but since arrays are not in SSA, depth
    * pass won't know this.. so keep all array stores:
    */
   array_insert(block, block->keeps, mov);
}