/* Source provenance (web-viewer navigation text removed):
 * GitHub repository PojavLauncherTeam/mesa, branch 21.2-virgl,
 * file src/freedreno/ir3/ir3.h
 */
1
/*
2
* Copyright (c) 2013 Rob Clark <[email protected]>
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
* SOFTWARE.
22
*/
23
24
#ifndef IR3_H_
25
#define IR3_H_
26
27
#include <stdbool.h>
28
#include <stdint.h>
29
30
#include "compiler/shader_enums.h"
31
32
#include "util/bitscan.h"
33
#include "util/list.h"
34
#include "util/set.h"
35
#include "util/u_debug.h"
36
37
#include "instr-a3xx.h"
38
39
/* low level intermediate representation of an adreno shader program */
40
41
struct ir3_compiler;
42
struct ir3;
43
struct ir3_instruction;
44
struct ir3_block;
45
46
/* Summary statistics collected for a compiled shader binary; used for
 * reporting and for occupancy (max_waves) computation.
 */
struct ir3_info {
   void *data; /* used internally in ir3 assembler */
   /* Size in bytes of the shader binary, including NIR constants and
    * padding
    */
   uint32_t size;
   /* byte offset from start of the shader to the NIR constant data. */
   uint32_t constant_data_offset;
   /* Size in dwords of the instructions. */
   uint16_t sizedwords;
   uint16_t instrs_count; /* expanded to account for rpt's */
   uint16_t nops_count;   /* # of nop instructions, including nopN */
   uint16_t mov_count;
   uint16_t cov_count;
   /* NOTE: max_reg, etc, does not include registers not touched
    * by the shader (ie. vertex fetched via VFD_DECODE but not
    * touched by shader)
    */
   int8_t max_reg; /* highest GPR # used by shader */
   int8_t max_half_reg;
   int16_t max_const;
   /* This is the maximum # of waves that can executed at once in one core,
    * assuming that they are all executing this shader.
    */
   int8_t max_waves;
   bool double_threadsize;
   bool multi_dword_ldp_stp;

   /* number of sync bits: */
   uint16_t ss, sy;

   /* estimate of number of cycles stalled on (ss) */
   uint16_t sstall;

   uint16_t last_baryf; /* instruction # of last varying fetch */

   /* Number of instructions of a given category: */
   uint16_t instrs_per_cat[8];
};
85
86
/* A set of registers that register allocation tries to place contiguously
 * (e.g. the sources/dest of collect/split meta instructions).
 * NOTE(review): semantics inferred from RA-related fields below — confirm
 * against ir3_ra sources.
 */
struct ir3_merge_set {
   uint16_t preferred_reg; /* preferred physical reg for the whole set */
   uint16_t size;          /* total size of the set */
   uint16_t alignment;     /* required alignment of the base reg */

   unsigned interval_start; /* start of the set's live interval */

   unsigned regs_count;        /* number of entries in regs[] */
   struct ir3_register **regs; /* member registers, in set order */
};
96
97
/* A single operand (source or destination) of an instruction.  The same
 * struct is used both pre-RA (SSA form) and post-RA (hw register form).
 */
struct ir3_register {
   enum {
      IR3_REG_CONST = 0x001,
      IR3_REG_IMMED = 0x002,
      IR3_REG_HALF = 0x004,
      /* Shared registers have the same value for all threads when read.
       * They can only be written when one thread is active (that is, inside
       * a "getone" block).
       */
      IR3_REG_SHARED = 0x008,
      IR3_REG_RELATIV = 0x010,
      IR3_REG_R = 0x020,
      /* Most instructions, it seems, can do float abs/neg but not
       * integer. The CP pass needs to know what is intended (int or
       * float) in order to do the right thing. For this reason the
       * abs/neg flags are split out into float and int variants. In
       * addition, .b (bitwise) operations, the negate is actually a
       * bitwise not, so split that out into a new flag to make it
       * more clear.
       */
      IR3_REG_FNEG = 0x040,
      IR3_REG_FABS = 0x080,
      IR3_REG_SNEG = 0x100,
      IR3_REG_SABS = 0x200,
      IR3_REG_BNOT = 0x400,
      /* (ei) flag, end-input? Set on last bary, presumably to signal
       * that the shader needs no more input:
       */
      IR3_REG_EI = 0x2000,
      /* meta-flags, for intermediate stages of IR, ie.
       * before register assignment is done:
       */
      IR3_REG_SSA = 0x4000, /* 'instr' is ptr to assigning instr */
      IR3_REG_ARRAY = 0x8000,

      IR3_REG_KILL = 0x10000,
      IR3_REG_FIRST_KILL = 0x20000,
      IR3_REG_UNUSED = 0x40000,
   } flags;

   /* used for cat5 instructions, but also for internal/IR level
    * tracking of what registers are read/written by an instruction.
    * wrmask may be a bad name since it is used to represent both
    * src and dst that touch multiple adjacent registers.
    */
   unsigned wrmask : 16; /* up to vec16 */

   /* for relative addressing, 32bits for array size is too small,
    * but otoh we don't need to deal with disjoint sets, so instead
    * use a simple size field (number of scalar components).
    *
    * Note the size field isn't important for relative const (since
    * we don't have to do register allocation for constants).
    */
   unsigned size : 16;

   /* normal registers:
    * the component is in the low two bits of the reg #, so
    * rN.x becomes: (N << 2) | x
    */
   uint16_t num;
   uint16_t name; /* pre-RA virtual register name */
   union {
      /* immediate: */
      int32_t iim_val;
      uint32_t uim_val;
      float fim_val;
      /* relative: */
      struct {
         uint16_t id;
         int16_t offset;
         uint16_t base;
      } array;
   };

   /* For IR3_REG_DEST, pointer back to the instruction containing this
    * register.
    */
   struct ir3_instruction *instr;

   /* For IR3_REG_SSA, src registers contain ptr back to assigning
    * instruction.
    *
    * For IR3_REG_ARRAY, the pointer is back to the last dependent
    * array access (although the net effect is the same, it points
    * back to a previous instruction that we depend on).
    */
   struct ir3_register *def;

   /* Pointer to another register in the instruction that must share the same
    * physical register. Each destination can be tied with one source, and
    * they must have "tied" pointing to each other.
    */
   struct ir3_register *tied;

   /* RA bookkeeping: offset of this reg within its merge set, the set
    * itself, and the live interval used during allocation:
    */
   unsigned merge_set_offset;
   struct ir3_merge_set *merge_set;
   unsigned interval_start, interval_end;
};
196
197
/*
 * Stupid/simple growable array implementation:
 */

/* Declares a growable array field 'name' together with its element count
 * (name##_count) and allocated capacity (name##_sz).
 */
#define DECLARE_ARRAY(type, name) \
   unsigned name##_count, name##_sz; \
   type *name;

/* Append a value to a DECLARE_ARRAY field, growing the backing storage
 * (via ralloc, parented to ctx) as needed.  Capacity doubles, starting
 * at 16 elements.
 */
#define array_insert(ctx, arr, ...) \
   do { \
      if (arr##_count == arr##_sz) { \
         arr##_sz = MAX2(2 * arr##_sz, 16); \
         arr = reralloc_size(ctx, arr, arr##_sz * sizeof(arr[0])); \
      } \
      arr[arr##_count++] = __VA_ARGS__; \
   } while (0)
212
213
/* A single IR3 instruction: opcode, modifier flags, source/destination
 * registers, and per-category payload data.
 */
struct ir3_instruction {
   struct ir3_block *block; /* containing basic block */
   opc_t opc;
   enum {
      /* (sy) flag is set on first instruction, and after sample
       * instructions (probably just on RAW hazard).
       */
      IR3_INSTR_SY = 0x001,
      /* (ss) flag is set on first instruction, and first instruction
       * to depend on the result of "long" instructions (RAW hazard):
       *
       *   rcp, rsq, log2, exp2, sin, cos, sqrt
       *
       * It seems to synchronize until all in-flight instructions are
       * completed, for example:
       *
       *   rsq hr1.w, hr1.w
       *   add.f hr2.z, (neg)hr2.z, hc0.y
       *   mul.f hr2.w, (neg)hr2.y, (neg)hr2.y
       *   rsq hr2.x, hr2.x
       *   (rpt1)nop
       *   mad.f16 hr2.w, hr2.z, hr2.z, hr2.w
       *   nop
       *   mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w
       *   (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w
       *   (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x
       *
       * The last mul.f does not have (ss) set, presumably because the
       * (ss) on the previous instruction does the job.
       *
       * The blob driver also seems to set it on WAR hazards, although
       * not really clear if this is needed or just blob compiler being
       * sloppy. So far I haven't found a case where removing the (ss)
       * causes problems for WAR hazard, but I could just be getting
       * lucky:
       *
       *   rcp r1.y, r3.y
       *   (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z
       *
       */
      IR3_INSTR_SS = 0x002,
      /* (jp) flag is set on jump targets:
       */
      IR3_INSTR_JP = 0x004,
      IR3_INSTR_UL = 0x008,
      IR3_INSTR_3D = 0x010,
      IR3_INSTR_A = 0x020,
      IR3_INSTR_O = 0x040,
      IR3_INSTR_P = 0x080,
      IR3_INSTR_S = 0x100,
      IR3_INSTR_S2EN = 0x200,
      IR3_INSTR_G = 0x400,
      IR3_INSTR_SAT = 0x800,
      /* (cat5/cat6) Bindless */
      IR3_INSTR_B = 0x1000,
      /* (cat5/cat6) nonuniform */
      IR3_INSTR_NONUNIF = 0x02000,
      /* (cat5-only) Get some parts of the encoding from a1.x */
      IR3_INSTR_A1EN = 0x04000,
      /* meta-flags, for intermediate stages of IR, ie.
       * before register assignment is done:
       */
      IR3_INSTR_MARK = 0x08000,
      IR3_INSTR_UNUSED = 0x10000,
   } flags;
   uint8_t repeat; /* (rptN) repeat count */
   uint8_t nop;    /* (nopN) nop count encoded with the instruction */
#ifdef DEBUG
   unsigned srcs_max, dsts_max; /* allocated capacities, for bounds checks */
#endif
   unsigned srcs_count, dsts_count;
   struct ir3_register **dsts;
   struct ir3_register **srcs;
   /* per-category payload; which member is valid depends on opc: */
   union {
      struct {
         char inv1, inv2;
         char comp1, comp2;
         int immed;
         struct ir3_block *target;
         const char *target_label;
         brtype_t brtype;
         unsigned idx; /* for brac.N */
      } cat0;
      struct {
         type_t src_type, dst_type;
         round_t round;
      } cat1;
      struct {
         enum {
            IR3_COND_LT = 0,
            IR3_COND_LE = 1,
            IR3_COND_GT = 2,
            IR3_COND_GE = 3,
            IR3_COND_EQ = 4,
            IR3_COND_NE = 5,
         } condition;
      } cat2;
      struct {
         unsigned samp, tex;
         unsigned tex_base : 3;
         type_t type;
      } cat5;
      struct {
         type_t type;
         /* TODO remove dst_offset and handle as a ir3_register
          * which might be IMMED, similar to how src_offset is
          * handled.
          */
         int dst_offset;
         int iim_val : 3;   /* for ldgb/stgb, # of components */
         unsigned d : 3;    /* for ldc, component offset */
         bool typed : 1;
         unsigned base : 3;
      } cat6;
      struct {
         unsigned w : 1; /* write */
         unsigned r : 1; /* read */
         unsigned l : 1; /* local */
         unsigned g : 1; /* global */
      } cat7;
      /* for meta-instructions, just used to hold extra data
       * before instruction scheduling, etc
       */
      struct {
         int off; /* component/offset */
      } split;
      struct {
         /* Per-source index back to the entry in the
          * ir3_shader_variant::outputs table.
          */
         unsigned *outidxs;
      } end;
      struct {
         /* used to temporarily hold reference to nir_phi_instr
          * until we resolve the phi srcs
          */
         void *nphi;
      } phi;
      struct {
         unsigned samp, tex;
         unsigned input_offset;
         unsigned samp_base : 3;
         unsigned tex_base : 3;
      } prefetch;
      struct {
         /* maps back to entry in ir3_shader_variant::inputs table: */
         int inidx;
         /* for sysvals, identifies the sysval type. Mostly so we can
          * identify the special cases where a sysval should not be DCE'd
          * (currently, just pre-fs texture fetch)
          */
         gl_system_value sysval;
      } input;
   };

   /* When we get to the RA stage, we need instruction's position/name: */
   uint16_t ip;
   uint16_t name;

   /* used for per-pass extra instruction data.
    *
    * TODO we should remove the per-pass data like this and 'use_count'
    * and do something similar to what RA does w/ ir3_ra_instr_data..
    * ie. use the ir3_count_instructions pass, and then use instr->ip
    * to index into a table of pass-private data.
    */
   void *data;

   /**
    * Valid if pass calls ir3_find_ssa_uses().. see foreach_ssa_use()
    */
   struct set *uses;

   int use_count; /* currently just updated/used by cp */

   /* an instruction can reference at most one address register amongst
    * it's src/dst registers. Beyond that, you need to insert mov's.
    *
    * NOTE: do not write this directly, use ir3_instr_set_address()
    */
   struct ir3_register *address;

   /* Tracking for additional dependent instructions. Used to handle
    * barriers, WAR hazards for arrays/SSBOs/etc.
    */
   DECLARE_ARRAY(struct ir3_instruction *, deps);

   /*
    * From PoV of instruction scheduling, not execution (ie. ignores global/
    * local distinction):
    *                    shared  image  atomic  SSBO  everything
    *   barrier()/       -       R/W    R/W     R/W   R/W       X
    *     groupMemoryBarrier()
    *     memoryBarrier()
    *     (but only images declared coherent?)
    *   memoryBarrierAtomic()      -            R/W
    *   memoryBarrierBuffer()      -                  R/W
    *   memoryBarrierImage()       -      R/W
    *   memoryBarrierShared()      -                                  R/W
    *
    * TODO I think for SSBO/image/shared, in cases where we can determine
    * which variable is accessed, we don't need to care about accesses to
    * different variables (unless declared coherent??)
    */
   enum {
      IR3_BARRIER_EVERYTHING = 1 << 0,
      IR3_BARRIER_SHARED_R = 1 << 1,
      IR3_BARRIER_SHARED_W = 1 << 2,
      IR3_BARRIER_IMAGE_R = 1 << 3,
      IR3_BARRIER_IMAGE_W = 1 << 4,
      IR3_BARRIER_BUFFER_R = 1 << 5,
      IR3_BARRIER_BUFFER_W = 1 << 6,
      IR3_BARRIER_ARRAY_R = 1 << 7,
      IR3_BARRIER_ARRAY_W = 1 << 8,
      IR3_BARRIER_PRIVATE_R = 1 << 9,
      IR3_BARRIER_PRIVATE_W = 1 << 10,
   } barrier_class,
      barrier_conflict;

   /* Entry in ir3_block's instruction list: */
   struct list_head node;

   uint32_t serialno;

   // TODO only computerator/assembler:
   int line;
};
440
441
/* Top-level container for one shader's IR: the block list plus various
 * side tables tracking instructions of special interest to later passes.
 */
struct ir3 {
   struct ir3_compiler *compiler;
   gl_shader_stage type;

   DECLARE_ARRAY(struct ir3_instruction *, inputs);

   /* Track bary.f (and ldlv) instructions.. this is needed in
    * scheduling to ensure that all varying fetches happen before
    * any potential kill instructions. The hw gets grumpy if all
    * threads in a group are killed before the last bary.f gets
    * a chance to signal end of input (ei).
    */
   DECLARE_ARRAY(struct ir3_instruction *, baryfs);

   /* Track all indirect instructions (read and write). To avoid
    * deadlock scenario where an address register gets scheduled,
    * but other dependent src instructions cannot be scheduled due
    * to dependency on a *different* address register value, the
    * scheduler needs to ensure that all dependencies other than
    * the instruction other than the address register are scheduled
    * before the one that writes the address register. Having a
    * convenient list of instructions that reference some address
    * register simplifies this.
    */
   DECLARE_ARRAY(struct ir3_instruction *, a0_users);

   /* same for a1.x: */
   DECLARE_ARRAY(struct ir3_instruction *, a1_users);

   /* and same for instructions that consume predicate register: */
   DECLARE_ARRAY(struct ir3_instruction *, predicates);

   /* Track texture sample instructions which need texture state
    * patched in (for astc-srgb workaround):
    */
   DECLARE_ARRAY(struct ir3_instruction *, astc_srgb);

   /* List of blocks: */
   struct list_head block_list;

   /* List of ir3_array's: */
   struct list_head array_list;

#ifdef DEBUG
   unsigned block_count; /* used to assign ir3_block::serialno */
#endif
   unsigned instr_count; /* used to assign ir3_instruction serial numbers */
};
489
490
/* An indirectly-addressed register array (corresponds to a NIR register),
 * tracked for dependency and register-allocation purposes.
 */
struct ir3_array {
   struct list_head node; /* entry in ir3::array_list */
   unsigned length;       /* number of scalar components */
   unsigned id;

   /* the NIR register this array was created from — NOTE(review): confirm */
   struct nir_register *r;

   /* To avoid array write's from getting DCE'd, keep track of the
    * most recent write. Any array access depends on the most
    * recent write. This way, nothing depends on writes after the
    * last read. But all the writes that happen before that have
    * something depending on them
    */
   struct ir3_register *last_write;

   /* extra stuff used in RA pass: */
   unsigned base; /* base vreg name */
   unsigned reg;  /* base physical reg */
   uint16_t start_ip, end_ip;

   /* Indicates if half-precision */
   bool half;

   bool unused;
};
515
516
struct ir3_array *ir3_lookup_array(struct ir3 *ir, unsigned id);
517
518
/* How a block's branch condition combines the per-thread condition values: */
enum ir3_branch_type {
   IR3_BRANCH_COND,   /* condition */
   IR3_BRANCH_ANY,    /* subgroupAny(condition) */
   IR3_BRANCH_ALL,    /* subgroupAll(condition) */
   IR3_BRANCH_GETONE, /* subgroupElect() */
};
524
525
/* A basic block: a list of instructions plus CFG edges (both the
 * per-thread view and the "physical" machine view).
 */
struct ir3_block {
   struct list_head node;  /* entry in ir3::block_list */
   struct ir3 *shader;

   const struct nir_block *nblock;

   struct list_head instr_list; /* list of ir3_instruction */

   /* The actual branch condition, if there are two successors */
   enum ir3_branch_type brtype;

   /* each block has either one or two successors.. in case of two
    * successors, 'condition' decides which one to follow. A block preceding
    * an if/else has two successors.
    *
    * In some cases the path that the machine actually takes through the
    * program may not match the per-thread view of the CFG. In particular
    * this is the case for if/else, where the machine jumps from the end of
    * the if to the beginning of the else and switches active lanes. While
    * most things only care about the per-thread view, we need to use the
    * "physical" view when allocating shared registers. "successors" contains
    * the per-thread successors, and "physical_successors" contains the
    * physical successors which includes the fallthrough edge from the if to
    * the else.
    */
   struct ir3_instruction *condition;
   struct ir3_block *successors[2];
   struct ir3_block *physical_successors[2];

   DECLARE_ARRAY(struct ir3_block *, predecessors);
   DECLARE_ARRAY(struct ir3_block *, physical_predecessors);

   uint16_t start_ip, end_ip;

   /* Track instructions which do not write a register but other-
    * wise must not be discarded (such as kill, stg, etc)
    */
   DECLARE_ARRAY(struct ir3_instruction *, keeps);

   /* used for per-pass extra block data. Mainly used right
    * now in RA step to track livein/liveout.
    */
   void *data;

   uint32_t index;

   /* dominator-tree info, filled in by ir3_calc_dominance(): */
   struct ir3_block *imm_dom;
   DECLARE_ARRAY(struct ir3_block *, dom_children);

   uint32_t dom_pre_index;
   uint32_t dom_post_index;

   uint32_t loop_id;

#ifdef DEBUG
   uint32_t serialno;
#endif
};
583
584
/* Stable identifier for a block, for debug output: the serial number in
 * debug builds, otherwise derived from the block's address (low 32 bits).
 */
static inline uint32_t
block_id(struct ir3_block *block)
{
#ifdef DEBUG
   return block->serialno;
#else
   /* cast via uintptr_t rather than unsigned long: the pointer->integer
    * conversion is then well-defined on LLP64 platforms too, with the
    * same low-32-bit result:
    */
   return (uint32_t)(uintptr_t)block;
#endif
}
593
594
static inline struct ir3_block *
595
ir3_start_block(struct ir3 *ir)
596
{
597
return list_first_entry(&ir->block_list, struct ir3_block, node);
598
}
599
600
void ir3_block_add_predecessor(struct ir3_block *block, struct ir3_block *pred);
601
void ir3_block_add_physical_predecessor(struct ir3_block *block,
602
struct ir3_block *pred);
603
void ir3_block_remove_predecessor(struct ir3_block *block,
604
struct ir3_block *pred);
605
unsigned ir3_block_get_pred_index(struct ir3_block *block,
606
struct ir3_block *pred);
607
608
void ir3_calc_dominance(struct ir3 *ir);
609
bool ir3_block_dominates(struct ir3_block *a, struct ir3_block *b);
610
611
struct ir3_shader_variant;
612
613
struct ir3 *ir3_create(struct ir3_compiler *compiler,
614
struct ir3_shader_variant *v);
615
void ir3_destroy(struct ir3 *shader);
616
617
void ir3_collect_info(struct ir3_shader_variant *v);
618
void *ir3_alloc(struct ir3 *shader, int sz);
619
620
unsigned ir3_get_reg_dependent_max_waves(const struct ir3_compiler *compiler,
621
unsigned reg_count,
622
bool double_threadsize);
623
624
unsigned ir3_get_reg_independent_max_waves(struct ir3_shader_variant *v,
625
bool double_threadsize);
626
627
bool ir3_should_double_threadsize(struct ir3_shader_variant *v,
628
unsigned regs_count);
629
630
struct ir3_block *ir3_block_create(struct ir3 *shader);
631
632
struct ir3_instruction *ir3_instr_create(struct ir3_block *block, opc_t opc,
633
int ndst, int nsrc);
634
struct ir3_instruction *ir3_instr_clone(struct ir3_instruction *instr);
635
void ir3_instr_add_dep(struct ir3_instruction *instr,
636
struct ir3_instruction *dep);
637
const char *ir3_instr_name(struct ir3_instruction *instr);
638
639
struct ir3_register *ir3_src_create(struct ir3_instruction *instr, int num,
640
int flags);
641
struct ir3_register *ir3_dst_create(struct ir3_instruction *instr, int num,
642
int flags);
643
struct ir3_register *ir3_reg_clone(struct ir3 *shader,
644
struct ir3_register *reg);
645
646
/* Tie a destination register to a source register so RA assigns them the
 * same physical register.  Neither may already be tied; the link is made
 * mutual (each points at the other).
 */
static inline void
ir3_reg_tie(struct ir3_register *dst, struct ir3_register *src)
{
   assert(!dst->tied && !src->tied);
   dst->tied = src;
   src->tied = dst;
}
653
654
void ir3_reg_set_last_array(struct ir3_instruction *instr,
655
struct ir3_register *reg,
656
struct ir3_register *last_write);
657
658
void ir3_instr_set_address(struct ir3_instruction *instr,
659
struct ir3_instruction *addr);
660
661
static inline bool
662
ir3_instr_check_mark(struct ir3_instruction *instr)
663
{
664
if (instr->flags & IR3_INSTR_MARK)
665
return true; /* already visited */
666
instr->flags |= IR3_INSTR_MARK;
667
return false;
668
}
669
670
void ir3_block_clear_mark(struct ir3_block *block);
671
void ir3_clear_mark(struct ir3 *shader);
672
673
unsigned ir3_count_instructions(struct ir3 *ir);
674
unsigned ir3_count_instructions_ra(struct ir3 *ir);
675
676
/**
 * Move 'instr' to just before 'after'
 *
 * (unlinks instr from its current position, then inserts it immediately
 * preceding 'after' in the same list)
 */
static inline void
ir3_instr_move_before(struct ir3_instruction *instr,
                      struct ir3_instruction *after)
{
   list_delinit(&instr->node);
   list_addtail(&instr->node, &after->node);
}
686
687
/**
 * Move 'instr' to just after 'before':
 *
 * (unlinks instr from its current position, then inserts it immediately
 * following 'before' in the same list)
 */
static inline void
ir3_instr_move_after(struct ir3_instruction *instr,
                     struct ir3_instruction *before)
{
   list_delinit(&instr->node);
   list_add(&instr->node, &before->node);
}
697
698
void ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx, bool falsedeps);
699
700
void ir3_set_dst_type(struct ir3_instruction *instr, bool half);
701
void ir3_fixup_src_type(struct ir3_instruction *instr);
702
703
int ir3_flut(struct ir3_register *src_reg);
704
705
bool ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags);
706
707
#include "util/set.h"
/* Iterate over all instructions that use __instr's SSA value.  Only valid
 * after a pass has called ir3_find_ssa_uses() to populate instr->uses.
 * The outer single-iteration for-loop declares __use and skips the body
 * entirely when the 'uses' set is NULL.
 */
#define foreach_ssa_use(__use, __instr) \
   for (struct ir3_instruction *__use = (void *)~0; __use && (__instr)->uses; \
        __use = NULL) \
      set_foreach ((__instr)->uses, __entry) \
         if ((__use = (void *)__entry->key))
713
714
static inline uint32_t
715
reg_num(const struct ir3_register *reg)
716
{
717
return reg->num >> 2;
718
}
719
720
static inline uint32_t
721
reg_comp(const struct ir3_register *reg)
722
{
723
return reg->num & 0x3;
724
}
725
726
static inline bool
727
is_flow(struct ir3_instruction *instr)
728
{
729
return (opc_cat(instr->opc) == 0);
730
}
731
732
static inline bool
733
is_kill_or_demote(struct ir3_instruction *instr)
734
{
735
return instr->opc == OPC_KILL || instr->opc == OPC_DEMOTE;
736
}
737
738
static inline bool
739
is_nop(struct ir3_instruction *instr)
740
{
741
return instr->opc == OPC_NOP;
742
}
743
744
static inline bool
745
is_same_type_reg(struct ir3_register *dst, struct ir3_register *src)
746
{
747
unsigned dst_type = (dst->flags & IR3_REG_HALF);
748
unsigned src_type = (src->flags & IR3_REG_HALF);
749
750
/* Treat shared->normal copies as same-type, because they can generally be
751
* folded, but not normal->shared copies.
752
*/
753
if (dst_type != src_type ||
754
((dst->flags & IR3_REG_SHARED) && !(src->flags & IR3_REG_SHARED)))
755
return false;
756
else
757
return true;
758
}
759
760
/* Is it a non-transformative (ie. not type changing) mov? This can
761
* also include absneg.s/absneg.f, which for the most part can be
762
* treated as a mov (single src argument).
763
*/
764
static inline bool
765
is_same_type_mov(struct ir3_instruction *instr)
766
{
767
struct ir3_register *dst;
768
769
switch (instr->opc) {
770
case OPC_MOV:
771
if (instr->cat1.src_type != instr->cat1.dst_type)
772
return false;
773
/* If the type of dest reg and src reg are different,
774
* it shouldn't be considered as same type mov
775
*/
776
if (!is_same_type_reg(instr->dsts[0], instr->srcs[0]))
777
return false;
778
break;
779
case OPC_ABSNEG_F:
780
case OPC_ABSNEG_S:
781
if (instr->flags & IR3_INSTR_SAT)
782
return false;
783
/* If the type of dest reg and src reg are different,
784
* it shouldn't be considered as same type mov
785
*/
786
if (!is_same_type_reg(instr->dsts[0], instr->srcs[0]))
787
return false;
788
break;
789
default:
790
return false;
791
}
792
793
dst = instr->dsts[0];
794
795
/* mov's that write to a0 or p0.x are special: */
796
if (dst->num == regid(REG_P0, 0))
797
return false;
798
if (reg_num(dst) == REG_A0)
799
return false;
800
801
if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
802
return false;
803
804
return true;
805
}
806
807
/* A move from const, which changes size but not type, can also be
808
* folded into dest instruction in some cases.
809
*/
810
static inline bool
811
is_const_mov(struct ir3_instruction *instr)
812
{
813
if (instr->opc != OPC_MOV)
814
return false;
815
816
if (!(instr->srcs[0]->flags & IR3_REG_CONST))
817
return false;
818
819
type_t src_type = instr->cat1.src_type;
820
type_t dst_type = instr->cat1.dst_type;
821
822
return (type_float(src_type) && type_float(dst_type)) ||
823
(type_uint(src_type) && type_uint(dst_type)) ||
824
(type_sint(src_type) && type_sint(dst_type));
825
}
826
827
static inline bool
828
is_alu(struct ir3_instruction *instr)
829
{
830
return (1 <= opc_cat(instr->opc)) && (opc_cat(instr->opc) <= 3);
831
}
832
833
static inline bool
834
is_sfu(struct ir3_instruction *instr)
835
{
836
return (opc_cat(instr->opc) == 4);
837
}
838
839
static inline bool
840
is_tex(struct ir3_instruction *instr)
841
{
842
return (opc_cat(instr->opc) == 5);
843
}
844
845
static inline bool
846
is_tex_or_prefetch(struct ir3_instruction *instr)
847
{
848
return is_tex(instr) || (instr->opc == OPC_META_TEX_PREFETCH);
849
}
850
851
static inline bool
852
is_mem(struct ir3_instruction *instr)
853
{
854
return (opc_cat(instr->opc) == 6);
855
}
856
857
static inline bool
858
is_barrier(struct ir3_instruction *instr)
859
{
860
return (opc_cat(instr->opc) == 7);
861
}
862
863
static inline bool
864
is_half(struct ir3_instruction *instr)
865
{
866
return !!(instr->dsts[0]->flags & IR3_REG_HALF);
867
}
868
869
static inline bool
870
is_shared(struct ir3_instruction *instr)
871
{
872
return !!(instr->dsts[0]->flags & IR3_REG_SHARED);
873
}
874
875
/* Is this a store instruction?  (See note below about the dest reg.) */
static inline bool
is_store(struct ir3_instruction *instr)
{
   /* these instructions, the "destination" register is
    * actually a source, the address to store to.
    */
   switch (instr->opc) {
   case OPC_STG:
   case OPC_STG_A:
   case OPC_STGB:
   case OPC_STIB:
   case OPC_STP:
   case OPC_STL:
   case OPC_STLW:
   case OPC_L2G:
   case OPC_G2L:
      return true;
   default:
      return false;
   }
}
896
897
/* Is this a load instruction?  (Note: L2G counts as both load and store.) */
static inline bool
is_load(struct ir3_instruction *instr)
{
   switch (instr->opc) {
   case OPC_LDG:
   case OPC_LDG_A:
   case OPC_LDGB:
   case OPC_LDIB:
   case OPC_LDL:
   case OPC_LDP:
   case OPC_L2G:
   case OPC_LDLW:
   case OPC_LDC:
   case OPC_LDLV:
      /* probably some others too.. */
      return true;
   default:
      return false;
   }
}
917
918
/* Is this an input-fetch (varying fetch) instruction? */
static inline bool
is_input(struct ir3_instruction *instr)
{
   /* in some cases, ldlv is used to fetch varying without
    * interpolation.. fortunately inloc is the first src
    * register in either case
    */
   switch (instr->opc) {
   case OPC_LDLV:
   case OPC_BARY_F:
      return true;
   default:
      return false;
   }
}
933
934
/* Does the instruction produce a boolean (comparison) result? */
static inline bool
is_bool(struct ir3_instruction *instr)
{
   switch (instr->opc) {
   case OPC_CMPS_F:
   case OPC_CMPS_S:
   case OPC_CMPS_U:
      return true;
   default:
      return false;
   }
}
946
947
/* Map a full-precision cat3 opcode to its half-precision variant;
 * opcodes with no half variant are returned unchanged.
 */
static inline opc_t
cat3_half_opc(opc_t opc)
{
   switch (opc) {
   case OPC_MAD_F32:
      return OPC_MAD_F16;
   case OPC_SEL_B32:
      return OPC_SEL_B16;
   case OPC_SEL_S32:
      return OPC_SEL_S16;
   case OPC_SEL_F32:
      return OPC_SEL_F16;
   case OPC_SAD_S32:
      return OPC_SAD_S16;
   default:
      return opc;
   }
}
965
966
/* Map a half-precision cat3 opcode to its full-precision variant;
 * opcodes with no full variant are returned unchanged.
 */
static inline opc_t
cat3_full_opc(opc_t opc)
{
   switch (opc) {
   case OPC_MAD_F16:
      return OPC_MAD_F32;
   case OPC_SEL_B16:
      return OPC_SEL_B32;
   case OPC_SEL_S16:
      return OPC_SEL_S32;
   case OPC_SEL_F16:
      return OPC_SEL_F32;
   case OPC_SAD_S16:
      return OPC_SAD_S32;
   default:
      return opc;
   }
}
984
985
/* Map a full-precision cat4 opcode to its half-precision variant;
 * opcodes with no half variant are returned unchanged.
 */
static inline opc_t
cat4_half_opc(opc_t opc)
{
   switch (opc) {
   case OPC_RSQ:
      return OPC_HRSQ;
   case OPC_LOG2:
      return OPC_HLOG2;
   case OPC_EXP2:
      return OPC_HEXP2;
   default:
      return opc;
   }
}
999
1000
/* Map a half-precision cat4 opcode to its full-precision variant;
 * opcodes with no full variant are returned unchanged.
 */
static inline opc_t
cat4_full_opc(opc_t opc)
{
   switch (opc) {
   case OPC_HRSQ:
      return OPC_RSQ;
   case OPC_HLOG2:
      return OPC_LOG2;
   case OPC_HEXP2:
      return OPC_EXP2;
   default:
      return opc;
   }
}
1014
1015
static inline bool
1016
is_meta(struct ir3_instruction *instr)
1017
{
1018
return (opc_cat(instr->opc) == -1);
1019
}
1020
1021
static inline unsigned
1022
reg_elems(const struct ir3_register *reg)
1023
{
1024
if (reg->flags & IR3_REG_ARRAY)
1025
return reg->size;
1026
else
1027
return util_last_bit(reg->wrmask);
1028
}
1029
1030
static inline unsigned
1031
reg_elem_size(const struct ir3_register *reg)
1032
{
1033
return (reg->flags & IR3_REG_HALF) ? 1 : 2;
1034
}
1035
1036
/* Total register footprint in half-register units. */
static inline unsigned
reg_size(const struct ir3_register *reg)
{
   unsigned elems = reg_elems(reg);
   return elems * reg_elem_size(reg);
}
1041
1042
/* Number of registers written by the (single) destination, or 0 if the
 * instruction has no destination.
 */
static inline unsigned
dest_regs(struct ir3_instruction *instr)
{
   if (instr->dsts_count == 0)
      return 0;

   /* callers below assume at most one dest: */
   debug_assert(instr->dsts_count == 1);
   return util_last_bit(instr->dsts[0]->wrmask);
}
1051
1052
/* is dst a normal temp register: */
1053
static inline bool
1054
is_dest_gpr(struct ir3_register *dst)
1055
{
1056
if (dst->wrmask == 0)
1057
return false;
1058
if ((reg_num(dst) == REG_A0) || (dst->num == regid(REG_P0, 0)))
1059
return false;
1060
return true;
1061
}
1062
1063
static inline bool
1064
writes_gpr(struct ir3_instruction *instr)
1065
{
1066
if (dest_regs(instr) == 0)
1067
return false;
1068
return is_dest_gpr(instr->dsts[0]);
1069
}
1070
1071
static inline bool
1072
writes_addr0(struct ir3_instruction *instr)
1073
{
1074
/* Note: only the first dest can write to a0.x */
1075
if (instr->dsts_count > 0) {
1076
struct ir3_register *dst = instr->dsts[0];
1077
return dst->num == regid(REG_A0, 0);
1078
}
1079
return false;
1080
}
1081
1082
static inline bool
1083
writes_addr1(struct ir3_instruction *instr)
1084
{
1085
/* Note: only the first dest can write to a1.x */
1086
if (instr->dsts_count > 0) {
1087
struct ir3_register *dst = instr->dsts[0];
1088
return dst->num == regid(REG_A0, 1);
1089
}
1090
return false;
1091
}
1092
1093
static inline bool
1094
writes_pred(struct ir3_instruction *instr)
1095
{
1096
/* Note: only the first dest can write to p0.x */
1097
if (instr->dsts_count > 0) {
1098
struct ir3_register *dst = instr->dsts[0];
1099
return reg_num(dst) == REG_P0;
1100
}
1101
return false;
1102
}
1103
1104
/* Is it something other than a normal register. Shared regs, p0, and a0/a1
1105
* are considered special here. Special registers are always accessed with one
1106
* size and never alias normal registers, even though a naive calculation
1107
* would sometimes make it seem like e.g. r30.z aliases a0.x.
1108
*/
1109
static inline bool
1110
is_reg_special(const struct ir3_register *reg)
1111
{
1112
return (reg->flags & IR3_REG_SHARED) || (reg_num(reg) == REG_A0) ||
1113
(reg_num(reg) == REG_P0);
1114
}
1115
1116
/* returns defining instruction for reg */
1117
/* TODO better name */
1118
static inline struct ir3_instruction *
1119
ssa(struct ir3_register *reg)
1120
{
1121
if ((reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) && reg->def)
1122
return reg->def->instr;
1123
return NULL;
1124
}
1125
1126
static inline bool
1127
conflicts(struct ir3_register *a, struct ir3_register *b)
1128
{
1129
return (a && b) && (a->def != b->def);
1130
}
1131
1132
static inline bool
1133
reg_gpr(struct ir3_register *r)
1134
{
1135
if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED))
1136
return false;
1137
if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
1138
return false;
1139
return true;
1140
}
1141
1142
static inline type_t
1143
half_type(type_t type)
1144
{
1145
switch (type) {
1146
case TYPE_F32:
1147
return TYPE_F16;
1148
case TYPE_U32:
1149
return TYPE_U16;
1150
case TYPE_S32:
1151
return TYPE_S16;
1152
case TYPE_F16:
1153
case TYPE_U16:
1154
case TYPE_S16:
1155
return type;
1156
default:
1157
assert(0);
1158
return ~0;
1159
}
1160
}
1161
1162
static inline type_t
1163
full_type(type_t type)
1164
{
1165
switch (type) {
1166
case TYPE_F16:
1167
return TYPE_F32;
1168
case TYPE_U16:
1169
return TYPE_U32;
1170
case TYPE_S16:
1171
return TYPE_S32;
1172
case TYPE_F32:
1173
case TYPE_U32:
1174
case TYPE_S32:
1175
return type;
1176
default:
1177
assert(0);
1178
return ~0;
1179
}
1180
}
1181
1182
/* some cat2 instructions (ie. those which are not float) can embed an
 * immediate:
 */
static inline bool
ir3_cat2_int(opc_t opc)
{
   switch (opc) {
   case OPC_ADD_U:
   case OPC_ADD_S:
   case OPC_SUB_U:
   case OPC_SUB_S:
   case OPC_CMPS_U:
   case OPC_CMPS_S:
   case OPC_MIN_U:
   case OPC_MIN_S:
   case OPC_MAX_U:
   case OPC_MAX_S:
   case OPC_CMPV_U:
   case OPC_CMPV_S:
   case OPC_MUL_U24:
   case OPC_MUL_S24:
   case OPC_MULL_U:
   case OPC_CLZ_S:
   case OPC_ABSNEG_S:
   case OPC_AND_B:
   case OPC_OR_B:
   case OPC_NOT_B:
   case OPC_XOR_B:
   case OPC_BFREV_B:
   case OPC_CLZ_B:
   case OPC_SHL_B:
   case OPC_SHR_B:
   case OPC_ASHR_B:
   case OPC_MGEN_B:
   case OPC_GETBIT_B:
   case OPC_CBITS_B:
   /* NOTE(review): BARY_F in the "int" list looks surprising but matches
    * the existing behavior — confirm before removing.
    */
   case OPC_BARY_F:
      return true;

   default:
      return false;
   }
}
/* map cat2 instruction to valid abs/neg flags: */
static inline unsigned
ir3_cat2_absneg(opc_t opc)
{
   switch (opc) {
   /* float ops accept float abs/neg src modifiers: */
   case OPC_ADD_F:
   case OPC_MIN_F:
   case OPC_MAX_F:
   case OPC_MUL_F:
   case OPC_SIGN_F:
   case OPC_CMPS_F:
   case OPC_ABSNEG_F:
   case OPC_CMPV_F:
   case OPC_FLOOR_F:
   case OPC_CEIL_F:
   case OPC_RNDNE_F:
   case OPC_RNDAZ_F:
   case OPC_TRUNC_F:
   case OPC_BARY_F:
      return IR3_REG_FABS | IR3_REG_FNEG;

   /* integer arithmetic ops take no src modifiers: */
   case OPC_ADD_U:
   case OPC_ADD_S:
   case OPC_SUB_U:
   case OPC_SUB_S:
   case OPC_CMPS_U:
   case OPC_CMPS_S:
   case OPC_MIN_U:
   case OPC_MIN_S:
   case OPC_MAX_U:
   case OPC_MAX_S:
   case OPC_CMPV_U:
   case OPC_CMPV_S:
   case OPC_MUL_U24:
   case OPC_MUL_S24:
   case OPC_MULL_U:
   case OPC_CLZ_S:
      return 0;

   case OPC_ABSNEG_S:
      return IR3_REG_SABS | IR3_REG_SNEG;

   /* bitwise ops accept a bitwise-not src modifier: */
   case OPC_AND_B:
   case OPC_OR_B:
   case OPC_NOT_B:
   case OPC_XOR_B:
   case OPC_BFREV_B:
   case OPC_CLZ_B:
   case OPC_SHL_B:
   case OPC_SHR_B:
   case OPC_ASHR_B:
   case OPC_MGEN_B:
   case OPC_GETBIT_B:
   case OPC_CBITS_B:
      return IR3_REG_BNOT;

   default:
      return 0;
   }
}
/* map cat3 instructions to valid abs/neg flags: */
static inline unsigned
ir3_cat3_absneg(opc_t opc)
{
   switch (opc) {
   case OPC_MAD_F16:
   case OPC_MAD_F32:
   case OPC_SEL_F16:
   case OPC_SEL_F32:
      return IR3_REG_FNEG;

   case OPC_MAD_U16:
   case OPC_MADSH_U16:
   case OPC_MAD_S16:
   case OPC_MADSH_M16:
   case OPC_MAD_U24:
   case OPC_MAD_S24:
   case OPC_SEL_S16:
   case OPC_SEL_S32:
   case OPC_SAD_S16:
   case OPC_SAD_S32:
      /* neg *may* work on 3rd src.. */

   case OPC_SEL_B16:
   case OPC_SEL_B32:

   case OPC_SHLG_B16:

   default:
      return 0;
   }
}
/* Return the type (float, int, or uint) the op uses when converting from the
 * internal result of the op (which is assumed to be the same size as the
 * sources) to the destination when they are not the same size. If F32 it does
 * a floating-point conversion, if U32 it does a truncation/zero-extension, if
 * S32 it does a truncation/sign-extension. "can_fold" will be false if it
 * doesn't do anything sensible or is unknown.
 */
static inline type_t
ir3_output_conv_type(struct ir3_instruction *instr, bool *can_fold)
{
   *can_fold = true;
   switch (instr->opc) {
   case OPC_ADD_F:
   case OPC_MUL_F:
   case OPC_BARY_F:
   case OPC_MAD_F32:
   case OPC_MAD_F16:
      return TYPE_F32;

   case OPC_ADD_U:
   case OPC_SUB_U:
   case OPC_MIN_U:
   case OPC_MAX_U:
   case OPC_AND_B:
   case OPC_OR_B:
   case OPC_NOT_B:
   case OPC_XOR_B:
   case OPC_MUL_U24:
   case OPC_MULL_U:
   case OPC_SHL_B:
   case OPC_SHR_B:
   case OPC_ASHR_B:
   case OPC_MAD_U24:
   /* Comparison ops zero-extend/truncate their results, so consider them as
    * unsigned here.
    */
   case OPC_CMPS_F:
   case OPC_CMPV_F:
   case OPC_CMPS_U:
   case OPC_CMPS_S:
      return TYPE_U32;

   case OPC_ADD_S:
   case OPC_SUB_S:
   case OPC_MIN_S:
   case OPC_MAX_S:
   case OPC_ABSNEG_S:
   case OPC_MUL_S24:
   case OPC_MAD_S24:
      return TYPE_S32;

   /* We assume that any move->move folding that could be done was done by
    * NIR.
    */
   case OPC_MOV:
   default:
      *can_fold = false;
      return TYPE_U32;
   }
}
/* Return the src and dst types for the conversion which is already folded
1382
* into the op. We can assume that instr has folded in a conversion from
1383
* ir3_output_conv_src_type() to ir3_output_conv_dst_type(). Only makes sense
1384
* to call if ir3_output_conv_type() returns can_fold = true.
1385
*/
1386
static inline type_t
1387
ir3_output_conv_src_type(struct ir3_instruction *instr, type_t base_type)
1388
{
1389
switch (instr->opc) {
1390
case OPC_CMPS_F:
1391
case OPC_CMPV_F:
1392
case OPC_CMPS_U:
1393
case OPC_CMPS_S:
1394
/* Comparisons only return 0/1 and the size of the comparison sources
1395
* is irrelevant, never consider them as having an output conversion
1396
* by returning a type with the dest size here:
1397
*/
1398
return (instr->dsts[0]->flags & IR3_REG_HALF) ? half_type(base_type)
1399
: full_type(base_type);
1400
1401
case OPC_BARY_F:
1402
/* bary.f doesn't have an explicit source, but we can assume here that
1403
* the varying data it reads is in fp32.
1404
*
1405
* This may be fp16 on older gen's depending on some register
1406
* settings, but it's probably not worth plumbing that through for a
1407
* small improvement that NIR would hopefully handle for us anyway.
1408
*/
1409
return TYPE_F32;
1410
1411
default:
1412
return (instr->dsts[1]->flags & IR3_REG_HALF) ? half_type(base_type)
1413
: full_type(base_type);
1414
}
1415
}
1416
1417
static inline type_t
1418
ir3_output_conv_dst_type(struct ir3_instruction *instr, type_t base_type)
1419
{
1420
return (instr->dsts[0]->flags & IR3_REG_HALF) ? half_type(base_type)
1421
: full_type(base_type);
1422
}
1423
1424
/* Some instructions have signed/unsigned variants which are identical except
1425
* for whether the folded conversion sign-extends or zero-extends, and we can
1426
* fold in a mismatching move by rewriting the opcode. Return the opcode to
1427
* switch signedness, and whether one exists.
1428
*/
1429
static inline opc_t
1430
ir3_try_swap_signedness(opc_t opc, bool *can_swap)
1431
{
1432
switch (opc) {
1433
#define PAIR(u, s) \
1434
case OPC_##u: \
1435
return OPC_##s; \
1436
case OPC_##s: \
1437
return OPC_##u;
1438
PAIR(ADD_U, ADD_S)
1439
PAIR(SUB_U, SUB_S)
1440
/* Note: these are only identical when the sources are half, but that's
1441
* the only case we call this function for anyway.
1442
*/
1443
PAIR(MUL_U24, MUL_S24)
1444
1445
default:
1446
*can_swap = false;
1447
return opc;
1448
}
1449
}
1450
1451
/* Bitmask with the low n bits set. */
#define MASK(n) ((1 << (n)) - 1)

/* iterator for an instructions's sources (reg), also returns src #: */
#define foreach_src_n(__srcreg, __n, __instr)                                  \
   if ((__instr)->srcs_count)                                                  \
      for (struct ir3_register *__srcreg = (void *)~0; __srcreg;               \
           __srcreg = NULL)                                                    \
         for (unsigned __cnt = (__instr)->srcs_count, __n = 0; __n < __cnt;    \
              __n++)                                                           \
            if ((__srcreg = (__instr)->srcs[__n]))

/* iterator for an instructions's sources (reg): */
#define foreach_src(__srcreg, __instr) foreach_src_n (__srcreg, __i, __instr)

/* iterator for an instructions's destinations (reg), also returns dst #: */
#define foreach_dst_n(__dstreg, __n, __instr)                                  \
   if ((__instr)->dsts_count)                                                  \
      for (struct ir3_register *__dstreg = (void *)~0; __dstreg;               \
           __dstreg = NULL)                                                    \
         for (unsigned __cnt = (__instr)->dsts_count, __n = 0; __n < __cnt;    \
              __n++)                                                           \
            if ((__dstreg = (__instr)->dsts[__n]))

/* iterator for an instructions's destinations (reg): */
#define foreach_dst(__dstreg, __instr) foreach_dst_n (__dstreg, __i, __instr)
1477
static inline unsigned
1478
__ssa_src_cnt(struct ir3_instruction *instr)
1479
{
1480
return instr->srcs_count + instr->deps_count;
1481
}
1482
1483
static inline bool
1484
__is_false_dep(struct ir3_instruction *instr, unsigned n)
1485
{
1486
if (n >= instr->srcs_count)
1487
return true;
1488
return false;
1489
}
1490
1491
/* Return a pointer to the n'th SSA-source slot (real src or false dep),
 * or NULL if the n'th src is not an SSA reference.
 */
static inline struct ir3_instruction **
__ssa_srcp_n(struct ir3_instruction *instr, unsigned n)
{
   /* indices past srcs_count address the deps[] array: */
   if (__is_false_dep(instr, n))
      return &instr->deps[n - instr->srcs_count];
   if (ssa(instr->srcs[n]))
      return &instr->srcs[n]->def->instr;
   return NULL;
}
1501
/* iterator over pointers to an instruction's SSA sources (incl. false deps),
 * also returns src #:
 */
#define foreach_ssa_srcp_n(__srcp, __n, __instr)                               \
   for (struct ir3_instruction **__srcp = (void *)~0; __srcp; __srcp = NULL)   \
      for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; __n < __cnt;      \
           __n++)                                                              \
         if ((__srcp = __ssa_srcp_n(__instr, __n)))

#define foreach_ssa_srcp(__srcp, __instr)                                      \
   foreach_ssa_srcp_n (__srcp, __i, __instr)

/* iterator for an instruction's SSA sources (instr), also returns src #: */
#define foreach_ssa_src_n(__srcinst, __n, __instr)                             \
   for (struct ir3_instruction *__srcinst = (void *)~0; __srcinst;             \
        __srcinst = NULL)                                                      \
      foreach_ssa_srcp_n (__srcp, __n, __instr)                                \
         if ((__srcinst = *__srcp))

/* iterator for an instruction's SSA sources (instr): */
#define foreach_ssa_src(__srcinst, __instr)                                    \
   foreach_ssa_src_n (__srcinst, __i, __instr)

/* iterators for shader inputs: */
#define foreach_input_n(__ininstr, __cnt, __ir)                                \
   for (struct ir3_instruction *__ininstr = (void *)~0; __ininstr;             \
        __ininstr = NULL)                                                      \
      for (unsigned __cnt = 0; __cnt < (__ir)->inputs_count; __cnt++)          \
         if ((__ininstr = (__ir)->inputs[__cnt]))
#define foreach_input(__ininstr, __ir) foreach_input_n (__ininstr, __i, __ir)

/* iterators for instructions: */
#define foreach_instr(__instr, __list)                                         \
   list_for_each_entry (struct ir3_instruction, __instr, __list, node)
#define foreach_instr_rev(__instr, __list)                                     \
   list_for_each_entry_rev (struct ir3_instruction, __instr, __list, node)
#define foreach_instr_safe(__instr, __list)                                    \
   list_for_each_entry_safe (struct ir3_instruction, __instr, __list, node)
#define foreach_instr_from_safe(__instr, __start, __list)                      \
   list_for_each_entry_from_safe(struct ir3_instruction, __instr, __start,     \
                                 __list, node)

/* iterators for blocks: */
#define foreach_block(__block, __list)                                         \
   list_for_each_entry (struct ir3_block, __block, __list, node)
#define foreach_block_safe(__block, __list)                                    \
   list_for_each_entry_safe (struct ir3_block, __block, __list, node)
#define foreach_block_rev(__block, __list)                                     \
   list_for_each_entry_rev (struct ir3_block, __block, __list, node)

/* iterators for arrays: */
#define foreach_array(__array, __list)                                         \
   list_for_each_entry (struct ir3_array, __array, __list, node)
#define foreach_array_safe(__array, __list)                                    \
   list_for_each_entry_safe (struct ir3_array, __array, __list, node)
/* Run an ir3 pass, and on progress dump the resulting IR and re-validate it.
 * Evaluates to the pass's progress bool (uses a GCC/clang statement
 * expression).
 */
#define IR3_PASS(ir, pass, ...)                                                \
   ({                                                                          \
      bool progress = pass(ir, ##__VA_ARGS__);                                 \
      if (progress) {                                                          \
         ir3_debug_print(ir, "AFTER: " #pass);                                 \
         ir3_validate(ir);                                                     \
      }                                                                        \
      progress;                                                                \
   })
1564
/* validate: */
void ir3_validate(struct ir3 *ir);

/* dump: */
void ir3_print(struct ir3 *ir);
void ir3_print_instr(struct ir3_instruction *instr);

/* delay calculation: */
int ir3_delayslots(struct ir3_instruction *assigner,
                   struct ir3_instruction *consumer, unsigned n, bool soft);
unsigned ir3_delay_calc_prera(struct ir3_block *block,
                              struct ir3_instruction *instr);
unsigned ir3_delay_calc_postra(struct ir3_block *block,
                               struct ir3_instruction *instr, bool soft,
                               bool mergedregs);
unsigned ir3_delay_calc_exact(struct ir3_block *block,
                              struct ir3_instruction *instr, bool mergedregs);
void ir3_remove_nops(struct ir3 *ir);

/* dead code elimination: */
struct ir3_shader_variant;
bool ir3_dce(struct ir3 *ir, struct ir3_shader_variant *so);

/* fp16 conversion folding */
bool ir3_cf(struct ir3 *ir);

/* copy-propagate: */
bool ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so);
bool ir3_cp_postsched(struct ir3 *ir);

/* common subexpression elimination: */
bool ir3_cse(struct ir3 *ir);

/* Make arrays SSA */
bool ir3_array_to_ssa(struct ir3 *ir);

/* scheduling: */
bool ir3_sched_add_deps(struct ir3 *ir);
int ir3_sched(struct ir3 *ir);

struct ir3_context;
bool ir3_postsched(struct ir3 *ir, struct ir3_shader_variant *v);

/* register assignment: */
int ir3_ra(struct ir3_shader_variant *v);

/* lower subgroup ops: */
bool ir3_lower_subgroups(struct ir3 *ir);

/* legalize: */
bool ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary);
/* Heuristic: does this shader contain any long-latency (memory/texture)
 * instructions whose latency the hw could hide by switching to another
 * wave?
 */
static inline bool
ir3_has_latency_to_hide(struct ir3 *ir)
{
   /* VS/GS/TCS/TESS co-exist with frag shader invocations, but we don't
    * know the nature of the fragment shader. Just assume it will have
    * latency to hide:
    */
   if (ir->type != MESA_SHADER_FRAGMENT)
      return true;

   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         /* texture fetches / prefetches are long-latency: */
         if (is_tex_or_prefetch(instr))
            return true;

         if (is_load(instr)) {
            switch (instr->opc) {
            /* local/varying loads are fast enough to not count: */
            case OPC_LDLV:
            case OPC_LDL:
            case OPC_LDLW:
               break;
            default:
               return true;
            }
         }
      }
   }

   return false;
}
/* ************************************************************************* */
1648
/* instruction helpers */
1649
1650
/* creates SSA src of correct type (ie. half vs full precision) */
1651
static inline struct ir3_register *
1652
__ssa_src(struct ir3_instruction *instr, struct ir3_instruction *src,
1653
unsigned flags)
1654
{
1655
struct ir3_register *reg;
1656
if (src->dsts[0]->flags & IR3_REG_HALF)
1657
flags |= IR3_REG_HALF;
1658
reg = ir3_src_create(instr, INVALID_REG, IR3_REG_SSA | flags);
1659
reg->def = src->dsts[0];
1660
reg->wrmask = src->dsts[0]->wrmask;
1661
return reg;
1662
}
1663
1664
static inline struct ir3_register *
1665
__ssa_dst(struct ir3_instruction *instr)
1666
{
1667
struct ir3_register *reg = ir3_dst_create(instr, INVALID_REG, IR3_REG_SSA);
1668
reg->instr = instr;
1669
return reg;
1670
}
1671
1672
static inline struct ir3_instruction *
1673
create_immed_typed(struct ir3_block *block, uint32_t val, type_t type)
1674
{
1675
struct ir3_instruction *mov;
1676
unsigned flags = (type_size(type) < 32) ? IR3_REG_HALF : 0;
1677
1678
mov = ir3_instr_create(block, OPC_MOV, 1, 1);
1679
mov->cat1.src_type = type;
1680
mov->cat1.dst_type = type;
1681
__ssa_dst(mov)->flags |= flags;
1682
ir3_src_create(mov, 0, IR3_REG_IMMED | flags)->uim_val = val;
1683
1684
return mov;
1685
}
1686
1687
/* mov of a 32b unsigned immediate value */
static inline struct ir3_instruction *
create_immed(struct ir3_block *block, uint32_t val)
{
   return create_immed_typed(block, val, TYPE_U32);
}
1693
static inline struct ir3_instruction *
1694
create_uniform_typed(struct ir3_block *block, unsigned n, type_t type)
1695
{
1696
struct ir3_instruction *mov;
1697
unsigned flags = (type_size(type) < 32) ? IR3_REG_HALF : 0;
1698
1699
mov = ir3_instr_create(block, OPC_MOV, 1, 1);
1700
mov->cat1.src_type = type;
1701
mov->cat1.dst_type = type;
1702
__ssa_dst(mov)->flags |= flags;
1703
ir3_src_create(mov, n, IR3_REG_CONST | flags);
1704
1705
return mov;
1706
}
1707
1708
/* mov from const register n, as fp32 */
static inline struct ir3_instruction *
create_uniform(struct ir3_block *block, unsigned n)
{
   return create_uniform_typed(block, n, TYPE_F32);
}
1714
static inline struct ir3_instruction *
1715
create_uniform_indirect(struct ir3_block *block, int n, type_t type,
1716
struct ir3_instruction *address)
1717
{
1718
struct ir3_instruction *mov;
1719
1720
mov = ir3_instr_create(block, OPC_MOV, 1, 1);
1721
mov->cat1.src_type = type;
1722
mov->cat1.dst_type = type;
1723
__ssa_dst(mov);
1724
ir3_src_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n;
1725
1726
ir3_instr_set_address(mov, address);
1727
1728
return mov;
1729
}
1730
1731
/* simple mov of src, preserving array-ness / shared-ness of the src */
static inline struct ir3_instruction *
ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
{
   struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV, 1, 1);
   unsigned flags = (type_size(type) < 32) ? IR3_REG_HALF : 0;

   __ssa_dst(instr)->flags |= flags;
   if (src->dsts[0]->flags & IR3_REG_ARRAY) {
      /* array srcs carry their array info over to the mov's src: */
      struct ir3_register *src_reg = __ssa_src(instr, src, IR3_REG_ARRAY);
      src_reg->array = src->dsts[0]->array;
   } else {
      /* propagate shared-ness from the defining dst: */
      __ssa_src(instr, src, src->dsts[0]->flags & IR3_REG_SHARED);
   }
   /* relative srcs are not expected here: */
   debug_assert(!(src->dsts[0]->flags & IR3_REG_RELATIV));
   instr->cat1.src_type = type;
   instr->cat1.dst_type = type;
   return instr;
}
1750
/* type-converting mov (cov) from src_type to dst_type */
static inline struct ir3_instruction *
ir3_COV(struct ir3_block *block, struct ir3_instruction *src, type_t src_type,
        type_t dst_type)
{
   struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV, 1, 1);
   unsigned dst_flags = (type_size(dst_type) < 32) ? IR3_REG_HALF : 0;
   unsigned src_flags = (type_size(src_type) < 32) ? IR3_REG_HALF : 0;

   /* src's half-ness must agree with the requested src_type: */
   debug_assert((src->dsts[0]->flags & IR3_REG_HALF) == src_flags);

   __ssa_dst(instr)->flags |= dst_flags;
   __ssa_src(instr, src, 0);
   instr->cat1.src_type = src_type;
   instr->cat1.dst_type = dst_type;
   /* array regs are not supported by cov: */
   debug_assert(!(src->dsts[0]->flags & IR3_REG_ARRAY));
   return instr;
}
1768
static inline struct ir3_instruction *
1769
ir3_MOVMSK(struct ir3_block *block, unsigned components)
1770
{
1771
struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOVMSK, 1, 0);
1772
1773
struct ir3_register *dst = __ssa_dst(instr);
1774
dst->flags |= IR3_REG_SHARED;
1775
dst->wrmask = (1 << components) - 1;
1776
instr->repeat = components - 1;
1777
return instr;
1778
}
1779
1780
static inline struct ir3_instruction *
1781
ir3_BALLOT_MACRO(struct ir3_block *block, struct ir3_instruction *src,
1782
unsigned components)
1783
{
1784
struct ir3_instruction *instr =
1785
ir3_instr_create(block, OPC_BALLOT_MACRO, 1, 1);
1786
1787
struct ir3_register *dst = __ssa_dst(instr);
1788
dst->flags |= IR3_REG_SHARED;
1789
dst->wrmask = (1 << components) - 1;
1790
1791
__ssa_src(instr, src, 0);
1792
1793
return instr;
1794
}
1795
1796
/* nop instruction (no dsts, no srcs) */
static inline struct ir3_instruction *
ir3_NOP(struct ir3_block *block)
{
   return ir3_instr_create(block, OPC_NOP, 0, 0);
}
1802
/* Instruction-builder macro family: __INSTRn generates an inline builder
 * taking n (src, srcflags) pairs; the INSTRnF variants add an instruction
 * flag (and name suffix), and the INSTRnNODST variants create no dst.
 */
#define IR3_INSTR_0 0

/* clang-format off */
#define __INSTR0(flag, name, opc)                                              \
static inline struct ir3_instruction *ir3_##name(struct ir3_block *block)      \
{                                                                              \
   struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 0);         \
   instr->flags |= flag;                                                       \
   return instr;                                                               \
}
/* clang-format on */
#define INSTR0F(f, name) __INSTR0(IR3_INSTR_##f, name##_##f, OPC_##name)
#define INSTR0(name) __INSTR0(0, name, OPC_##name)

/* clang-format off */
#define __INSTR1(flag, dst_count, name, opc)                                   \
static inline struct ir3_instruction *ir3_##name(                              \
   struct ir3_block *block, struct ir3_instruction *a, unsigned aflags)        \
{                                                                              \
   struct ir3_instruction *instr =                                             \
      ir3_instr_create(block, opc, dst_count, 1);                              \
   for (unsigned i = 0; i < dst_count; i++)                                    \
      __ssa_dst(instr);                                                        \
   __ssa_src(instr, a, aflags);                                                \
   instr->flags |= flag;                                                       \
   return instr;                                                               \
}
/* clang-format on */
#define INSTR1F(f, name) __INSTR1(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
#define INSTR1(name) __INSTR1(0, 1, name, OPC_##name)
#define INSTR1NODST(name) __INSTR1(0, 0, name, OPC_##name)

/* clang-format off */
#define __INSTR2(flag, name, opc)                                              \
static inline struct ir3_instruction *ir3_##name(                              \
   struct ir3_block *block, struct ir3_instruction *a, unsigned aflags,        \
   struct ir3_instruction *b, unsigned bflags)                                 \
{                                                                              \
   struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 2);         \
   __ssa_dst(instr);                                                           \
   __ssa_src(instr, a, aflags);                                                \
   __ssa_src(instr, b, bflags);                                                \
   instr->flags |= flag;                                                       \
   return instr;                                                               \
}
/* clang-format on */
#define INSTR2F(f, name) __INSTR2(IR3_INSTR_##f, name##_##f, OPC_##name)
#define INSTR2(name) __INSTR2(0, name, OPC_##name)

/* clang-format off */
#define __INSTR3(flag, dst_count, name, opc)                                   \
static inline struct ir3_instruction *ir3_##name(                              \
   struct ir3_block *block, struct ir3_instruction *a, unsigned aflags,        \
   struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c,      \
   unsigned cflags)                                                            \
{                                                                              \
   struct ir3_instruction *instr =                                             \
      ir3_instr_create(block, opc, dst_count, 3);                              \
   for (unsigned i = 0; i < dst_count; i++)                                    \
      __ssa_dst(instr);                                                        \
   __ssa_src(instr, a, aflags);                                                \
   __ssa_src(instr, b, bflags);                                                \
   __ssa_src(instr, c, cflags);                                                \
   instr->flags |= flag;                                                       \
   return instr;                                                               \
}
/* clang-format on */
#define INSTR3F(f, name) __INSTR3(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
#define INSTR3(name) __INSTR3(0, 1, name, OPC_##name)
#define INSTR3NODST(name) __INSTR3(0, 0, name, OPC_##name)

/* clang-format off */
#define __INSTR4(flag, dst_count, name, opc)                                   \
static inline struct ir3_instruction *ir3_##name(                              \
   struct ir3_block *block, struct ir3_instruction *a, unsigned aflags,        \
   struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c,      \
   unsigned cflags, struct ir3_instruction *d, unsigned dflags)                \
{                                                                              \
   struct ir3_instruction *instr =                                             \
      ir3_instr_create(block, opc, dst_count, 4);                              \
   for (unsigned i = 0; i < dst_count; i++)                                    \
      __ssa_dst(instr);                                                        \
   __ssa_src(instr, a, aflags);                                                \
   __ssa_src(instr, b, bflags);                                                \
   __ssa_src(instr, c, cflags);                                                \
   __ssa_src(instr, d, dflags);                                                \
   instr->flags |= flag;                                                       \
   return instr;                                                               \
}
/* clang-format on */
#define INSTR4F(f, name) __INSTR4(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
#define INSTR4(name) __INSTR4(0, 1, name, OPC_##name)
#define INSTR4NODST(name) __INSTR4(0, 0, name, OPC_##name)

/* clang-format off */
#define __INSTR5(flag, name, opc)                                              \
static inline struct ir3_instruction *ir3_##name(                              \
   struct ir3_block *block, struct ir3_instruction *a, unsigned aflags,        \
   struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c,      \
   unsigned cflags, struct ir3_instruction *d, unsigned dflags,                \
   struct ir3_instruction *e, unsigned eflags)                                 \
{                                                                              \
   struct ir3_instruction *instr = ir3_instr_create(block, opc, 1, 5);         \
   __ssa_dst(instr);                                                           \
   __ssa_src(instr, a, aflags);                                                \
   __ssa_src(instr, b, bflags);                                                \
   __ssa_src(instr, c, cflags);                                                \
   __ssa_src(instr, d, dflags);                                                \
   __ssa_src(instr, e, eflags);                                                \
   instr->flags |= flag;                                                       \
   return instr;                                                               \
}
/* clang-format on */
#define INSTR5F(f, name) __INSTR5(IR3_INSTR_##f, name##_##f, OPC_##name)
#define INSTR5(name) __INSTR5(0, name, OPC_##name)
/* 6-src instruction builder; see the __INSTRn family comment above.
 * Consistency fix: pass dst_count to ir3_instr_create (the other variadic
 * builders do) instead of a hard-coded 1, so INSTR6NODST does not allocate
 * an unused dst slot.
 */
/* clang-format off */
#define __INSTR6(flag, dst_count, name, opc)                                   \
static inline struct ir3_instruction *ir3_##name(                              \
   struct ir3_block *block, struct ir3_instruction *a, unsigned aflags,        \
   struct ir3_instruction *b, unsigned bflags, struct ir3_instruction *c,      \
   unsigned cflags, struct ir3_instruction *d, unsigned dflags,                \
   struct ir3_instruction *e, unsigned eflags, struct ir3_instruction *f,      \
   unsigned fflags)                                                            \
{                                                                              \
   struct ir3_instruction *instr =                                             \
      ir3_instr_create(block, opc, dst_count, 6);                              \
   for (unsigned i = 0; i < dst_count; i++)                                    \
      __ssa_dst(instr);                                                        \
   __ssa_src(instr, a, aflags);                                                \
   __ssa_src(instr, b, bflags);                                                \
   __ssa_src(instr, c, cflags);                                                \
   __ssa_src(instr, d, dflags);                                                \
   __ssa_src(instr, e, eflags);                                                \
   __ssa_src(instr, f, fflags);                                                \
   instr->flags |= flag;                                                       \
   return instr;                                                               \
}
/* clang-format on */
#define INSTR6F(f, name) __INSTR6(IR3_INSTR_##f, 1, name##_##f, OPC_##name)
#define INSTR6(name) __INSTR6(0, 1, name, OPC_##name)
#define INSTR6NODST(name) __INSTR6(0, 0, name, OPC_##name)
1944
/* cat0 instructions: */
INSTR1NODST(B)
INSTR0(JUMP)
INSTR1NODST(KILL)
INSTR1NODST(DEMOTE)
INSTR0(END)
INSTR0(CHSH)
INSTR0(CHMASK)
INSTR1NODST(PREDT)
INSTR0(PREDF)
INSTR0(PREDE)
INSTR0(GETONE)

/* cat1 macros */
INSTR1(ANY_MACRO)
INSTR1(ALL_MACRO)
INSTR1(READ_FIRST_MACRO)
INSTR2(READ_COND_MACRO)
1963
static inline struct ir3_instruction *
1964
ir3_ELECT_MACRO(struct ir3_block *block)
1965
{
1966
struct ir3_instruction *instr =
1967
ir3_instr_create(block, OPC_ELECT_MACRO, 1, 0);
1968
__ssa_dst(instr);
1969
return instr;
1970
}
1971
1972
/* cat2 instructions, most 2 src but some 1 src: */
INSTR2(ADD_F)
INSTR2(MIN_F)
INSTR2(MAX_F)
INSTR2(MUL_F)
INSTR1(SIGN_F)
INSTR2(CMPS_F)
INSTR1(ABSNEG_F)
INSTR2(CMPV_F)
INSTR1(FLOOR_F)
INSTR1(CEIL_F)
INSTR1(RNDNE_F)
INSTR1(RNDAZ_F)
INSTR1(TRUNC_F)
INSTR2(ADD_U)
INSTR2(ADD_S)
INSTR2(SUB_U)
INSTR2(SUB_S)
INSTR2(CMPS_U)
INSTR2(CMPS_S)
INSTR2(MIN_U)
INSTR2(MIN_S)
INSTR2(MAX_U)
INSTR2(MAX_S)
INSTR1(ABSNEG_S)
INSTR2(AND_B)
INSTR2(OR_B)
INSTR1(NOT_B)
INSTR2(XOR_B)
INSTR2(CMPV_U)
INSTR2(CMPV_S)
INSTR2(MUL_U24)
INSTR2(MUL_S24)
INSTR2(MULL_U)
INSTR1(BFREV_B)
INSTR1(CLZ_S)
INSTR1(CLZ_B)
INSTR2(SHL_B)
INSTR2(SHR_B)
INSTR2(ASHR_B)
INSTR2(BARY_F)
INSTR2(MGEN_B)
INSTR2(GETBIT_B)
INSTR1(SETRM)
INSTR1(CBITS_B)
INSTR2(SHB)
INSTR2(MSAD)

/* cat3 instructions: */
INSTR3(MAD_U16)
INSTR3(MADSH_U16)
INSTR3(MAD_S16)
INSTR3(MADSH_M16)
INSTR3(MAD_U24)
INSTR3(MAD_S24)
INSTR3(MAD_F16)
INSTR3(MAD_F32)
/* NOTE: SEL_B32 checks for zero vs nonzero */
INSTR3(SEL_B16)
INSTR3(SEL_B32)
INSTR3(SEL_S16)
INSTR3(SEL_S32)
INSTR3(SEL_F16)
INSTR3(SEL_F32)
INSTR3(SAD_S16)
INSTR3(SAD_S32)

/* cat4 instructions: */
INSTR1(RCP)
INSTR1(RSQ)
INSTR1(HRSQ)
INSTR1(LOG2)
INSTR1(HLOG2)
INSTR1(EXP2)
INSTR1(HEXP2)
INSTR1(SIN)
INSTR1(COS)
INSTR1(SQRT)

/* cat5 instructions: */
INSTR1(DSX)
INSTR1(DSXPP_MACRO)
INSTR1(DSY)
INSTR1(DSYPP_MACRO)
INSTR1F(3D, DSX)
INSTR1F(3D, DSY)
INSTR1(RGETPOS)
/* Build a cat5 sample instruction. samp_tex is only used with the S2EN
 * (sampler-from-register) flag; src0/src1 are optional coordinate/extra
 * src vectors.
 */
static inline struct ir3_instruction *
ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, unsigned wrmask,
        unsigned flags, struct ir3_instruction *samp_tex,
        struct ir3_instruction *src0, struct ir3_instruction *src1)
{
   struct ir3_instruction *sam;
   unsigned nreg = 0;

   /* count the srcs actually present so the instruction is sized exactly: */
   if (flags & IR3_INSTR_S2EN) {
      nreg++;
   }
   if (src0) {
      nreg++;
   }
   if (src1) {
      nreg++;
   }

   sam = ir3_instr_create(block, opc, 1, nreg);
   sam->flags |= flags;
   __ssa_dst(sam)->wrmask = wrmask;
   if (flags & IR3_INSTR_S2EN) {
      /* without the bindless (B) flag the samp/tex src is a half reg: */
      __ssa_src(sam, samp_tex, (flags & IR3_INSTR_B) ? 0 : IR3_REG_HALF);
   }
   if (src0) {
      __ssa_src(sam, src0, 0);
   }
   if (src1) {
      __ssa_src(sam, src1, 0);
   }
   sam->cat5.type = type;

   return sam;
}
/* cat6 instructions: */
INSTR2(LDLV)
INSTR3(LDG)
INSTR3(LDL)
INSTR3(LDLW)
INSTR3(LDP)
INSTR4NODST(STG)
INSTR3NODST(STL)
INSTR3NODST(STLW)
INSTR3NODST(STP)
INSTR1(RESINFO)
INSTR1(RESFMT)
INSTR2(ATOMIC_ADD)
INSTR2(ATOMIC_SUB)
INSTR2(ATOMIC_XCHG)
INSTR2(ATOMIC_INC)
INSTR2(ATOMIC_DEC)
INSTR2(ATOMIC_CMPXCHG)
INSTR2(ATOMIC_MIN)
INSTR2(ATOMIC_MAX)
INSTR2(ATOMIC_AND)
INSTR2(ATOMIC_OR)
INSTR2(ATOMIC_XOR)
INSTR2(LDC)
/* ssbo/image and global-atomic encodings differ per generation: */
#if GPU >= 600
INSTR3NODST(STIB);
INSTR2(LDIB);
INSTR5(LDG_A);
INSTR6NODST(STG_A);
INSTR3F(G, ATOMIC_ADD)
INSTR3F(G, ATOMIC_SUB)
INSTR3F(G, ATOMIC_XCHG)
INSTR3F(G, ATOMIC_INC)
INSTR3F(G, ATOMIC_DEC)
INSTR3F(G, ATOMIC_CMPXCHG)
INSTR3F(G, ATOMIC_MIN)
INSTR3F(G, ATOMIC_MAX)
INSTR3F(G, ATOMIC_AND)
INSTR3F(G, ATOMIC_OR)
INSTR3F(G, ATOMIC_XOR)
#elif GPU >= 400
INSTR3(LDGB)
INSTR4NODST(STGB)
INSTR4NODST(STIB)
INSTR4F(G, ATOMIC_ADD)
INSTR4F(G, ATOMIC_SUB)
INSTR4F(G, ATOMIC_XCHG)
INSTR4F(G, ATOMIC_INC)
INSTR4F(G, ATOMIC_DEC)
INSTR4F(G, ATOMIC_CMPXCHG)
INSTR4F(G, ATOMIC_MIN)
INSTR4F(G, ATOMIC_MAX)
INSTR4F(G, ATOMIC_AND)
INSTR4F(G, ATOMIC_OR)
INSTR4F(G, ATOMIC_XOR)
#endif

/* cat7 instructions: */
INSTR0(BAR)
INSTR0(FENCE)
/* ************************************************************************* */
2157
#include "regmask.h"
2158
2159
static inline void
2160
regmask_set(regmask_t *regmask, struct ir3_register *reg)
2161
{
2162
bool half = reg->flags & IR3_REG_HALF;
2163
if (reg->flags & IR3_REG_RELATIV) {
2164
for (unsigned i = 0; i < reg->size; i++)
2165
__regmask_set(regmask, half, reg->array.base + i);
2166
} else {
2167
for (unsigned mask = reg->wrmask, n = reg->num; mask; mask >>= 1, n++)
2168
if (mask & 1)
2169
__regmask_set(regmask, half, n);
2170
}
2171
}
2172
2173
static inline bool
2174
regmask_get(regmask_t *regmask, struct ir3_register *reg)
2175
{
2176
bool half = reg->flags & IR3_REG_HALF;
2177
if (reg->flags & IR3_REG_RELATIV) {
2178
for (unsigned i = 0; i < reg->size; i++)
2179
if (__regmask_get(regmask, half, reg->array.base + i))
2180
return true;
2181
} else {
2182
for (unsigned mask = reg->wrmask, n = reg->num; mask; mask >>= 1, n++)
2183
if (mask & 1)
2184
if (__regmask_get(regmask, half, n))
2185
return true;
2186
}
2187
return false;
2188
}
2189
/* ************************************************************************* */
2190
2191
#endif /* IR3_H_ */
2192
2193