/*
 * Source: GitHub repository PojavLauncherTeam/mesa
 * Path: blob/21.2-virgl/src/gallium/drivers/vc4/vc4_qir.h
 */
/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef VC4_QIR_H
#define VC4_QIR_H

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#include "util/macros.h"
#include "compiler/nir/nir.h"
#include "util/list.h"
#include "util/u_math.h"

#include "vc4_screen.h"
#include "vc4_qpu_defines.h"
#include "vc4_qpu.h"
#include "kernel/vc4_packet.h"
#include "pipe/p_state.h"

/* Only referenced through a pointer in this header, so a forward
 * declaration is sufficient.
 */
struct nir_builder;

/** Register file selector stored in the file field of struct qreg. */
enum qfile {
        QFILE_NULL,
        QFILE_TEMP,
        QFILE_VARY,
        QFILE_UNIF,
        QFILE_VPM,
        QFILE_TLB_COLOR_WRITE,
        QFILE_TLB_COLOR_WRITE_MS,
        QFILE_TLB_Z_WRITE,
        QFILE_TLB_STENCIL_SETUP,

        /* If tex_s is written on its own without preceding t/r/b setup, it's
         * a direct memory access using the input value, without the sideband
         * uniform load.  We represent these in QIR as a separate write
         * destination so we can tell if the sideband uniform is present.
         */
        QFILE_TEX_S_DIRECT,

        QFILE_TEX_S,
        QFILE_TEX_T,
        QFILE_TEX_R,
        QFILE_TEX_B,

        /* Payload registers that aren't in the physical register file, so we
         * can just use the corresponding qpu_reg at qpu_emit time.
         */
        QFILE_FRAG_X,
        QFILE_FRAG_Y,
        QFILE_FRAG_REV_FLAG,
        QFILE_QPU_ELEMENT,

        /**
         * Stores an immediate value in the index field that will be used
         * directly by qpu_load_imm().
         */
        QFILE_LOAD_IMM,

        /**
         * Stores an immediate value in the index field that can be turned
         * into a small immediate field by qpu_encode_small_immediate().
         */
        QFILE_SMALL_IMM,
};

struct qreg {
92
enum qfile file;
93
uint32_t index;
94
int pack;
95
};
96
97
static inline struct qreg qir_reg(enum qfile file, uint32_t index)
98
{
99
return (struct qreg){file, index};
100
}
101
102
/** Opcode of a QIR instruction (the op field of struct qinst). */
enum qop {
        QOP_UNDEF,
        QOP_MOV,
        QOP_FMOV,
        QOP_MMOV,
        QOP_FADD,
        QOP_FSUB,
        QOP_FMUL,
        QOP_V8MULD,
        QOP_V8MIN,
        QOP_V8MAX,
        QOP_V8ADDS,
        QOP_V8SUBS,
        QOP_MUL24,
        QOP_FMIN,
        QOP_FMAX,
        QOP_FMINABS,
        QOP_FMAXABS,
        QOP_ADD,
        QOP_SUB,
        QOP_SHL,
        QOP_SHR,
        QOP_ASR,
        QOP_MIN,
        QOP_MIN_NOIMM,
        QOP_MAX,
        QOP_AND,
        QOP_OR,
        QOP_XOR,
        QOP_NOT,

        QOP_FTOI,
        QOP_ITOF,
        QOP_RCP,
        QOP_RSQ,
        QOP_EXP2,
        QOP_LOG2,
        QOP_VW_SETUP,
        QOP_VR_SETUP,
        QOP_TLB_COLOR_READ,
        QOP_MS_MASK,
        QOP_VARY_ADD_C,

        QOP_FRAG_Z,
        QOP_FRAG_W,

        /**
         * Signal of texture read being necessary and then reading r4 into
         * the destination
         */
        QOP_TEX_RESULT,

        /**
         * Insert the signal for switching threads in a threaded fragment
         * shader.  No value can be live in an accumulator across a thrsw.
         *
         * At the QPU level, this will have several delay slots before the
         * switch happens.  Those slots are the responsibility of the
         * scheduler.
         */
        QOP_THRSW,

        /* 32-bit immediate loaded to each SIMD channel */
        QOP_LOAD_IMM,

        /* 32-bit immediate divided into 16 2-bit unsigned int values and
         * loaded to each corresponding SIMD channel.
         */
        QOP_LOAD_IMM_U2,
        /* 32-bit immediate divided into 16 2-bit signed int values and
         * loaded to each corresponding SIMD channel.
         */
        QOP_LOAD_IMM_I2,

        QOP_ROT_MUL,

        /* Jumps to block->successor[0] if the qinst->cond (as a
         * QPU_COND_BRANCH_*) passes, or block->successor[1] if not.  Note
         * that block->successor[1] may be unset if the condition is ALWAYS.
         */
        QOP_BRANCH,

        /* Emits an ADD from src[0] to src[1], where src[0] must be a
         * QOP_LOAD_IMM result and src[1] is a QUNIFORM_UNIFORMS_ADDRESS,
         * required by the kernel as part of its branch validation.
         */
        QOP_UNIFORMS_RESET,
};

struct queued_qpu_inst {
192
struct list_head link;
193
uint64_t inst;
194
};
195
196
struct qinst {
197
struct list_head link;
198
199
enum qop op;
200
struct qreg dst;
201
struct qreg src[3];
202
bool sf;
203
bool cond_is_exec_mask;
204
uint8_t cond;
205
};
206
207
/** Shader stage being compiled (the stage field of struct vc4_compile). */
enum qstage {
        /**
         * Coordinate shader, runs during binning, before the VS, and just
         * outputs position.
         */
        QSTAGE_COORD,
        QSTAGE_VERT,
        QSTAGE_FRAG,
};

/** Describes what a given entry of the shader's uniform stream contains. */
enum quniform_contents {
        /**
         * Indicates that a constant 32-bit value is copied from the program's
         * uniform contents.
         */
        QUNIFORM_CONSTANT,
        /**
         * Indicates that the program's uniform contents are used as an index
         * into the GL uniform storage.
         */
        QUNIFORM_UNIFORM,

        /** @{
         * Scaling factors from clip coordinates to relative to the viewport
         * center.
         *
         * This is used by the coordinate and vertex shaders to produce the
         * 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed
         * point offsets from the viewport center.
         */
        QUNIFORM_VIEWPORT_X_SCALE,
        QUNIFORM_VIEWPORT_Y_SCALE,
        /** @} */

        QUNIFORM_VIEWPORT_Z_OFFSET,
        QUNIFORM_VIEWPORT_Z_SCALE,

        QUNIFORM_USER_CLIP_PLANE,

        /**
         * A reference to a texture config parameter 0 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture type, miplevels, and such.  It will be found as a
         * parameter to the first QOP_TEX_[STRB] instruction in a sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P0,

        /**
         * A reference to a texture config parameter 1 uniform.
         *
         * This is a uniform implicitly loaded with a QPU_W_TMU* write, which
         * defines texture width, height, filters, and wrap modes.  It will be
         * found as a parameter to the second QOP_TEX_[STRB] instruction in a
         * sequence.
         */
        QUNIFORM_TEXTURE_CONFIG_P1,

        /** A reference to a texture config parameter 2 cubemap stride uniform */
        QUNIFORM_TEXTURE_CONFIG_P2,

        QUNIFORM_TEXTURE_FIRST_LEVEL,

        QUNIFORM_TEXTURE_MSAA_ADDR,

        QUNIFORM_UBO0_ADDR,
        QUNIFORM_UBO1_ADDR,

        QUNIFORM_TEXRECT_SCALE_X,
        QUNIFORM_TEXRECT_SCALE_Y,

        QUNIFORM_TEXTURE_BORDER_COLOR,

        QUNIFORM_BLEND_CONST_COLOR_X,
        QUNIFORM_BLEND_CONST_COLOR_Y,
        QUNIFORM_BLEND_CONST_COLOR_Z,
        QUNIFORM_BLEND_CONST_COLOR_W,
        QUNIFORM_BLEND_CONST_COLOR_RGBA,
        QUNIFORM_BLEND_CONST_COLOR_AAAA,

        QUNIFORM_STENCIL,

        QUNIFORM_SAMPLE_MASK,

        /* Placeholder uniform that will be updated by the kernel when used by
         * an instruction writing to QPU_W_UNIFORMS_ADDRESS.
         */
        QUNIFORM_UNIFORMS_ADDRESS,
};

/**
 * Identifies one varying component by slot and swizzle; used for the
 * input_slots and output_slots arrays of struct vc4_compile.
 */
struct vc4_varying_slot {
        uint8_t slot;
        uint8_t swizzle;
};

struct vc4_key {
303
struct vc4_uncompiled_shader *shader_state;
304
struct {
305
enum pipe_format format;
306
uint8_t swizzle[4];
307
union {
308
struct {
309
unsigned compare_mode:1;
310
unsigned compare_func:3;
311
unsigned wrap_s:3;
312
unsigned wrap_t:3;
313
bool force_first_level:1;
314
};
315
struct {
316
uint16_t msaa_width, msaa_height;
317
};
318
};
319
} tex[VC4_MAX_TEXTURE_SAMPLERS];
320
uint8_t ucp_enables;
321
};
322
323
struct vc4_fs_key {
324
struct vc4_key base;
325
enum pipe_format color_format;
326
bool depth_enabled;
327
bool stencil_enabled;
328
bool stencil_twoside;
329
bool stencil_full_writemasks;
330
bool is_points;
331
bool is_lines;
332
bool point_coord_upper_left;
333
bool msaa;
334
bool sample_coverage;
335
bool sample_alpha_to_coverage;
336
bool sample_alpha_to_one;
337
uint8_t logicop_func;
338
uint32_t point_sprite_mask;
339
uint32_t ubo_1_size;
340
341
struct pipe_rt_blend_state blend;
342
};
343
344
struct vc4_vs_key {
345
struct vc4_key base;
346
347
const struct vc4_fs_inputs *fs_inputs;
348
enum pipe_format attr_formats[8];
349
bool is_coord;
350
bool per_vertex_point_size;
351
};
352
353
/** A basic block of QIR intructions. */
354
struct qblock {
355
struct list_head link;
356
357
struct list_head instructions;
358
struct list_head qpu_inst_list;
359
360
struct set *predecessors;
361
struct qblock *successors[2];
362
363
int index;
364
365
/* Instruction IPs for the first and last instruction of the block.
366
* Set by vc4_qpu_schedule.c.
367
*/
368
uint32_t start_qpu_ip;
369
uint32_t end_qpu_ip;
370
371
/* Instruction IP for the branch instruction of the block. Set by
372
* vc4_qpu_schedule.c.
373
*/
374
uint32_t branch_qpu_ip;
375
376
/** @{ used by vc4_qir_live_variables.c */
377
BITSET_WORD *def;
378
BITSET_WORD *use;
379
BITSET_WORD *live_in;
380
BITSET_WORD *live_out;
381
int start_ip, end_ip;
382
/** @} */
383
};
384
385
struct vc4_compile {
386
struct vc4_context *vc4;
387
nir_shader *s;
388
nir_function_impl *impl;
389
struct exec_list *cf_node_list;
390
391
/**
392
* Mapping from nir_register * or nir_ssa_def * to array of struct
393
* qreg for the values.
394
*/
395
struct hash_table *def_ht;
396
397
/* For each temp, the instruction generating its value. */
398
struct qinst **defs;
399
uint32_t defs_array_size;
400
401
/**
402
* Inputs to the shader, arranged by TGSI declaration order.
403
*
404
* Not all fragment shader QFILE_VARY reads are present in this array.
405
*/
406
struct qreg *inputs;
407
struct qreg *outputs;
408
bool msaa_per_sample_output;
409
struct qreg color_reads[VC4_MAX_SAMPLES];
410
struct qreg sample_colors[VC4_MAX_SAMPLES];
411
uint32_t inputs_array_size;
412
uint32_t outputs_array_size;
413
uint32_t uniforms_array_size;
414
415
/* State for whether we're executing on each channel currently. 0 if
416
* yes, otherwise a block number + 1 that the channel jumped to.
417
*/
418
struct qreg execute;
419
420
struct qreg line_x, point_x, point_y;
421
/** boolean (~0 -> true) if the fragment has been discarded. */
422
struct qreg discard;
423
struct qreg payload_FRAG_Z;
424
struct qreg payload_FRAG_W;
425
426
uint8_t vattr_sizes[8];
427
428
/**
429
* Array of the VARYING_SLOT_* of all FS QFILE_VARY reads.
430
*
431
* This includes those that aren't part of the VPM varyings, like
432
* point/line coordinates.
433
*/
434
struct vc4_varying_slot *input_slots;
435
uint32_t num_input_slots;
436
uint32_t input_slots_array_size;
437
438
/**
439
* An entry per outputs[] in the VS indicating what the VARYING_SLOT_*
440
* of the output is. Used to emit from the VS in the order that the
441
* FS needs.
442
*/
443
struct vc4_varying_slot *output_slots;
444
445
struct pipe_shader_state *shader_state;
446
struct vc4_key *key;
447
struct vc4_fs_key *fs_key;
448
struct vc4_vs_key *vs_key;
449
450
/* Live ranges of temps. */
451
int *temp_start, *temp_end;
452
453
uint32_t *uniform_data;
454
enum quniform_contents *uniform_contents;
455
uint32_t uniform_array_size;
456
uint32_t num_uniforms;
457
uint32_t num_outputs;
458
uint32_t num_texture_samples;
459
uint32_t output_position_index;
460
uint32_t output_color_index;
461
uint32_t output_point_size_index;
462
uint32_t output_sample_mask_index;
463
464
struct qreg undef;
465
enum qstage stage;
466
uint32_t num_temps;
467
468
struct list_head blocks;
469
int next_block_index;
470
struct qblock *cur_block;
471
struct qblock *loop_cont_block;
472
struct qblock *loop_break_block;
473
struct qblock *last_top_block;
474
475
struct list_head qpu_inst_list;
476
477
/* Pre-QPU-scheduled instruction containing the last THRSW */
478
uint64_t *last_thrsw;
479
480
uint64_t *qpu_insts;
481
uint32_t qpu_inst_count;
482
uint32_t qpu_inst_size;
483
uint32_t num_inputs;
484
485
/**
486
* Number of inputs from num_inputs remaining to be queued to the read
487
* FIFO in the VS/CS.
488
*/
489
uint32_t num_inputs_remaining;
490
491
/* Number of inputs currently in the read FIFO for the VS/CS */
492
uint32_t num_inputs_in_fifo;
493
494
/** Next offset in the VPM to read from in the VS/CS */
495
uint32_t vpm_read_offset;
496
497
uint32_t program_id;
498
uint32_t variant_id;
499
500
/* Set to compile program in threaded FS mode, where SIG_THREAD_SWITCH
501
* is used to hide texturing latency at the cost of limiting ourselves
502
* to the bottom half of physical reg space.
503
*/
504
bool fs_threaded;
505
506
bool last_thrsw_at_top_level;
507
508
bool failed;
509
};
510
511
/* Special nir_load_input intrinsic index for loading the current TLB
 * destination color.
 */
#define VC4_NIR_TLB_COLOR_READ_INPUT		2000000000

/* Special index on the output side for the multisample mask.  It has the
 * same numeric value as the input index above.
 * NOTE(review): presumably used with nir_store_output -- confirm.
 */
#define VC4_NIR_MS_MASK_OUTPUT			2000000000

struct vc4_compile *qir_compile_init(void);
519
void qir_compile_destroy(struct vc4_compile *c);
520
struct qblock *qir_new_block(struct vc4_compile *c);
521
void qir_set_emit_block(struct vc4_compile *c, struct qblock *block);
522
void qir_link_blocks(struct qblock *predecessor, struct qblock *successor);
523
struct qblock *qir_entry_block(struct vc4_compile *c);
524
struct qblock *qir_exit_block(struct vc4_compile *c);
525
struct qinst *qir_inst(enum qop op, struct qreg dst,
526
struct qreg src0, struct qreg src1);
527
void qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst);
528
struct qreg qir_uniform(struct vc4_compile *c,
529
enum quniform_contents contents,
530
uint32_t data);
531
void qir_schedule_instructions(struct vc4_compile *c);
532
void qir_reorder_uniforms(struct vc4_compile *c);
533
void qir_emit_uniform_stream_resets(struct vc4_compile *c);
534
535
struct qreg qir_emit_def(struct vc4_compile *c, struct qinst *inst);
536
struct qinst *qir_emit_nondef(struct vc4_compile *c, struct qinst *inst);
537
538
struct qreg qir_get_temp(struct vc4_compile *c);
539
void qir_calculate_live_intervals(struct vc4_compile *c);
540
int qir_get_nsrc(struct qinst *inst);
541
int qir_get_non_sideband_nsrc(struct qinst *inst);
542
int qir_get_tex_uniform_src(struct qinst *inst);
543
bool qir_reg_equals(struct qreg a, struct qreg b);
544
bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
545
bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
546
bool qir_has_uniform_read(struct qinst *inst);
547
bool qir_is_mul(struct qinst *inst);
548
bool qir_is_raw_mov(struct qinst *inst);
549
bool qir_is_tex(struct qinst *inst);
550
bool qir_has_implicit_tex_uniform(struct qinst *inst);
551
bool qir_is_float_input(struct qinst *inst);
552
bool qir_depends_on_flags(struct qinst *inst);
553
bool qir_writes_r4(struct qinst *inst);
554
struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg);
555
uint8_t qir_channels_written(struct qinst *inst);
556
557
void qir_dump(struct vc4_compile *c);
558
void qir_dump_inst(struct vc4_compile *c, struct qinst *inst);
559
char *qir_describe_uniform(enum quniform_contents contents, uint32_t data,
560
const uint32_t *uniforms);
561
const char *qir_get_stage_name(enum qstage stage);
562
563
void qir_validate(struct vc4_compile *c);
564
565
void qir_optimize(struct vc4_compile *c);
566
bool qir_opt_algebraic(struct vc4_compile *c);
567
bool qir_opt_coalesce_ff_writes(struct vc4_compile *c);
568
bool qir_opt_constant_folding(struct vc4_compile *c);
569
bool qir_opt_copy_propagation(struct vc4_compile *c);
570
bool qir_opt_dead_code(struct vc4_compile *c);
571
bool qir_opt_peephole_sf(struct vc4_compile *c);
572
bool qir_opt_small_immediates(struct vc4_compile *c);
573
bool qir_opt_vpm(struct vc4_compile *c);
574
void vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c);
575
void vc4_nir_lower_io(nir_shader *s, struct vc4_compile *c);
576
nir_ssa_def *vc4_nir_get_swizzled_channel(struct nir_builder *b,
577
nir_ssa_def **srcs, int swiz);
578
void vc4_nir_lower_txf_ms(nir_shader *s, struct vc4_compile *c);
579
void qir_lower_uniforms(struct vc4_compile *c);
580
581
uint32_t qpu_schedule_instructions(struct vc4_compile *c);
582
583
void qir_SF(struct vc4_compile *c, struct qreg src);
584
585
static inline struct qreg
586
qir_uniform_ui(struct vc4_compile *c, uint32_t ui)
587
{
588
return qir_uniform(c, QUNIFORM_CONSTANT, ui);
589
}
590
591
static inline struct qreg
592
qir_uniform_f(struct vc4_compile *c, float f)
593
{
594
return qir_uniform(c, QUNIFORM_CONSTANT, fui(f));
595
}
596
597
/*
 * Defines the emit helpers for a QOP_<name> instruction with no sources:
 *   qir_<name>(c)            emits via qir_emit_def() and returns its qreg.
 *   qir_<name>_dest(c, dest) emits via qir_emit_nondef() with an explicit
 *                            destination and returns the instruction.
 * Unused operands are filled with c->undef.
 */
#define QIR_ALU0(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c)                                        \
{                                                                        \
        return qir_emit_def(c, qir_inst(QOP_##name, c->undef,            \
                                        c->undef, c->undef));            \
}                                                                        \
static inline struct qinst *                                             \
qir_##name##_dest(struct vc4_compile *c, struct qreg dest)               \
{                                                                        \
        return qir_emit_nondef(c, qir_inst(QOP_##name, dest,             \
                                           c->undef, c->undef));         \
}

/*
 * Defines the emit helpers for a QOP_<name> instruction with one source:
 *   qir_<name>(c, a)            emits via qir_emit_def() and returns its qreg.
 *   qir_<name>_dest(c, dest, a) emits via qir_emit_nondef() with an explicit
 *                               destination and returns the instruction.
 * The unused second source is c->undef.
 */
#define QIR_ALU1(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg a)                         \
{                                                                        \
        return qir_emit_def(c, qir_inst(QOP_##name, c->undef,            \
                                        a, c->undef));                   \
}                                                                        \
static inline struct qinst *                                             \
qir_##name##_dest(struct vc4_compile *c, struct qreg dest,               \
                  struct qreg a)                                         \
{                                                                        \
        return qir_emit_nondef(c, qir_inst(QOP_##name, dest, a,          \
                                           c->undef));                   \
}

/*
 * Defines the emit helpers for a QOP_<name> instruction with two sources:
 *   qir_<name>(c, a, b)            emits via qir_emit_def() and returns its
 *                                  qreg.
 *   qir_<name>_dest(c, dest, a, b) emits via qir_emit_nondef() with an
 *                                  explicit destination and returns the
 *                                  instruction.
 */
#define QIR_ALU2(name)                                                   \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)          \
{                                                                        \
        return qir_emit_def(c, qir_inst(QOP_##name, c->undef, a, b));    \
}                                                                        \
static inline struct qinst *                                             \
qir_##name##_dest(struct vc4_compile *c, struct qreg dest,               \
                  struct qreg a, struct qreg b)                          \
{                                                                        \
        return qir_emit_nondef(c, qir_inst(QOP_##name, dest, a, b));     \
}

/*
 * Defines qir_<name>(c, a): emits a one-source QOP_<name> instruction whose
 * destination is c->undef and returns the emitted instruction.
 */
#define QIR_NODST_1(name)                                                \
static inline struct qinst *                                             \
qir_##name(struct vc4_compile *c, struct qreg a)                         \
{                                                                        \
        return qir_emit_nondef(c, qir_inst(QOP_##name, c->undef,         \
                                           a, c->undef));                \
}

/*
 * Defines qir_<name>(c, a, b): emits a two-source QOP_<name> instruction
 * whose destination is c->undef and returns the emitted instruction.
 */
#define QIR_NODST_2(name)                                                \
static inline struct qinst *                                             \
qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)          \
{                                                                        \
        return qir_emit_nondef(c, qir_inst(QOP_##name, c->undef,         \
                                           a, b));                       \
}

/*
 * Defines qir_<name>(c), an accessor for the payload register cached in
 * c->payload_<name>.
 *
 * If the cached register's file is not QFILE_NULL it is returned as is.
 * Otherwise a new temp is allocated, a QOP_<name> instruction writing it is
 * created and added to the entry block's instruction list with list_add()
 * (rather than being emitted at the current position), the instruction is
 * recorded in c->defs[], and the temp is cached and returned.
 */
#define QIR_PAYLOAD(name)                                                \
static inline struct qreg                                                \
qir_##name(struct vc4_compile *c)                                        \
{                                                                        \
        struct qreg *payload = &c->payload_##name;                       \
        if (payload->file != QFILE_NULL)                                 \
                return *payload;                                         \
        *payload = qir_get_temp(c);                                      \
        struct qinst *inst = qir_inst(QOP_##name, *payload,              \
                                      c->undef, c->undef);               \
        struct qblock *entry = qir_entry_block(c);                       \
        list_add(&inst->link, &entry->instructions);                     \
        c->defs[payload->index] = inst;                                  \
        return *payload;                                                 \
}

QIR_ALU1(MOV)
672
QIR_ALU1(FMOV)
673
QIR_ALU1(MMOV)
674
QIR_ALU2(FADD)
675
QIR_ALU2(FSUB)
676
QIR_ALU2(FMUL)
677
QIR_ALU2(V8MULD)
678
QIR_ALU2(V8MIN)
679
QIR_ALU2(V8MAX)
680
QIR_ALU2(V8ADDS)
681
QIR_ALU2(V8SUBS)
682
QIR_ALU2(MUL24)
683
QIR_ALU2(FMIN)
684
QIR_ALU2(FMAX)
685
QIR_ALU2(FMINABS)
686
QIR_ALU2(FMAXABS)
687
QIR_ALU1(FTOI)
688
QIR_ALU1(ITOF)
689
690
QIR_ALU2(ADD)
691
QIR_ALU2(SUB)
692
QIR_ALU2(SHL)
693
QIR_ALU2(SHR)
694
QIR_ALU2(ASR)
695
QIR_ALU2(MIN)
696
QIR_ALU2(MIN_NOIMM)
697
QIR_ALU2(MAX)
698
QIR_ALU2(AND)
699
QIR_ALU2(OR)
700
QIR_ALU2(XOR)
701
QIR_ALU1(NOT)
702
703
QIR_ALU1(RCP)
704
QIR_ALU1(RSQ)
705
QIR_ALU1(EXP2)
706
QIR_ALU1(LOG2)
707
QIR_ALU1(VARY_ADD_C)
708
QIR_PAYLOAD(FRAG_Z)
709
QIR_PAYLOAD(FRAG_W)
710
QIR_ALU0(TEX_RESULT)
711
QIR_ALU0(TLB_COLOR_READ)
712
QIR_NODST_1(MS_MASK)
713
714
static inline struct qreg
715
qir_SEL(struct vc4_compile *c, uint8_t cond, struct qreg src0, struct qreg src1)
716
{
717
struct qreg t = qir_get_temp(c);
718
qir_MOV_dest(c, t, src1);
719
qir_MOV_dest(c, t, src0)->cond = cond;
720
return t;
721
}
722
723
static inline struct qreg
724
qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
725
{
726
struct qreg t = qir_FMOV(c, src);
727
c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i;
728
return t;
729
}
730
731
static inline struct qreg
732
qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i)
733
{
734
struct qreg t = qir_MOV(c, src);
735
c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i;
736
return t;
737
}
738
739
static inline struct qreg
740
qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i)
741
{
742
struct qreg t = qir_FMOV(c, src);
743
c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i;
744
return t;
745
}
746
747
static inline struct qreg
748
qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
749
{
750
struct qreg t = qir_MOV(c, src);
751
c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i;
752
return t;
753
}
754
755
static inline void
756
qir_PACK_8_F(struct vc4_compile *c, struct qreg dest, struct qreg val, int chan)
757
{
758
assert(!dest.pack);
759
dest.pack = QPU_PACK_MUL_8A + chan;
760
qir_emit_nondef(c, qir_inst(QOP_MMOV, dest, val, c->undef));
761
}
762
763
static inline struct qreg
764
qir_PACK_8888_F(struct vc4_compile *c, struct qreg val)
765
{
766
struct qreg dest = qir_MMOV(c, val);
767
c->defs[dest.index]->dst.pack = QPU_PACK_MUL_8888;
768
return dest;
769
}
770
771
static inline struct qreg
772
qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y)
773
{
774
return qir_EXP2(c, qir_FMUL(c,
775
y,
776
qir_LOG2(c, x)));
777
}
778
779
static inline void
780
qir_VPM_WRITE(struct vc4_compile *c, struct qreg val)
781
{
782
qir_MOV_dest(c, qir_reg(QFILE_VPM, 0), val);
783
}
784
785
static inline struct qreg
786
qir_LOAD_IMM(struct vc4_compile *c, uint32_t val)
787
{
788
return qir_emit_def(c, qir_inst(QOP_LOAD_IMM, c->undef,
789
qir_reg(QFILE_LOAD_IMM, val), c->undef));
790
}
791
792
static inline struct qreg
793
qir_LOAD_IMM_U2(struct vc4_compile *c, uint32_t val)
794
{
795
return qir_emit_def(c, qir_inst(QOP_LOAD_IMM_U2, c->undef,
796
qir_reg(QFILE_LOAD_IMM, val),
797
c->undef));
798
}
799
800
static inline struct qreg
801
qir_LOAD_IMM_I2(struct vc4_compile *c, uint32_t val)
802
{
803
return qir_emit_def(c, qir_inst(QOP_LOAD_IMM_I2, c->undef,
804
qir_reg(QFILE_LOAD_IMM, val),
805
c->undef));
806
}
807
808
/** Shifts the multiply output to the right by rot channels */
809
static inline struct qreg
810
qir_ROT_MUL(struct vc4_compile *c, struct qreg val, uint32_t rot)
811
{
812
return qir_emit_def(c, qir_inst(QOP_ROT_MUL, c->undef,
813
val,
814
qir_reg(QFILE_LOAD_IMM,
815
QPU_SMALL_IMM_MUL_ROT + rot)));
816
}
817
818
static inline struct qinst *
819
qir_MOV_cond(struct vc4_compile *c, uint8_t cond,
820
struct qreg dest, struct qreg src)
821
{
822
struct qinst *mov = qir_MOV_dest(c, dest, src);
823
mov->cond = cond;
824
return mov;
825
}
826
827
static inline struct qinst *
828
qir_BRANCH(struct vc4_compile *c, uint8_t cond)
829
{
830
struct qinst *inst = qir_inst(QOP_BRANCH, c->undef, c->undef, c->undef);
831
inst->cond = cond;
832
qir_emit_nondef(c, inst);
833
return inst;
834
}
835
836
/*
 * Iteration helpers.  The container arguments (c, block) are parenthesized
 * so that any pointer expression, e.g. &blk, can be passed; note that they
 * may be evaluated more than once.
 */

/* Loop over every block of the compile, in list order. */
#define qir_for_each_block(block, c)                                    \
        list_for_each_entry(struct qblock, block, &(c)->blocks, link)

/* Loop over every block of the compile, in reverse list order. */
#define qir_for_each_block_rev(block, c)                                \
        list_for_each_entry_rev(struct qblock, block, &(c)->blocks, link)

/* Loop over the non-NULL members of the successors array.  If both entries
 * point at the same block it is visited only once.
 */
#define qir_for_each_successor(succ, block)                             \
        for (struct qblock *succ = (block)->successors[0];              \
             succ != NULL;                                              \
             succ = (succ == (block)->successors[1] ? NULL :            \
                     (block)->successors[1]))

/* Loop over the instructions of a block. */
#define qir_for_each_inst(inst, block)                                  \
        list_for_each_entry(struct qinst, inst, &(block)->instructions, link)

/* Loop over the instructions of a block in reverse order. */
#define qir_for_each_inst_rev(inst, block)                              \
        list_for_each_entry_rev(struct qinst, inst, &(block)->instructions, link)

/* Loop over the instructions of a block using list_for_each_entry_safe(). */
#define qir_for_each_inst_safe(inst, block)                             \
        list_for_each_entry_safe(struct qinst, inst, &(block)->instructions, link)

/* Loop over every instruction of every block.  Declares a hidden iteration
 * variable named _block.
 */
#define qir_for_each_inst_inorder(inst, c)                              \
        qir_for_each_block(_block, c)                                   \
                qir_for_each_inst_safe(inst, _block)

#endif /* VC4_QIR_H */