Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/intel/compiler/brw_eu.h
4550 views
1
/*
2
Copyright (C) Intel Corp. 2006. All Rights Reserved.
3
Intel funded Tungsten Graphics to
4
develop this 3D driver.
5
6
Permission is hereby granted, free of charge, to any person obtaining
7
a copy of this software and associated documentation files (the
8
"Software"), to deal in the Software without restriction, including
9
without limitation the rights to use, copy, modify, merge, publish,
10
distribute, sublicense, and/or sell copies of the Software, and to
11
permit persons to whom the Software is furnished to do so, subject to
12
the following conditions:
13
14
The above copyright notice and this permission notice (including the
15
next paragraph) shall be included in all copies or substantial
16
portions of the Software.
17
18
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26
**********************************************************************/
27
/*
28
* Authors:
29
* Keith Whitwell <[email protected]>
30
*/
31
32
33
#ifndef BRW_EU_H
34
#define BRW_EU_H
35
36
#include <stdbool.h>
37
#include <stdio.h>
38
#include "brw_inst.h"
39
#include "brw_compiler.h"
40
#include "brw_eu_defines.h"
41
#include "brw_reg.h"
42
#include "brw_disasm_info.h"
43
44
#ifdef __cplusplus
45
extern "C" {
46
#endif
47
48
#define BRW_EU_MAX_INSN_STACK 5
49
50
/* Per-instruction default state applied by the EU assembler; a small stack
 * of these lives in brw_codegen so clients can push/pop state (see
 * brw_push_insn_state / brw_pop_insn_state).
 */
struct brw_insn_state {
   /* One of BRW_EXECUTE_* */
   unsigned exec_size:3;

   /* Group in units of channels */
   unsigned group:5;

   /* Compression control on gfx4-5 */
   bool compressed:1;

   /* One of BRW_MASK_* */
   unsigned mask_control:1;

   /* Scheduling info for Gfx12+ */
   struct tgl_swsb swsb;

   /* Destination saturation modifier. */
   bool saturate:1;

   /* One of BRW_ALIGN_* */
   unsigned access_mode:1;

   /* One of BRW_PREDICATE_* */
   enum brw_predicate predicate:4;

   /* Invert the predicate condition. */
   bool pred_inv:1;

   /* Flag subreg.  Bottom bit is subreg, top bit is reg */
   unsigned flag_subreg:2;

   /* AccWrCtrl bit on the emitted instruction. */
   bool acc_wr_control:1;
};
81
82
83
/* A helper for accessing the last instruction emitted.  This makes it easy
 * to set various bits on an instruction without having to create a temporary
 * variable and assign the emitted instruction to it.  Only valid while a
 * brw_codegen pointer named `p` is in scope and at least one instruction
 * has been emitted.
 */
#define brw_last_inst (&p->store[p->nr_insn - 1])
88
89
/* State for the EU assembler: the growing instruction buffer plus the
 * default-state stack and control-flow patching stacks.
 */
struct brw_codegen {
   /* Emitted instructions; store_size is the allocated capacity and
    * nr_insn the number emitted so far.
    */
   brw_inst *store;
   int store_size;
   unsigned nr_insn;
   /* Byte offset at which the next instruction will be emitted --
    * NOTE(review): presumed bytes given brw_realign/brw_append_data; confirm.
    */
   unsigned int next_insn_offset;

   /* ralloc-style allocation context for the buffers below. */
   void *mem_ctx;

   /* Allow clients to push/pop instruction state:
    */
   struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK];
   struct brw_insn_state *current;

   /** Whether or not the user wants automatic exec sizes
    *
    * If true, codegen will try to automatically infer the exec size of an
    * instruction from the width of the destination register. If false, it
    * will take whatever is set by brw_set_default_exec_size verbatim.
    *
    * This is set to true by default in brw_init_codegen.
    */
   bool automatic_exec_sizes;

   bool single_program_flow;
   const struct intel_device_info *devinfo;

   /* Control flow stacks:
    * - if_stack contains IF and ELSE instructions which must be patched
    *   (and popped) once the matching ENDIF instruction is encountered.
    *
    *   Just store the instruction pointer (an index).
    */
   int *if_stack;
   int if_stack_depth;
   int if_stack_array_size;

   /**
    * loop_stack contains the instruction pointers of the starts of loops which
    * must be patched (and popped) once the matching WHILE instruction is
    * encountered.
    */
   int *loop_stack;
   /**
    * pre-gfx6, the BREAK and CONT instructions had to tell how many IF/ENDIF
    * blocks they were popping out of, to fix up the mask stack.  This tracks
    * the IF/ENDIF nesting in each current nested loop level.
    */
   int *if_depth_in_loop;
   int loop_stack_depth;
   int loop_stack_array_size;

   /* Relocations accumulated via brw_add_reloc(), retrieved with
    * brw_get_shader_relocs().
    */
   struct brw_shader_reloc *relocs;
   int num_relocs;
   int reloc_array_size;
};
144
145
/* Node in a singly-linked list of labels keyed by instruction offset; built
 * by brw_label_assembly/brw_create_label and looked up via brw_find_label
 * during disassembly.
 */
struct brw_label {
   int offset;   /* instruction offset the label refers to */
   int number;   /* ordinal used when printing the label */
   struct brw_label *next;
};
150
151
/* Default-state stack and per-field default accessors/mutators.  The
 * defaults are applied to each instruction subsequently emitted through
 * this brw_codegen.
 */
void brw_pop_insn_state( struct brw_codegen *p );
void brw_push_insn_state( struct brw_codegen *p );
unsigned brw_get_default_exec_size(struct brw_codegen *p);
unsigned brw_get_default_group(struct brw_codegen *p);
unsigned brw_get_default_access_mode(struct brw_codegen *p);
struct tgl_swsb brw_get_default_swsb(struct brw_codegen *p);
void brw_set_default_exec_size(struct brw_codegen *p, unsigned value);
void brw_set_default_mask_control( struct brw_codegen *p, unsigned value );
void brw_set_default_saturate( struct brw_codegen *p, bool enable );
void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode );
void brw_inst_set_compression(const struct intel_device_info *devinfo,
                              brw_inst *inst, bool on);
void brw_set_default_compression(struct brw_codegen *p, bool on);
void brw_inst_set_group(const struct intel_device_info *devinfo,
                        brw_inst *inst, unsigned group);
void brw_set_default_group(struct brw_codegen *p, unsigned group);
void brw_set_default_compression_control(struct brw_codegen *p, enum brw_compression c);
void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc);
void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse);
void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg);
void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value);
void brw_set_default_swsb(struct brw_codegen *p, struct tgl_swsb value);

void brw_init_codegen(const struct intel_device_info *, struct brw_codegen *p,
                      void *mem_ctx);

/* Disassembly and label support. */
bool brw_has_jip(const struct intel_device_info *devinfo, enum opcode opcode);
bool brw_has_uip(const struct intel_device_info *devinfo, enum opcode opcode);
const struct brw_label *brw_find_label(const struct brw_label *root, int offset);
void brw_create_label(struct brw_label **labels, int offset, void *mem_ctx);
int brw_disassemble_inst(FILE *file, const struct intel_device_info *devinfo,
                         const struct brw_inst *inst, bool is_compacted,
                         int offset, const struct brw_label *root_label);
const struct
brw_label *brw_label_assembly(const struct intel_device_info *devinfo,
                              const void *assembly, int start, int end,
                              void *mem_ctx);
void brw_disassemble_with_labels(const struct intel_device_info *devinfo,
                                 const void *assembly, int start, int end, FILE *out);
void brw_disassemble(const struct intel_device_info *devinfo,
                     const void *assembly, int start, int end,
                     const struct brw_label *root_label, FILE *out);
const struct brw_shader_reloc *brw_get_shader_relocs(struct brw_codegen *p,
                                                     unsigned *num_relocs);
const unsigned *brw_get_program( struct brw_codegen *p, unsigned *sz );

bool brw_try_override_assembly(struct brw_codegen *p, int start_offset,
                               const char *identifier);

/* Program-store manipulation and instruction emission. */
void brw_realign(struct brw_codegen *p, unsigned align);
int brw_append_data(struct brw_codegen *p, void *data,
                    unsigned size, unsigned align);
brw_inst *brw_next_insn(struct brw_codegen *p, unsigned opcode);
void brw_add_reloc(struct brw_codegen *p, uint32_t id,
                   enum brw_shader_reloc_type type,
                   uint32_t offset, uint32_t delta);
void brw_set_dest(struct brw_codegen *p, brw_inst *insn, struct brw_reg dest);
void brw_set_src0(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);

void gfx6_resolve_implied_move(struct brw_codegen *p,
                               struct brw_reg *src,
                               unsigned msg_reg_nr);
212
213
/* Helpers for regular instructions:
 *
 * ALU1/ALU2/ALU3 expand to prototypes of the 1-, 2- and 3-source ALU emit
 * helpers (brw_MOV, brw_ADD, brw_MAD, ...).  The definitions live in the
 * corresponding .c file using matching macros.
 */
#define ALU1(OP)                              \
brw_inst *brw_##OP(struct brw_codegen *p,     \
                   struct brw_reg dest,       \
                   struct brw_reg src0);

#define ALU2(OP)                              \
brw_inst *brw_##OP(struct brw_codegen *p,     \
                   struct brw_reg dest,       \
                   struct brw_reg src0,       \
                   struct brw_reg src1);

#define ALU3(OP)                              \
brw_inst *brw_##OP(struct brw_codegen *p,     \
                   struct brw_reg dest,       \
                   struct brw_reg src0,       \
                   struct brw_reg src1,       \
                   struct brw_reg src2);

ALU1(MOV)
ALU2(SEL)
ALU1(NOT)
ALU2(AND)
ALU2(OR)
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
ALU1(DIM)
ALU2(ASR)
ALU2(ROL)
ALU2(ROR)
ALU3(CSEL)
ALU1(F32TO16)
ALU1(F16TO32)
ALU2(ADD)
ALU2(AVG)
ALU2(MUL)
ALU1(FRC)
ALU1(RNDD)
ALU1(RNDE)
ALU1(RNDU)
ALU1(RNDZ)
ALU2(MAC)
ALU2(MACH)
ALU1(LZD)
ALU2(DP4)
ALU2(DPH)
ALU2(DP3)
ALU2(DP2)
ALU2(LINE)
ALU2(PLN)
ALU3(MAD)
ALU3(LRP)
ALU1(BFREV)
ALU3(BFE)
ALU2(BFI1)
ALU3(BFI2)
ALU1(FBH)
ALU1(FBL)
ALU1(CBIT)
ALU2(ADDC)
ALU2(SUBB)

#undef ALU1
#undef ALU2
#undef ALU3
280
281
282
/* Helpers for SEND instruction:
283
*/
284
285
/**
 * Construct a message descriptor immediate with the specified common
 * descriptor controls.
 */
static inline uint32_t
brw_message_desc(const struct intel_device_info *devinfo,
                 unsigned msg_length,
                 unsigned response_length,
                 bool header_present)
{
   if (devinfo->ver >= 5) {
      return (SET_BITS(msg_length, 28, 25) |
              SET_BITS(response_length, 24, 20) |
              SET_BITS(header_present, 19, 19));
   } else {
      /* Gfx4: the fields live at different offsets and there is no
       * header-present bit.
       */
      return (SET_BITS(msg_length, 23, 20) |
              SET_BITS(response_length, 19, 16));
   }
}
304
305
/** Extract the msg_length field packed by brw_message_desc(). */
static inline unsigned
brw_message_desc_mlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 5)
      return GET_BITS(desc, 28, 25);
   else
      return GET_BITS(desc, 23, 20);
}

/** Extract the response_length field packed by brw_message_desc(). */
static inline unsigned
brw_message_desc_rlen(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 5)
      return GET_BITS(desc, 24, 20);
   else
      return GET_BITS(desc, 19, 16);
}

/** Extract the header_present bit; only encoded on Gfx5+. */
static inline bool
brw_message_desc_header_present(ASSERTED
                                const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   assert(devinfo->ver >= 5);
   return GET_BITS(desc, 19, 19);
}

/** Construct an extended message descriptor with the given extended mlen. */
static inline unsigned
brw_message_ex_desc(UNUSED const struct intel_device_info *devinfo,
                    unsigned ex_msg_length)
{
   return SET_BITS(ex_msg_length, 9, 6);
}

/** Extract the ex_msg_length field packed by brw_message_ex_desc(). */
static inline unsigned
brw_message_ex_desc_ex_mlen(UNUSED const struct intel_device_info *devinfo,
                            uint32_t ex_desc)
{
   return GET_BITS(ex_desc, 9, 6);
}
345
346
/** Construct a URB message descriptor with the given function controls. */
static inline uint32_t
brw_urb_desc(const struct intel_device_info *devinfo,
             unsigned msg_type,
             bool per_slot_offset_present,
             bool channel_mask_present,
             unsigned global_offset)
{
   if (devinfo->ver >= 8) {
      return (SET_BITS(per_slot_offset_present, 17, 17) |
              SET_BITS(channel_mask_present, 15, 15) |
              SET_BITS(global_offset, 14, 4) |
              SET_BITS(msg_type, 3, 0));
   } else if (devinfo->ver >= 7) {
      /* Gfx7 has no channel-mask bit in the descriptor. */
      assert(!channel_mask_present);
      return (SET_BITS(per_slot_offset_present, 16, 16) |
              SET_BITS(global_offset, 13, 3) |
              SET_BITS(msg_type, 3, 0));
   } else {
      unreachable("unhandled URB write generation");
   }
}

/** Extract the msg_type field of a URB descriptor (Gfx7+ layout). */
static inline uint32_t
brw_urb_desc_msg_type(ASSERTED const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   assert(devinfo->ver >= 7);
   return GET_BITS(desc, 3, 0);
}
375
376
/**
 * Construct a message descriptor immediate with the specified sampler
 * function controls.
 *
 * return_format is only encoded on original Gfx4 (not G4x); later
 * generations ignore it.
 */
static inline uint32_t
brw_sampler_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned sampler,
                 unsigned msg_type,
                 unsigned simd_mode,
                 unsigned return_format)
{
   const unsigned desc = (SET_BITS(binding_table_index, 7, 0) |
                          SET_BITS(sampler, 11, 8));
   if (devinfo->ver >= 7)
      return (desc | SET_BITS(msg_type, 16, 12) |
              SET_BITS(simd_mode, 18, 17));
   else if (devinfo->ver >= 5)
      return (desc | SET_BITS(msg_type, 15, 12) |
              SET_BITS(simd_mode, 17, 16));
   else if (devinfo->is_g4x)
      return desc | SET_BITS(msg_type, 15, 12);
   else
      return (desc | SET_BITS(return_format, 13, 12) |
              SET_BITS(msg_type, 15, 14));
}
402
403
/** Extract the binding table index of a sampler descriptor. */
static inline unsigned
brw_sampler_desc_binding_table_index(UNUSED
                                     const struct intel_device_info *devinfo,
                                     uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

/** Extract the sampler index of a sampler descriptor. */
static inline unsigned
brw_sampler_desc_sampler(UNUSED const struct intel_device_info *devinfo,
                         uint32_t desc)
{
   return GET_BITS(desc, 11, 8);
}

/** Extract the msg_type field packed by brw_sampler_desc(). */
static inline unsigned
brw_sampler_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 16, 12);
   else if (devinfo->ver >= 5 || devinfo->is_g4x)
      return GET_BITS(desc, 15, 12);
   else
      return GET_BITS(desc, 15, 14);
}

/** Extract the SIMD mode field; only encoded on Gfx5+. */
static inline unsigned
brw_sampler_desc_simd_mode(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   assert(devinfo->ver >= 5);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 18, 17);
   else
      return GET_BITS(desc, 17, 16);
}

/** Extract the return format; only encoded on original Gfx4 (not G4x). */
static inline unsigned
brw_sampler_desc_return_format(ASSERTED const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver == 4 && !devinfo->is_g4x);
   return GET_BITS(desc, 13, 12);
}
447
448
/**
 * Construct a message descriptor for the dataport
 */
static inline uint32_t
brw_dp_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   /* Prior to gfx6, things are too inconsistent; use the dp_read/write_desc
    * helpers instead.
    */
   assert(devinfo->ver >= 6);
   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
   if (devinfo->ver >= 8) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 18, 14));
   } else if (devinfo->ver >= 7) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 17, 14));
   } else {
      return (desc | SET_BITS(msg_control, 12, 8) |
              SET_BITS(msg_type, 16, 13));
   }
}
473
474
/** Extract the binding table index of a dataport descriptor. */
static inline unsigned
brw_dp_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

/** Extract the msg_type field packed by brw_dp_desc() (Gfx6+ layouts). */
static inline unsigned
brw_dp_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 8)
      return GET_BITS(desc, 18, 14);
   else if (devinfo->ver >= 7)
      return GET_BITS(desc, 17, 14);
   else
      return GET_BITS(desc, 16, 13);
}

/** Extract the msg_control field packed by brw_dp_desc() (Gfx6+ layouts). */
static inline unsigned
brw_dp_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 13, 8);
   else
      return GET_BITS(desc, 12, 8);
}
502
503
/**
 * Construct a message descriptor immediate with the specified dataport read
 * function controls.
 *
 * target_cache is only encoded in the pre-Gfx6 layouts; on Gfx6+ this
 * forwards to brw_dp_desc().
 */
static inline uint32_t
brw_dp_read_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned msg_control,
                 unsigned msg_type,
                 unsigned target_cache)
{
   if (devinfo->ver >= 6)
      return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control);
   else if (devinfo->ver >= 5 || devinfo->is_g4x)
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 10, 8) |
              SET_BITS(msg_type, 13, 11) |
              SET_BITS(target_cache, 15, 14));
   else
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(msg_type, 13, 12) |
              SET_BITS(target_cache, 15, 14));
}
527
528
/** Extract the msg_type field packed by brw_dp_read_desc(). */
static inline unsigned
brw_dp_read_desc_msg_type(const struct intel_device_info *devinfo,
                          uint32_t desc)
{
   if (devinfo->ver >= 6)
      return brw_dp_desc_msg_type(devinfo, desc);
   else if (devinfo->ver >= 5 || devinfo->is_g4x)
      return GET_BITS(desc, 13, 11);
   else
      return GET_BITS(desc, 13, 12);
}

/** Extract the msg_control field packed by brw_dp_read_desc(). */
static inline unsigned
brw_dp_read_desc_msg_control(const struct intel_device_info *devinfo,
                             uint32_t desc)
{
   if (devinfo->ver >= 6)
      return brw_dp_desc_msg_control(devinfo, desc);
   else if (devinfo->ver >= 5 || devinfo->is_g4x)
      return GET_BITS(desc, 10, 8);
   else
      return GET_BITS(desc, 11, 8);
}
551
552
/**
 * Construct a message descriptor immediate with the specified dataport write
 * function controls.
 *
 * send_commit_msg is only legal up to Gfx6 (enforced by the assert).
 */
static inline uint32_t
brw_dp_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  unsigned msg_type,
                  unsigned send_commit_msg)
{
   assert(devinfo->ver <= 6 || !send_commit_msg);
   if (devinfo->ver >= 6) {
      return brw_dp_desc(devinfo, binding_table_index, msg_type, msg_control) |
             SET_BITS(send_commit_msg, 17, 17);
   } else {
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(msg_type, 14, 12) |
              SET_BITS(send_commit_msg, 15, 15));
   }
}
574
575
/** Extract the msg_type field packed by brw_dp_write_desc(). */
static inline unsigned
brw_dp_write_desc_msg_type(const struct intel_device_info *devinfo,
                           uint32_t desc)
{
   if (devinfo->ver >= 6)
      return brw_dp_desc_msg_type(devinfo, desc);
   else
      return GET_BITS(desc, 14, 12);
}

/** Extract the msg_control field packed by brw_dp_write_desc(). */
static inline unsigned
brw_dp_write_desc_msg_control(const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   if (devinfo->ver >= 6)
      return brw_dp_desc_msg_control(devinfo, desc);
   else
      return GET_BITS(desc, 11, 8);
}

/** Extract the send_commit_msg bit; only meaningful up to Gfx6. */
static inline bool
brw_dp_write_desc_write_commit(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver <= 6);
   if (devinfo->ver >= 6)
      return GET_BITS(desc, 17, 17);
   else
      return GET_BITS(desc, 15, 15);
}
605
606
/**
 * Construct a message descriptor immediate with the specified dataport
 * surface function controls.
 */
static inline uint32_t
brw_dp_surface_desc(const struct intel_device_info *devinfo,
                    unsigned msg_type,
                    unsigned msg_control)
{
   assert(devinfo->ver >= 7);
   /* We'll OR in the binding table index later */
   return brw_dp_desc(devinfo, 0, msg_type, msg_control);
}
619
620
/** Construct a descriptor for an untyped atomic dataport message. */
static inline uint32_t
brw_dp_untyped_atomic_desc(const struct intel_device_info *devinfo,
                           unsigned exec_size, /**< 0 for SIMD4x2 */
                           unsigned atomic_op,
                           bool response_expected)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type;
   if (devinfo->verx10 >= 75) {
      if (exec_size > 0) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
      } else {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
      }
   } else {
      msg_type = GFX7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
   }

   /* Bit 4 selects SIMD8 mode (as opposed to SIMD16 or SIMD4x2). */
   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(0 < exec_size && exec_size <= 8, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
646
647
/** Construct a descriptor for an untyped float atomic message (Gfx9+). */
static inline uint32_t
brw_dp_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
                                 unsigned exec_size,
                                 unsigned atomic_op,
                                 bool response_expected)
{
   assert(exec_size <= 8 || exec_size == 16);
   assert(devinfo->ver >= 9);

   /* No SIMD4x2 variant exists for float atomics. */
   assert(exec_size > 0);
   const unsigned msg_type = GFX9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 1, 0) |
      SET_BITS(exec_size <= 8, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
666
667
/**
 * Compute the 4-bit channel mask for an untyped surface message: for each
 * of the four channels, the bit is SET when the channel is NOT transferred
 * (0 enabled channels -> 0xf, 4 enabled -> 0x0).
 * See also MDC_CMASK in the SKL PRM Vol 2d.
 */
static inline unsigned
brw_mdc_cmask(unsigned num_channels)
{
   const unsigned all_disabled = 0xf;
   return (all_disabled << num_channels) & all_disabled;
}
673
674
/* Channel mask for LSC messages.
 * NOTE(review): BITSET_MASK presumably sets the low num_channels bits --
 * opposite polarity from brw_mdc_cmask above; confirm against util/bitset.h.
 */
static inline unsigned
lsc_cmask(unsigned num_channels)
{
   assert(num_channels > 0 && num_channels <= 4);
   return BITSET_MASK(num_channels);
}
680
681
/** Construct a descriptor for an untyped surface read or write message. */
static inline uint32_t
brw_dp_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned num_channels,
                               bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   unsigned msg_type;
   if (write) {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE;
      } else {
         msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_WRITE;
      }
   } else {
      /* Read */
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ;
      } else {
         msg_type = GFX7_DATAPORT_DC_UNTYPED_SURFACE_READ;
      }
   }

   /* SIMD4x2 is only valid for read messages on IVB; use SIMD8 instead */
   if (write && devinfo->verx10 == 70 && exec_size == 0)
      exec_size = 8;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
719
720
/** Map a data bit width (8/16/32) onto the byte-scattered data size enum. */
static inline unsigned
brw_mdc_ds(unsigned bit_size)
{
   switch (bit_size) {
   case 8:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_BYTE;
   case 16:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_WORD;
   case 32:
      return GFX7_BYTE_SCATTERED_DATA_ELEMENT_DWORD;
   default:
      unreachable("Unsupported bit_size for byte scattered messages");
   }
}
734
735
/** Construct a descriptor for a byte-scattered read or write (HSW+). */
static inline uint32_t
brw_dp_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                              unsigned exec_size,
                              unsigned bit_size,
                              bool write)
{
   assert(exec_size <= 8 || exec_size == 16);

   assert(devinfo->verx10 >= 75);
   const unsigned msg_type =
      write ? HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE :
              HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ;

   /* No SIMD4x2 variant; bit 0 selects SIMD16 vs. SIMD8. */
   assert(exec_size > 0);
   const unsigned msg_control =
      SET_BITS(exec_size == 16, 0, 0) |
      SET_BITS(brw_mdc_ds(bit_size), 3, 2);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
755
756
/** Construct a descriptor for a DWord-scattered read or write message. */
static inline uint32_t
brw_dp_dword_scattered_rw_desc(const struct intel_device_info *devinfo,
                               unsigned exec_size,
                               bool write)
{
   assert(exec_size == 8 || exec_size == 16);

   unsigned msg_type;
   if (write) {
      if (devinfo->ver >= 6) {
         msg_type = GFX6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
      } else {
         msg_type = BRW_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE;
      }
   } else {
      if (devinfo->ver >= 7) {
         msg_type = GFX7_DATAPORT_DC_DWORD_SCATTERED_READ;
      } else if (devinfo->ver > 4 || devinfo->is_g4x) {
         msg_type = G45_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
      } else {
         msg_type = BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ;
      }
   }

   const unsigned msg_control =
      SET_BITS(1, 1, 1) | /* Legacy SIMD Mode */
      SET_BITS(exec_size == 16, 0, 0);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
786
787
/** Construct a descriptor for an OWord block read or write message. */
static inline uint32_t
brw_dp_oword_block_rw_desc(const struct intel_device_info *devinfo,
                           bool align_16B,
                           unsigned num_dwords,
                           bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   const unsigned msg_type =
      write ?     GFX7_DATAPORT_DC_OWORD_BLOCK_WRITE :
      align_16B ? GFX7_DATAPORT_DC_OWORD_BLOCK_READ :
                  GFX7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ;

   const unsigned msg_control =
      SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
806
807
/** Construct a descriptor for an A64 (stateless) untyped surface R/W. */
static inline uint32_t
brw_dp_a64_untyped_surface_rw_desc(const struct intel_device_info *devinfo,
                                   unsigned exec_size, /**< 0 for SIMD4x2 */
                                   unsigned num_channels,
                                   bool write)
{
   assert(exec_size <= 8 || exec_size == 16);
   assert(devinfo->ver >= 8);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE :
              GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ;

   /* See also MDC_SM3 in the SKL PRM Vol 2d. */
   const unsigned simd_mode = exec_size == 0 ? 0 : /* SIMD4x2 */
                              exec_size <= 8 ? 2 : 1;

   const unsigned msg_control =
      SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
      SET_BITS(simd_mode, 5, 4);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}
831
832
/** Construct a descriptor for an A64 OWord block read or write message. */
static inline uint32_t
brw_dp_a64_oword_block_rw_desc(const struct intel_device_info *devinfo,
                               bool align_16B,
                               unsigned num_dwords,
                               bool write)
{
   /* Writes can only have addresses aligned by OWORDs (16 Bytes). */
   assert(!write || align_16B);

   unsigned msg_type =
      write ? GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ;

   unsigned msg_control =
      SET_BITS(!align_16B, 4, 3) |
      SET_BITS(BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_dwords), 2, 0);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}
852
853
/**
854
* Calculate the data size (see MDC_A64_DS in the "Structures" volume of the
855
* Skylake PRM).
856
*/
857
static inline uint32_t
858
brw_mdc_a64_ds(unsigned elems)
859
{
860
switch (elems) {
861
case 1: return 0;
862
case 2: return 1;
863
case 4: return 2;
864
case 8: return 3;
865
default:
866
unreachable("Unsupported elmeent count for A64 scattered message");
867
}
868
}
869
870
/** Construct a descriptor for an A64 byte-scattered read or write (Gfx8+). */
static inline uint32_t
brw_dp_a64_byte_scattered_rw_desc(const struct intel_device_info *devinfo,
                                  unsigned exec_size, /**< 0 for SIMD4x2 */
                                  unsigned bit_size,
                                  bool write)
{
   assert(exec_size <= 8 || exec_size == 16);
   assert(devinfo->ver >= 8);

   unsigned msg_type =
      write ? GFX8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE :
              GFX9_DATAPORT_DC_PORT1_A64_SCATTERED_READ;

   const unsigned msg_control =
      SET_BITS(GFX8_A64_SCATTERED_SUBTYPE_BYTE, 1, 0) |
      SET_BITS(brw_mdc_a64_ds(bit_size / 8), 3, 2) |
      SET_BITS(exec_size == 16, 4, 4);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}
891
892
/** Construct a descriptor for an A64 untyped integer atomic (Gfx8+). */
static inline uint32_t
brw_dp_a64_untyped_atomic_desc(const struct intel_device_info *devinfo,
                               ASSERTED unsigned exec_size, /**< 0 for SIMD4x2 */
                               unsigned bit_size,
                               unsigned atomic_op,
                               bool response_expected)
{
   assert(exec_size == 8);
   assert(devinfo->ver >= 8);
   assert(bit_size == 16 || bit_size == 32 || bit_size == 64);
   /* 16-bit integer atomics require the Gfx12 half-int message. */
   assert(devinfo->ver >= 12 || bit_size >= 32);

   const unsigned msg_type = bit_size == 16 ?
      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP :
      GFX8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(bit_size == 64, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}
916
917
/** Construct a descriptor for an A64 untyped float atomic (Gfx9+). */
static inline uint32_t
brw_dp_a64_untyped_atomic_float_desc(const struct intel_device_info *devinfo,
                                     ASSERTED unsigned exec_size,
                                     unsigned bit_size,
                                     unsigned atomic_op,
                                     bool response_expected)
{
   assert(exec_size == 8);
   assert(devinfo->ver >= 9);
   assert(bit_size == 16 || bit_size == 32);
   /* 16-bit float atomics require the Gfx12 half-float message. */
   assert(devinfo->ver >= 12 || bit_size == 32);

   assert(exec_size > 0);
   const unsigned msg_type = bit_size == 32 ?
      GFX9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP :
      GFX12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP;

   const unsigned msg_control =
      SET_BITS(atomic_op, 1, 0) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_desc(devinfo, GFX8_BTI_STATELESS_NON_COHERENT,
                      msg_type, msg_control);
}
941
942
/** Construct a descriptor for a typed atomic dataport message. */
static inline uint32_t
brw_dp_typed_atomic_desc(const struct intel_device_info *devinfo,
                         unsigned exec_size,
                         unsigned exec_group,
                         unsigned atomic_op,
                         bool response_expected)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   unsigned msg_type;
   if (devinfo->verx10 >= 75) {
      if (exec_size == 0) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2;
      } else {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP;
      }
   } else {
      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
      assert(exec_size > 0);
      msg_type = GFX7_DATAPORT_RC_TYPED_ATOMIC_OP;
   }

   /* Select the upper half of the sample mask for odd 8-channel groups. */
   const bool high_sample_mask = (exec_group / 8) % 2 == 1;

   const unsigned msg_control =
      SET_BITS(atomic_op, 3, 0) |
      SET_BITS(high_sample_mask, 4, 4) |
      SET_BITS(response_expected, 5, 5);

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
974
975
/** Construct a descriptor for a typed surface read or write message. */
static inline uint32_t
brw_dp_typed_surface_rw_desc(const struct intel_device_info *devinfo,
                             unsigned exec_size,
                             unsigned exec_group,
                             unsigned num_channels,
                             bool write)
{
   assert(exec_size > 0 || exec_group == 0);
   assert(exec_group % 8 == 0);

   /* Typed surface reads and writes don't support SIMD16 */
   assert(exec_size <= 8);

   unsigned msg_type;
   if (write) {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE;
      } else {
         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_WRITE;
      }
   } else {
      if (devinfo->verx10 >= 75) {
         msg_type = HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ;
      } else {
         msg_type = GFX7_DATAPORT_RC_TYPED_SURFACE_READ;
      }
   }

   /* See also MDC_SG3 in the SKL PRM Vol 2d. */
   unsigned msg_control;
   if (devinfo->verx10 >= 75) {
      /* See also MDC_SG3 in the SKL PRM Vol 2d. */
      const unsigned slot_group = exec_size == 0 ? 0 : /* SIMD4x2 */
                                  1 + ((exec_group / 8) % 2);

      msg_control =
         SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
         SET_BITS(slot_group, 5, 4);
   } else {
      /* SIMD4x2 typed surface R/W messages only exist on HSW+ */
      assert(exec_size > 0);
      const unsigned slot_group = ((exec_group / 8) % 2);

      msg_control =
         SET_BITS(brw_mdc_cmask(num_channels), 3, 0) |
         SET_BITS(slot_group, 5, 5);
   }

   return brw_dp_surface_desc(devinfo, msg_type, msg_control);
}
1025
1026
/** Construct a render-target (framebuffer) message descriptor (Gfx6+). */
static inline uint32_t
brw_fb_desc(const struct intel_device_info *devinfo,
            unsigned binding_table_index,
            unsigned msg_type,
            unsigned msg_control)
{
   /* Prior to gen6, things are too inconsistent; use the fb_(read|write)_desc
    * helpers instead.
    */
   assert(devinfo->ver >= 6);
   const unsigned desc = SET_BITS(binding_table_index, 7, 0);
   if (devinfo->ver >= 7) {
      return (desc | SET_BITS(msg_control, 13, 8) |
              SET_BITS(msg_type, 17, 14));
   } else {
      return (desc | SET_BITS(msg_control, 12, 8) |
              SET_BITS(msg_type, 16, 13));
   }
}
1045
1046
/** Extract the binding table index of a framebuffer descriptor. */
static inline unsigned
brw_fb_desc_binding_table_index(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return GET_BITS(desc, 7, 0);
}

/** Extract the msg_control field packed by brw_fb_desc(). */
static inline uint32_t
brw_fb_desc_msg_control(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 13, 8);
   else
      return GET_BITS(desc, 12, 8);
}

/** Extract the msg_type field packed by brw_fb_desc(). */
static inline unsigned
brw_fb_desc_msg_type(const struct intel_device_info *devinfo, uint32_t desc)
{
   assert(devinfo->ver >= 6);
   if (devinfo->ver >= 7)
      return GET_BITS(desc, 17, 14);
   else
      return GET_BITS(desc, 16, 13);
}
1072
1073
/* Build a gfx9+ render-target-read message descriptor. */
static inline uint32_t
brw_fb_read_desc(const struct intel_device_info *devinfo,
                 unsigned binding_table_index,
                 unsigned msg_control,
                 unsigned exec_size,
                 bool per_sample)
{
   /* RT reads only exist on gfx9+ and only in SIMD8/SIMD16 form. */
   assert(devinfo->ver >= 9);
   assert(exec_size == 8 || exec_size == 16);

   return brw_fb_desc(devinfo, binding_table_index,
                      GFX9_DATAPORT_RC_RENDER_TARGET_READ, msg_control) |
          SET_BITS(per_sample, 13, 13) |
          SET_BITS(exec_size == 8, 8, 8) /* Render Target Message Subtype */;
}
/* Build a render-target-write message descriptor for any generation. */
static inline uint32_t
brw_fb_write_desc(const struct intel_device_info *devinfo,
                  unsigned binding_table_index,
                  unsigned msg_control,
                  bool last_render_target,
                  bool coarse_write)
{
   const unsigned msg_type =
      devinfo->ver >= 6 ?
      GFX6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE :
      BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;

   /* Coarse-rate writes only exist on gfx10+. */
   assert(devinfo->ver >= 10 || !coarse_write);

   if (devinfo->ver >= 6) {
      return brw_fb_desc(devinfo, binding_table_index, msg_type, msg_control) |
             SET_BITS(last_render_target, 12, 12) |
             SET_BITS(coarse_write, 18, 18);
   } else {
      /* NOTE(review): msg_control (11:8) and last_render_target (bit 11)
       * overlap here; presumably pre-gfx6 the last-RT flag is the top bit of
       * the control field -- confirm against the gfx4/5 PRM.
       */
      return (SET_BITS(binding_table_index, 7, 0) |
              SET_BITS(msg_control, 11, 8) |
              SET_BITS(last_render_target, 11, 11) |
              SET_BITS(msg_type, 14, 12));
   }
}
static inline unsigned
1116
brw_fb_write_desc_msg_type(const struct intel_device_info *devinfo,
1117
uint32_t desc)
1118
{
1119
if (devinfo->ver >= 6)
1120
return brw_fb_desc_msg_type(devinfo, desc);
1121
else
1122
return GET_BITS(desc, 14, 12);
1123
}
1124
1125
static inline unsigned
1126
brw_fb_write_desc_msg_control(const struct intel_device_info *devinfo,
1127
uint32_t desc)
1128
{
1129
if (devinfo->ver >= 6)
1130
return brw_fb_desc_msg_control(devinfo, desc);
1131
else
1132
return GET_BITS(desc, 11, 8);
1133
}
1134
1135
static inline bool
1136
brw_fb_write_desc_last_render_target(const struct intel_device_info *devinfo,
1137
uint32_t desc)
1138
{
1139
if (devinfo->ver >= 6)
1140
return GET_BITS(desc, 12, 12);
1141
else
1142
return GET_BITS(desc, 11, 11);
1143
}
1144
1145
static inline bool
1146
brw_fb_write_desc_write_commit(const struct intel_device_info *devinfo,
1147
uint32_t desc)
1148
{
1149
assert(devinfo->ver <= 6);
1150
if (devinfo->ver >= 6)
1151
return GET_BITS(desc, 17, 17);
1152
else
1153
return GET_BITS(desc, 15, 15);
1154
}
1155
1156
/* Decode the coarse-write flag (bit 18) of a gfx10+ FB write descriptor. */
static inline bool
brw_fb_write_desc_coarse_write(const struct intel_device_info *devinfo,
                               uint32_t desc)
{
   assert(devinfo->ver >= 10);
   return GET_BITS(desc, 18, 18);
}
/* True for LSC opcodes whose descriptor carries a channel mask instead of a
 * vector size (the CMASK load/store variants).
 */
static inline bool
lsc_opcode_has_cmask(enum lsc_opcode opcode)
{
   return opcode == LSC_OP_LOAD_CMASK || opcode == LSC_OP_STORE_CMASK;
}
/* Size in bytes of one data element in the register payload.  The D8U32 /
 * D16U32 / D16BF32 forms occupy a full dword per element in the GRF.
 */
static inline uint32_t
lsc_data_size_bytes(enum lsc_data_size data_size)
{
   switch (data_size) {
   case LSC_DATA_SIZE_D8:
      return 1;
   case LSC_DATA_SIZE_D16:
      return 2;
   case LSC_DATA_SIZE_D32:
   case LSC_DATA_SIZE_D8U32:
   case LSC_DATA_SIZE_D16U32:
   case LSC_DATA_SIZE_D16BF32:
      return 4;
   case LSC_DATA_SIZE_D64:
      return 8;
   default:
      unreachable("Unsupported data payload size.");
   }
}
/* Size in bytes of one LSC address element. */
static inline uint32_t
lsc_addr_size_bytes(enum lsc_addr_size addr_size)
{
   switch (addr_size) {
   case LSC_ADDR_SIZE_A16: return 2;
   case LSC_ADDR_SIZE_A32: return 4;
   case LSC_ADDR_SIZE_A64: return 8;
   default:
      unreachable("Unsupported address size.");
   }
}
/* Convert an LSC vector-size enum to the element count it encodes. */
static inline uint32_t
lsc_vector_length(enum lsc_vect_size vect_size)
{
   switch (vect_size) {
   case LSC_VECT_SIZE_V1: return 1;
   case LSC_VECT_SIZE_V2: return 2;
   case LSC_VECT_SIZE_V3: return 3;
   case LSC_VECT_SIZE_V4: return 4;
   case LSC_VECT_SIZE_V8: return 8;
   case LSC_VECT_SIZE_V16: return 16;
   case LSC_VECT_SIZE_V32: return 32;
   case LSC_VECT_SIZE_V64: return 64;
   default:
      unreachable("Unsupported size of vector");
   }
}
/* Inverse of lsc_vector_length(): element count to LSC vector-size enum. */
static inline enum lsc_vect_size
lsc_vect_size(unsigned vect_size)
{
   switch(vect_size) {
   case 1: return LSC_VECT_SIZE_V1;
   case 2: return LSC_VECT_SIZE_V2;
   case 3: return LSC_VECT_SIZE_V3;
   case 4: return LSC_VECT_SIZE_V4;
   case 8: return LSC_VECT_SIZE_V8;
   case 16: return LSC_VECT_SIZE_V16;
   case 32: return LSC_VECT_SIZE_V32;
   case 64: return LSC_VECT_SIZE_V64;
   default:
      unreachable("Unsupported vector size for dataport");
   }
}
/* Assemble an LSC (load/store cache) dataport message descriptor. */
static inline uint32_t
lsc_msg_desc(UNUSED const struct intel_device_info *devinfo,
             enum lsc_opcode opcode, unsigned simd_size,
             enum lsc_addr_surface_type addr_type,
             enum lsc_addr_size addr_sz, unsigned num_coordinates,
             enum lsc_data_size data_sz, unsigned num_channels,
             bool transpose, unsigned cache_ctrl, bool has_dest)
{
   assert(devinfo->has_lsc);

   /* Writeback length in registers: the data payload rounded up to GRFs. */
   unsigned dest_length = !has_dest ? 0 :
      DIV_ROUND_UP(lsc_data_size_bytes(data_sz) * num_channels * simd_size,
                   REG_SIZE);

   /* Address payload length in registers. */
   unsigned src0_length =
      DIV_ROUND_UP(lsc_addr_size_bytes(addr_sz) * num_coordinates * simd_size,
                   REG_SIZE);

   unsigned msg_desc =
      SET_BITS(opcode, 5, 0) |
      SET_BITS(addr_sz, 8, 7) |
      SET_BITS(data_sz, 11, 9) |
      SET_BITS(transpose, 15, 15) |
      SET_BITS(cache_ctrl, 19, 17) |
      SET_BITS(dest_length, 24, 20) |
      SET_BITS(src0_length, 28, 25) |
      SET_BITS(addr_type, 30, 29);

   /* Bits 15:12 carry either a channel mask (CMASK ops) or a vector size.
    * NOTE(review): the cmask field overlaps the transpose bit (15);
    * presumably CMASK messages are never transposed -- confirm in Bspec.
    */
   if (lsc_opcode_has_cmask(opcode))
      msg_desc |= SET_BITS(lsc_cmask(num_channels), 15, 12);
   else
      msg_desc |= SET_BITS(lsc_vect_size(num_channels), 14, 12);

   return msg_desc;
}
/* Extract the opcode (bits 5:0) from an LSC message descriptor. */
static inline enum lsc_opcode
lsc_msg_desc_opcode(UNUSED const struct intel_device_info *devinfo,
                    uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_opcode) GET_BITS(desc, 5, 0);
}
/* Extract the address size (bits 8:7) from an LSC message descriptor. */
static inline enum lsc_addr_size
lsc_msg_desc_addr_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_addr_size) GET_BITS(desc, 8, 7);
}
/* Extract the data size (bits 11:9) from an LSC message descriptor. */
static inline enum lsc_data_size
lsc_msg_desc_data_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_data_size) GET_BITS(desc, 11, 9);
}
/* Extract the vector size (bits 14:12); only valid for non-CMASK opcodes. */
static inline enum lsc_vect_size
lsc_msg_desc_vect_size(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   assert(!lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
   return (enum lsc_vect_size) GET_BITS(desc, 14, 12);
}
/* Extract the channel mask (bits 15:12); only valid for CMASK opcodes. */
static inline enum lsc_cmask
lsc_msg_desc_cmask(UNUSED const struct intel_device_info *devinfo,
                   uint32_t desc)
{
   assert(devinfo->has_lsc);
   assert(lsc_opcode_has_cmask(lsc_msg_desc_opcode(devinfo, desc)));
   return (enum lsc_cmask) GET_BITS(desc, 15, 12);
}
/* Extract the transpose flag (bit 15) from an LSC message descriptor. */
static inline bool
lsc_msg_desc_transpose(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 15, 15);
}
/* Extract the cache control field (bits 19:17) from an LSC descriptor. */
static inline unsigned
lsc_msg_desc_cache_ctrl(UNUSED const struct intel_device_info *devinfo,
                        uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 19, 17);
}
/* Extract the destination (writeback) length in GRFs (bits 24:20). */
static inline unsigned
lsc_msg_desc_dest_len(const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 24, 20);
}
/* Extract the src0 (address payload) length in GRFs (bits 28:25). */
static inline unsigned
lsc_msg_desc_src0_len(const struct intel_device_info *devinfo,
                      uint32_t desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(desc, 28, 25);
}
/* Extract the address surface type (bits 30:29) from an LSC descriptor. */
static inline enum lsc_addr_surface_type
lsc_msg_desc_addr_type(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_addr_surface_type) GET_BITS(desc, 30, 29);
}
/* Build an LSC fence message descriptor.  Fences are encoded as a flat A32
 * message whose scope/flush-type fields reuse the data-size/vector bits.
 */
static inline uint32_t
lsc_fence_msg_desc(UNUSED const struct intel_device_info *devinfo,
                   enum lsc_fence_scope scope,
                   enum lsc_flush_type flush_type,
                   bool route_to_lsc)
{
   assert(devinfo->has_lsc);
   return SET_BITS(LSC_OP_FENCE, 5, 0) |
          SET_BITS(LSC_ADDR_SIZE_A32, 8, 7) |
          SET_BITS(scope, 11, 9) |
          SET_BITS(flush_type, 14, 12) |
          SET_BITS(route_to_lsc, 18, 18) |
          SET_BITS(LSC_ADDR_SURFTYPE_FLAT, 30, 29);
}
/* Extract the fence scope (bits 11:9) from an LSC fence descriptor. */
static inline enum lsc_fence_scope
lsc_fence_msg_desc_scope(UNUSED const struct intel_device_info *devinfo,
                         uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_fence_scope) GET_BITS(desc, 11, 9);
}
/* Extract the flush type (bits 14:12) from an LSC fence descriptor. */
static inline enum lsc_flush_type
lsc_fence_msg_desc_flush_type(UNUSED const struct intel_device_info *devinfo,
                              uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_flush_type) GET_BITS(desc, 14, 12);
}
/* Extract the backup-fence routing flag (bit 18) from a fence descriptor. */
static inline enum lsc_backup_fence_routing
lsc_fence_msg_desc_backup_routing(UNUSED const struct intel_device_info *devinfo,
                                  uint32_t desc)
{
   assert(devinfo->has_lsc);
   return (enum lsc_backup_fence_routing) GET_BITS(desc, 18, 18);
}
/* Build the extended descriptor for a binding-table-indexed LSC message. */
static inline uint32_t
lsc_bti_ex_desc(const struct intel_device_info *devinfo, unsigned bti)
{
   assert(devinfo->has_lsc);
   return SET_BITS(bti, 31, 24) |
          SET_BITS(0, 23, 12); /* base offset */
}
/* Extract the base offset (bits 23:12) from a BTI extended descriptor. */
static inline unsigned
lsc_bti_ex_desc_base_offset(const struct intel_device_info *devinfo,
                            uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 23, 12);
}
/* Extract the binding table index (bits 31:24) from a BTI ex-descriptor. */
static inline unsigned
lsc_bti_ex_desc_index(const struct intel_device_info *devinfo,
                      uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 24);
}
/* Extract the base offset (bits 31:12) from a flat-address ex-descriptor. */
static inline unsigned
lsc_flat_ex_desc_base_offset(const struct intel_device_info *devinfo,
                             uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 12);
}
/* Build the extended descriptor for a bindless-surface-state LSC message. */
static inline uint32_t
lsc_bss_ex_desc(const struct intel_device_info *devinfo,
                unsigned surface_state_index)
{
   assert(devinfo->has_lsc);
   return SET_BITS(surface_state_index, 31, 6);
}
/* Extract the surface state index (bits 31:6) from a BSS ex-descriptor. */
static inline unsigned
lsc_bss_ex_desc_index(const struct intel_device_info *devinfo,
                      uint32_t ex_desc)
{
   assert(devinfo->has_lsc);
   return GET_BITS(ex_desc, 31, 6);
}
/* Encode the SIMD Mode 2-state (SM2) descriptor field: 0 = SIMD8, 1 = SIMD16. */
static inline uint32_t
brw_mdc_sm2(unsigned exec_size)
{
   assert(exec_size == 8 || exec_size == 16);
   return exec_size == 16;
}
/* Decode an SM2 field back to its execution size (8 or 16). */
static inline uint32_t
brw_mdc_sm2_exec_size(uint32_t sm2)
{
   assert(sm2 <= 1);
   return sm2 ? 16 : 8;
}
/* Build a bindless-thread-dispatch (ray tracing) spawn message descriptor. */
static inline uint32_t
brw_btd_spawn_desc(ASSERTED const struct intel_device_info *devinfo,
                   unsigned exec_size, unsigned msg_type)
{
   assert(devinfo->has_ray_tracing);

   return SET_BITS(0, 19, 19) | /* No header */
          SET_BITS(msg_type, 17, 14) |
          SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
}
/* Extract the message type (bits 17:14) from a BTD spawn descriptor. */
static inline uint32_t
brw_btd_spawn_msg_type(UNUSED const struct intel_device_info *devinfo,
                       uint32_t desc)
{
   return GET_BITS(desc, 17, 14);
}
/* Decode the execution size (SM2 bit 8) of a BTD spawn descriptor. */
static inline uint32_t
brw_btd_spawn_exec_size(UNUSED const struct intel_device_info *devinfo,
                        uint32_t desc)
{
   return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
}
/* Build a ray-tracing trace-ray message descriptor. */
static inline uint32_t
brw_rt_trace_ray_desc(ASSERTED const struct intel_device_info *devinfo,
                      unsigned exec_size)
{
   assert(devinfo->has_ray_tracing);

   return SET_BITS(0, 19, 19) | /* No header */
          SET_BITS(0, 17, 14) | /* Message type */
          SET_BITS(brw_mdc_sm2(exec_size), 8, 8);
}
/* Decode the execution size (SM2 bit 8) of a trace-ray descriptor. */
static inline uint32_t
brw_rt_trace_ray_desc_exec_size(UNUSED const struct intel_device_info *devinfo,
                                uint32_t desc)
{
   return brw_mdc_sm2_exec_size(GET_BITS(desc, 8, 8));
}
/**
 * Construct a message descriptor immediate with the specified pixel
 * interpolator function controls.
 */
static inline uint32_t
brw_pixel_interp_desc(UNUSED const struct intel_device_info *devinfo,
                      unsigned msg_type,
                      bool noperspective,
                      bool coarse_pixel_rate,
                      unsigned simd_mode,
                      unsigned slot_group)
{
   /* Coarse-pixel shading rate only exists on gfx10+. */
   assert(devinfo->ver >= 10 || !coarse_pixel_rate);
   return (SET_BITS(slot_group, 11, 11) |
           SET_BITS(msg_type, 13, 12) |
           SET_BITS(!!noperspective, 14, 14) |
           SET_BITS(coarse_pixel_rate, 15, 15) |
           SET_BITS(simd_mode, 16, 16));
}
/* Emission helpers implemented in brw_eu_emit.c. */
void brw_urb_WRITE(struct brw_codegen *p,
                   struct brw_reg dest,
                   unsigned msg_reg_nr,
                   struct brw_reg src0,
                   enum brw_urb_write_flags flags,
                   unsigned msg_length,
                   unsigned response_length,
                   unsigned offset,
                   unsigned swizzle);

/**
 * Send message to shared unit \p sfid with a possibly indirect descriptor \p
 * desc. If \p desc is not an immediate it will be transparently loaded to an
 * address register using an OR instruction.
 */
void
brw_send_indirect_message(struct brw_codegen *p,
                          unsigned sfid,
                          struct brw_reg dst,
                          struct brw_reg payload,
                          struct brw_reg desc,
                          unsigned desc_imm,
                          bool eot);

void
brw_send_indirect_split_message(struct brw_codegen *p,
                                unsigned sfid,
                                struct brw_reg dst,
                                struct brw_reg payload0,
                                struct brw_reg payload1,
                                struct brw_reg desc,
                                unsigned desc_imm,
                                struct brw_reg ex_desc,
                                unsigned ex_desc_imm,
                                bool eot);

void brw_ff_sync(struct brw_codegen *p,
                 struct brw_reg dest,
                 unsigned msg_reg_nr,
                 struct brw_reg src0,
                 bool allocate,
                 unsigned response_length,
                 bool eot);

void brw_svb_write(struct brw_codegen *p,
                   struct brw_reg dest,
                   unsigned msg_reg_nr,
                   struct brw_reg src0,
                   unsigned binding_table_index,
                   bool send_commit_msg);

brw_inst *brw_fb_WRITE(struct brw_codegen *p,
                       struct brw_reg payload,
                       struct brw_reg implied_header,
                       unsigned msg_control,
                       unsigned binding_table_index,
                       unsigned msg_length,
                       unsigned response_length,
                       bool eot,
                       bool last_render_target,
                       bool header_present);

brw_inst *gfx9_fb_READ(struct brw_codegen *p,
                       struct brw_reg dst,
                       struct brw_reg payload,
                       unsigned binding_table_index,
                       unsigned msg_length,
                       unsigned response_length,
                       bool per_sample);

void brw_SAMPLE(struct brw_codegen *p,
                struct brw_reg dest,
                unsigned msg_reg_nr,
                struct brw_reg src0,
                unsigned binding_table_index,
                unsigned sampler,
                unsigned msg_type,
                unsigned response_length,
                unsigned msg_length,
                unsigned header_present,
                unsigned simd_mode,
                unsigned return_format);

void brw_adjust_sampler_state_pointer(struct brw_codegen *p,
                                      struct brw_reg header,
                                      struct brw_reg sampler_index);

void gfx4_math(struct brw_codegen *p,
               struct brw_reg dest,
               unsigned function,
               unsigned msg_reg_nr,
               struct brw_reg src,
               unsigned precision );

void gfx6_math(struct brw_codegen *p,
               struct brw_reg dest,
               unsigned function,
               struct brw_reg src0,
               struct brw_reg src1);

void brw_oword_block_read(struct brw_codegen *p,
                          struct brw_reg dest,
                          struct brw_reg mrf,
                          uint32_t offset,
                          uint32_t bind_table_index);

unsigned brw_scratch_surface_idx(const struct brw_codegen *p);

void brw_oword_block_read_scratch(struct brw_codegen *p,
                                  struct brw_reg dest,
                                  struct brw_reg mrf,
                                  int num_regs,
                                  unsigned offset);

void brw_oword_block_write_scratch(struct brw_codegen *p,
                                   struct brw_reg mrf,
                                   int num_regs,
                                   unsigned offset);

void gfx7_block_read_scratch(struct brw_codegen *p,
                             struct brw_reg dest,
                             int num_regs,
                             unsigned offset);

void brw_shader_time_add(struct brw_codegen *p,
                         struct brw_reg payload,
                         uint32_t surf_index);
/**
 * Return the generation-specific jump distance scaling factor.
 *
 * Given the number of instructions to jump, we need to scale by
 * some number to obtain the actual jump distance to program in an
 * instruction.
 */
static inline unsigned
brw_jump_scale(const struct intel_device_info *devinfo)
{
   /* Broadwell measures jump targets in bytes. */
   if (devinfo->ver >= 8)
      return 16;

   /* Ironlake and later measure jump targets in 64-bit data chunks (in order
    * to support compaction), so each 128-bit instruction requires 2 chunks.
    */
   if (devinfo->ver >= 5)
      return 2;

   /* Gfx4 simply uses the number of 128-bit instructions. */
   return 1;
}
void brw_barrier(struct brw_codegen *p, struct brw_reg src);

/* If/else/endif. Works by manipulating the execution flags on each
 * channel.
 */
brw_inst *brw_IF(struct brw_codegen *p, unsigned execute_size);
brw_inst *gfx6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
                  struct brw_reg src0, struct brw_reg src1);

void brw_ELSE(struct brw_codegen *p);
void brw_ENDIF(struct brw_codegen *p);

/* DO/WHILE loops:
 */
brw_inst *brw_DO(struct brw_codegen *p, unsigned execute_size);

brw_inst *brw_WHILE(struct brw_codegen *p);

brw_inst *brw_BREAK(struct brw_codegen *p);
brw_inst *brw_CONT(struct brw_codegen *p);
brw_inst *brw_HALT(struct brw_codegen *p);

/* Forward jumps:
 */
void brw_land_fwd_jump(struct brw_codegen *p, int jmp_insn_idx);

brw_inst *brw_JMPI(struct brw_codegen *p, struct brw_reg index,
                   unsigned predicate_control);

void brw_NOP(struct brw_codegen *p);

void brw_WAIT(struct brw_codegen *p);

void brw_SYNC(struct brw_codegen *p, enum tgl_sync_function func);

/* Special case: there is never a destination, execution size will be
 * taken from src0:
 */
void brw_CMP(struct brw_codegen *p,
             struct brw_reg dest,
             unsigned conditional,
             struct brw_reg src0,
             struct brw_reg src1);

void brw_CMPN(struct brw_codegen *p,
              struct brw_reg dest,
              unsigned conditional,
              struct brw_reg src0,
              struct brw_reg src1);

void
brw_untyped_atomic(struct brw_codegen *p,
                   struct brw_reg dst,
                   struct brw_reg payload,
                   struct brw_reg surface,
                   unsigned atomic_op,
                   unsigned msg_length,
                   bool response_expected,
                   bool header_present);

void
brw_untyped_surface_read(struct brw_codegen *p,
                         struct brw_reg dst,
                         struct brw_reg payload,
                         struct brw_reg surface,
                         unsigned msg_length,
                         unsigned num_channels);

void
brw_untyped_surface_write(struct brw_codegen *p,
                          struct brw_reg payload,
                          struct brw_reg surface,
                          unsigned msg_length,
                          unsigned num_channels,
                          bool header_present);

void
brw_memory_fence(struct brw_codegen *p,
                 struct brw_reg dst,
                 struct brw_reg src,
                 enum opcode send_op,
                 enum brw_message_target sfid,
                 bool commit_enable,
                 unsigned bti);

void
brw_pixel_interpolator_query(struct brw_codegen *p,
                             struct brw_reg dest,
                             struct brw_reg mrf,
                             bool noperspective,
                             bool coarse_pixel_rate,
                             unsigned mode,
                             struct brw_reg data,
                             unsigned msg_length,
                             unsigned response_length);

void
brw_find_live_channel(struct brw_codegen *p,
                      struct brw_reg dst,
                      struct brw_reg mask);

void
brw_broadcast(struct brw_codegen *p,
              struct brw_reg dst,
              struct brw_reg src,
              struct brw_reg idx);

void
brw_float_controls_mode(struct brw_codegen *p,
                        unsigned mode, unsigned mask);

void
brw_update_reloc_imm(const struct intel_device_info *devinfo,
                     brw_inst *inst,
                     uint32_t value);

void
brw_MOV_reloc_imm(struct brw_codegen *p,
                  struct brw_reg dst,
                  enum brw_reg_type src_type,
                  uint32_t id);

/***********************************************************************
 * brw_eu_util.c:
 */

void brw_copy_indirect_to_indirect(struct brw_codegen *p,
                                   struct brw_indirect dst_ptr,
                                   struct brw_indirect src_ptr,
                                   unsigned count);

void brw_copy_from_indirect(struct brw_codegen *p,
                            struct brw_reg dst,
                            struct brw_indirect ptr,
                            unsigned count);

void brw_copy4(struct brw_codegen *p,
               struct brw_reg dst,
               struct brw_reg src,
               unsigned count);

void brw_copy8(struct brw_codegen *p,
               struct brw_reg dst,
               struct brw_reg src,
               unsigned count);

void brw_math_invert( struct brw_codegen *p,
                      struct brw_reg dst,
                      struct brw_reg src);

void brw_set_src1(struct brw_codegen *p, brw_inst *insn, struct brw_reg reg);

void brw_set_desc_ex(struct brw_codegen *p, brw_inst *insn,
                     unsigned desc, unsigned ex_desc);
/* Set an instruction's SEND descriptor with a zero extended descriptor. */
static inline void
brw_set_desc(struct brw_codegen *p, brw_inst *insn, unsigned desc)
{
   brw_set_desc_ex(p, insn, desc, 0);
}
void brw_set_uip_jip(struct brw_codegen *p, int start_offset);

enum brw_conditional_mod brw_negate_cmod(enum brw_conditional_mod cmod);
enum brw_conditional_mod brw_swap_cmod(enum brw_conditional_mod cmod);

/* brw_eu_compact.c */
void brw_compact_instructions(struct brw_codegen *p, int start_offset,
                              struct disasm_info *disasm);
void brw_uncompact_instruction(const struct intel_device_info *devinfo,
                               brw_inst *dst, brw_compact_inst *src);
bool brw_try_compact_instruction(const struct intel_device_info *devinfo,
                                 brw_compact_inst *dst, const brw_inst *src);

void brw_debug_compact_uncompact(const struct intel_device_info *devinfo,
                                 brw_inst *orig, brw_inst *uncompacted);

/* brw_eu_validate.c */
bool brw_validate_instruction(const struct intel_device_info *devinfo,
                              const brw_inst *inst, int offset,
                              struct disasm_info *disasm);
bool brw_validate_instructions(const struct intel_device_info *devinfo,
                               const void *assembly, int start_offset, int end_offset,
                               struct disasm_info *disasm);
static inline int
1856
next_offset(const struct intel_device_info *devinfo, void *store, int offset)
1857
{
1858
brw_inst *insn = (brw_inst *)((char *)store + offset);
1859
1860
if (brw_inst_cmpt_control(devinfo, insn))
1861
return offset + 8;
1862
else
1863
return offset + 16;
1864
}
1865
1866
/* Per-opcode description table entry.
 * NOTE(review): field meanings inferred from brw_opcode_encode/decode and
 * is_3src() below: `ir` is the IR-level enum opcode, `hw` its hardware
 * encoding, `nsrc`/`ndst` the operand counts, `gfx_vers` presumably a
 * supported-generation mask -- confirm against brw_eu.c.
 */
struct opcode_desc {
   unsigned ir;
   unsigned hw;
   const char *name;
   int nsrc;
   int ndst;
   int gfx_vers;
};

/* Look up the descriptor for an IR opcode; defined in brw_eu.c. */
const struct opcode_desc *
brw_opcode_desc(const struct intel_device_info *devinfo, enum opcode opcode);

/* Look up the descriptor for a hardware opcode encoding. */
const struct opcode_desc *
brw_opcode_desc_from_hw(const struct intel_device_info *devinfo, unsigned hw);
/* Map an IR opcode to its hardware encoding for this generation. */
static inline unsigned
brw_opcode_encode(const struct intel_device_info *devinfo, enum opcode opcode)
{
   return brw_opcode_desc(devinfo, opcode)->hw;
}
/* Map a hardware opcode encoding back to the IR opcode; unknown encodings
 * decode as BRW_OPCODE_ILLEGAL.
 */
static inline enum opcode
brw_opcode_decode(const struct intel_device_info *devinfo, unsigned hw)
{
   const struct opcode_desc *desc = brw_opcode_desc_from_hw(devinfo, hw);
   return desc ? (enum opcode)desc->ir : BRW_OPCODE_ILLEGAL;
}
/* Store an IR opcode into an instruction, encoding it for this generation. */
static inline void
brw_inst_set_opcode(const struct intel_device_info *devinfo,
                    brw_inst *inst, enum opcode opcode)
{
   brw_inst_set_hw_opcode(devinfo, inst, brw_opcode_encode(devinfo, opcode));
}
/* Read an instruction's hardware opcode and decode it to the IR opcode. */
static inline enum opcode
brw_inst_opcode(const struct intel_device_info *devinfo, const brw_inst *inst)
{
   return brw_opcode_decode(devinfo, brw_inst_hw_opcode(devinfo, inst));
}
/* True if the opcode takes three source operands (e.g. MAD, LRP). */
static inline bool
is_3src(const struct intel_device_info *devinfo, enum opcode opcode)
{
   const struct opcode_desc *desc = brw_opcode_desc(devinfo, opcode);
   return desc && desc->nsrc == 3;
}
/** Maximum SEND message length */
#define BRW_MAX_MSG_LENGTH 15

/* Fix: the doc comments on the two macros below were swapped -- the "pull
 * loads" comment annotated FIRST_SPILL_MRF and vice versa.
 */

/** First MRF register used by spills */
#define FIRST_SPILL_MRF(gen) ((gen) == 6 ? 21 : 13)

/** First MRF register used by pull loads */
#define FIRST_PULL_LOAD_MRF(gen) ((gen) == 6 ? 16 : 13)
#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* BRW_EU_H */