GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/intel/compiler/brw_eu.cpp

/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <[email protected]>
 */

#include <sys/stat.h>
#include <fcntl.h>

#include "brw_eu_defines.h"
#include "brw_eu.h"
#include "brw_shader.h"
#include "brw_gfx_ver_enum.h"
#include "dev/intel_debug.h"

#include "util/ralloc.h"

/* Returns a conditional modifier that negates the condition. */
enum brw_conditional_mod
brw_negate_cmod(enum brw_conditional_mod cmod)
{
   switch (cmod) {
   case BRW_CONDITIONAL_Z:
      return BRW_CONDITIONAL_NZ;
   case BRW_CONDITIONAL_NZ:
      return BRW_CONDITIONAL_Z;
   case BRW_CONDITIONAL_G:
      return BRW_CONDITIONAL_LE;
   case BRW_CONDITIONAL_GE:
      return BRW_CONDITIONAL_L;
   case BRW_CONDITIONAL_L:
      return BRW_CONDITIONAL_GE;
   case BRW_CONDITIONAL_LE:
      return BRW_CONDITIONAL_G;
   default:
      unreachable("Can't negate this cmod");
   }
}

/* Returns the corresponding conditional mod for swapping src0 and
 * src1 in e.g. CMP.
 */
enum brw_conditional_mod
brw_swap_cmod(enum brw_conditional_mod cmod)
{
   switch (cmod) {
   case BRW_CONDITIONAL_Z:
   case BRW_CONDITIONAL_NZ:
      return cmod;
   case BRW_CONDITIONAL_G:
      return BRW_CONDITIONAL_L;
   case BRW_CONDITIONAL_GE:
      return BRW_CONDITIONAL_LE;
   case BRW_CONDITIONAL_L:
      return BRW_CONDITIONAL_G;
   case BRW_CONDITIONAL_LE:
      return BRW_CONDITIONAL_GE;
   default:
      return BRW_CONDITIONAL_NONE;
   }
}

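/* Worked example contrasting the two helpers above: for "cmp.g a, b",
 * brw_negate_cmod(G) == LE, which tests the logically opposite condition
 * on the same operand order (!(a > b) == (a <= b)), whereas
 * brw_swap_cmod(G) == L, which preserves the condition when the operands
 * are exchanged ((a > b) == (b < a)).
 */
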
/**
 * Get the least significant bit offset of the i+1-th component of immediate
 * type \p type. For \p i equal to the two's complement of j, return the
 * offset of the j-th component starting from the end of the vector. For
 * scalar register types return zero.
 */
static unsigned
imm_shift(enum brw_reg_type type, unsigned i)
{
   assert(type != BRW_REGISTER_TYPE_UV && type != BRW_REGISTER_TYPE_V &&
          "Not implemented.");

   if (type == BRW_REGISTER_TYPE_VF)
      return 8 * (i & 3);
   else
      return 0;
}

/**
 * Swizzle an arbitrary immediate \p x of the given type according to the
 * permutation specified as \p swz.
 */
uint32_t
brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz)
{
   if (imm_shift(type, 1)) {
      const unsigned n = 32 / imm_shift(type, 1);
      uint32_t y = 0;

      for (unsigned i = 0; i < n; i++) {
         /* Shift the specified component all the way to the right and left to
          * discard any undesired L/MSBs, then shift it right into component i.
          */
         y |= x >> imm_shift(type, (i & ~3) + BRW_GET_SWZ(swz, i & 3))
                << imm_shift(type, ~0u)
                >> imm_shift(type, ~0u - i);
      }

      return y;
   } else {
      return x;
   }
}

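/* Worked example: a BRW_REGISTER_TYPE_VF immediate packs four 8-bit
 * restricted floats, one per component, so x = 0xddccbbaa encodes
 * (x, y, z, w) = (0xaa, 0xbb, 0xcc, 0xdd).  Broadcasting the first
 * component with swz = BRW_SWIZZLE_XXXX gives
 *
 *    brw_swizzle_immediate(BRW_REGISTER_TYPE_VF, 0xddccbbaa,
 *                          BRW_SWIZZLE_XXXX) == 0xaaaaaaaa
 *
 * while scalar types fall through the imm_shift(type, 1) == 0 path and are
 * returned unchanged.
 */
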
unsigned
brw_get_default_exec_size(struct brw_codegen *p)
{
   return p->current->exec_size;
}

unsigned
brw_get_default_group(struct brw_codegen *p)
{
   return p->current->group;
}

unsigned
brw_get_default_access_mode(struct brw_codegen *p)
{
   return p->current->access_mode;
}

tgl_swsb
brw_get_default_swsb(struct brw_codegen *p)
{
   return p->current->swsb;
}

void
brw_set_default_exec_size(struct brw_codegen *p, unsigned value)
{
   p->current->exec_size = value;
}

void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc)
{
   p->current->predicate = pc;
}

void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse)
{
   p->current->pred_inv = predicate_inverse;
}

void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg)
{
   assert(subreg < 2);
   p->current->flag_subreg = reg * 2 + subreg;
}

void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode )
{
   p->current->access_mode = access_mode;
}

void
brw_set_default_compression_control(struct brw_codegen *p,
                                    enum brw_compression compression_control)
{
   switch (compression_control) {
   case BRW_COMPRESSION_NONE:
      /* This is the "use the first set of bits of dmask/vmask/arf
       * according to execsize" option.
       */
      p->current->group = 0;
      break;
   case BRW_COMPRESSION_2NDHALF:
      /* For SIMD8, this is "use the second set of 8 bits." */
      p->current->group = 8;
      break;
   case BRW_COMPRESSION_COMPRESSED:
      /* For SIMD16 instruction compression, use the first set of 16 bits
       * since we don't do SIMD32 dispatch.
       */
      p->current->group = 0;
      break;
   default:
      unreachable("not reached");
   }

   if (p->devinfo->ver <= 6) {
      p->current->compressed =
         (compression_control == BRW_COMPRESSION_COMPRESSED);
   }
}

/**
 * Enable or disable instruction compression on the given instruction,
 * leaving the currently selected channel enable group untouched.
 */
void
brw_inst_set_compression(const struct intel_device_info *devinfo,
                         brw_inst *inst, bool on)
{
   if (devinfo->ver >= 6) {
      /* No-op, the EU will figure out for us whether the instruction needs to
       * be compressed.
       */
   } else {
      /* The channel group and compression controls are non-orthogonal, there
       * are two possible representations for uncompressed instructions and we
       * may need to preserve the current one to avoid changing the selected
       * channel group inadvertently.
       */
      if (on)
         brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_COMPRESSED);
      else if (brw_inst_qtr_control(devinfo, inst)
               == BRW_COMPRESSION_COMPRESSED)
         brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
   }
}

void
brw_set_default_compression(struct brw_codegen *p, bool on)
{
   p->current->compressed = on;
}

/**
 * Apply the range of channel enable signals given by
 * [group, group + exec_size) to the instruction passed as argument.
 */
void
brw_inst_set_group(const struct intel_device_info *devinfo,
                   brw_inst *inst, unsigned group)
{
   if (devinfo->ver >= 7) {
      assert(group % 4 == 0 && group < 32);
      brw_inst_set_qtr_control(devinfo, inst, group / 8);
      brw_inst_set_nib_control(devinfo, inst, (group / 4) % 2);

   } else if (devinfo->ver == 6) {
      assert(group % 8 == 0 && group < 32);
      brw_inst_set_qtr_control(devinfo, inst, group / 8);

   } else {
      assert(group % 8 == 0 && group < 16);
      /* The channel group and compression controls are non-orthogonal, there
       * are two possible representations for group zero and we may need to
       * preserve the current one to avoid changing the selected compression
       * enable inadvertently.
       */
      if (group == 8)
         brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_2NDHALF);
      else if (brw_inst_qtr_control(devinfo, inst) == BRW_COMPRESSION_2NDHALF)
         brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
   }
}

void
brw_set_default_group(struct brw_codegen *p, unsigned group)
{
   p->current->group = group;
}

void brw_set_default_mask_control( struct brw_codegen *p, unsigned value )
{
   p->current->mask_control = value;
}

void brw_set_default_saturate( struct brw_codegen *p, bool enable )
{
   p->current->saturate = enable;
}

void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value)
{
   p->current->acc_wr_control = value;
}

void brw_set_default_swsb(struct brw_codegen *p, tgl_swsb value)
{
   p->current->swsb = value;
}

void brw_push_insn_state( struct brw_codegen *p )
{
   assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
   *(p->current + 1) = *p->current;
   p->current++;
}

void brw_pop_insn_state( struct brw_codegen *p )
{
   assert(p->current != p->stack);
   p->current--;
}

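/* Usage sketch (illustrative): the state stack lets a caller override the
 * default instruction state temporarily and then restore it; nesting depth
 * is bounded by BRW_EU_MAX_INSN_STACK, as the assert above enforces:
 *
 *    brw_push_insn_state(p);
 *    brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 *    brw_set_default_exec_size(p, BRW_EXECUTE_1);
 *    brw_MOV(p, dst, src);      // emitted with the overrides (dst/src elided)
 *    brw_pop_insn_state(p);     // previous defaults restored
 */
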
/***********************************************************************
 */
void
brw_init_codegen(const struct intel_device_info *devinfo,
                 struct brw_codegen *p, void *mem_ctx)
{
   memset(p, 0, sizeof(*p));

   p->devinfo = devinfo;
   p->automatic_exec_sizes = true;
   /*
    * Set the initial instruction store array size to 1024; if that turns
    * out not to be enough, brw_next_insn() will keep doubling the store
    * size until the allocation fails.
    */
   p->store_size = 1024;
   p->store = rzalloc_array(mem_ctx, brw_inst, p->store_size);
   p->nr_insn = 0;
   p->current = p->stack;
   memset(p->current, 0, sizeof(p->current[0]));

   p->mem_ctx = mem_ctx;

   /* Set up some sensible defaults. */
   brw_set_default_exec_size(p, BRW_EXECUTE_8);
   brw_set_default_mask_control(p, BRW_MASK_ENABLE);
   brw_set_default_saturate(p, 0);
   brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);

   /* Set up control flow stack */
   p->if_stack_depth = 0;
   p->if_stack_array_size = 16;
   p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size);

   p->loop_stack_depth = 0;
   p->loop_stack_array_size = 16;
   p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
   p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
}

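/* Usage sketch (illustrative, assuming a live ralloc memory context): a
 * typical codegen lifecycle pairs brw_init_codegen() with brw_get_program():
 *
 *    struct brw_codegen p;
 *    brw_init_codegen(devinfo, &p, mem_ctx);
 *    ...emit instructions through the emission helpers (brw_eu_emit.c)...
 *    unsigned sz;
 *    const unsigned *program = brw_get_program(&p, &sz);
 *    // sz is the program size in bytes (p.next_insn_offset below).
 */
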
const unsigned *brw_get_program( struct brw_codegen *p,
                                 unsigned *sz )
{
   *sz = p->next_insn_offset;
   return (const unsigned *)p->store;
}

const brw_shader_reloc *
brw_get_shader_relocs(struct brw_codegen *p, unsigned *num_relocs)
{
   *num_relocs = p->num_relocs;
   return p->relocs;
}

bool brw_try_override_assembly(struct brw_codegen *p, int start_offset,
                               const char *identifier)
{
   const char *read_path = getenv("INTEL_SHADER_ASM_READ_PATH");
   if (!read_path) {
      return false;
   }

   char *name = ralloc_asprintf(NULL, "%s/%s.bin", read_path, identifier);

   int fd = open(name, O_RDONLY);
   ralloc_free(name);

   if (fd == -1) {
      return false;
   }

   struct stat sb;
   if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) {
      close(fd);
      return false;
   }

   p->nr_insn -= (p->next_insn_offset - start_offset) / sizeof(brw_inst);
   p->nr_insn += sb.st_size / sizeof(brw_inst);

   p->next_insn_offset = start_offset + sb.st_size;
   p->store_size = (start_offset + sb.st_size) / sizeof(brw_inst);
   p->store = (brw_inst *)reralloc_size(p->mem_ctx, p->store, p->next_insn_offset);
   assert(p->store);

   ssize_t ret = read(fd, (char *)p->store + start_offset, sb.st_size);
   close(fd);
   if (ret != sb.st_size) {
      return false;
   }

   ASSERTED bool valid =
      brw_validate_instructions(p->devinfo, p->store,
                                start_offset, p->next_insn_offset,
                                NULL);
   assert(valid);

   return true;
}

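/* Usage note: assembly override is driven entirely by the environment.
 * Pointing INTEL_SHADER_ASM_READ_PATH at a directory that contains
 * "<identifier>.bin" files makes the function above splice those raw
 * instruction bytes in over the freshly generated code, e.g.:
 *
 *    INTEL_SHADER_ASM_READ_PATH=/path/to/overrides ./my_gl_app
 *
 * (the path and application name are placeholders; each file must hold
 * valid native instructions, as checked by brw_validate_instructions).
 */
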
const struct brw_label *
brw_find_label(const struct brw_label *root, int offset)
{
   const struct brw_label *curr = root;

   if (curr != NULL)
   {
      do {
         if (curr->offset == offset)
            return curr;

         curr = curr->next;
      } while (curr != NULL);
   }

   return curr;
}

void
brw_create_label(struct brw_label **labels, int offset, void *mem_ctx)
{
   if (*labels != NULL) {
      struct brw_label *curr = *labels;
      struct brw_label *prev;

      do {
         prev = curr;

         if (curr->offset == offset)
            return;

         curr = curr->next;
      } while (curr != NULL);

      curr = ralloc(mem_ctx, struct brw_label);
      curr->offset = offset;
      curr->number = prev->number + 1;
      curr->next = NULL;
      prev->next = curr;
   } else {
      struct brw_label *root = ralloc(mem_ctx, struct brw_label);
      root->number = 0;
      root->offset = offset;
      root->next = NULL;
      *labels = root;
   }
}

const struct brw_label *
brw_label_assembly(const struct intel_device_info *devinfo,
                   const void *assembly, int start, int end, void *mem_ctx)
{
   struct brw_label *root_label = NULL;

   int to_bytes_scale = sizeof(brw_inst) / brw_jump_scale(devinfo);

   for (int offset = start; offset < end;) {
      const brw_inst *inst = (const brw_inst *) ((const char *) assembly + offset);
      brw_inst uncompacted;

      bool is_compact = brw_inst_cmpt_control(devinfo, inst);

      if (is_compact) {
         brw_compact_inst *compacted = (brw_compact_inst *)inst;
         brw_uncompact_instruction(devinfo, &uncompacted, compacted);
         inst = &uncompacted;
      }

      if (brw_has_uip(devinfo, brw_inst_opcode(devinfo, inst))) {
         /* Instructions that have UIP also have JIP. */
         brw_create_label(&root_label,
                          offset + brw_inst_uip(devinfo, inst) * to_bytes_scale, mem_ctx);
         brw_create_label(&root_label,
                          offset + brw_inst_jip(devinfo, inst) * to_bytes_scale, mem_ctx);
      } else if (brw_has_jip(devinfo, brw_inst_opcode(devinfo, inst))) {
         int jip;
         if (devinfo->ver >= 7) {
            jip = brw_inst_jip(devinfo, inst);
         } else {
            jip = brw_inst_gfx6_jump_count(devinfo, inst);
         }

         brw_create_label(&root_label, offset + jip * to_bytes_scale, mem_ctx);
      }

      if (is_compact) {
         offset += sizeof(brw_compact_inst);
      } else {
         offset += sizeof(brw_inst);
      }
   }

   return root_label;
}

void
brw_disassemble_with_labels(const struct intel_device_info *devinfo,
                            const void *assembly, int start, int end, FILE *out)
{
   void *mem_ctx = ralloc_context(NULL);
   const struct brw_label *root_label =
      brw_label_assembly(devinfo, assembly, start, end, mem_ctx);

   brw_disassemble(devinfo, assembly, start, end, root_label, out);

   ralloc_free(mem_ctx);
}

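/* Usage sketch (illustrative): dumping a finished program to stderr with
 * branch-target labels resolved:
 *
 *    unsigned sz;
 *    const unsigned *program = brw_get_program(p, &sz);
 *    brw_disassemble_with_labels(p->devinfo, program, 0, sz, stderr);
 */
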
void
brw_disassemble(const struct intel_device_info *devinfo,
                const void *assembly, int start, int end,
                const struct brw_label *root_label, FILE *out)
{
   bool dump_hex = (INTEL_DEBUG & DEBUG_HEX) != 0;

   for (int offset = start; offset < end;) {
      const brw_inst *insn = (const brw_inst *)((char *)assembly + offset);
      brw_inst uncompacted;

      if (root_label != NULL) {
         const struct brw_label *label = brw_find_label(root_label, offset);
         if (label != NULL) {
            fprintf(out, "\nLABEL%d:\n", label->number);
         }
      }

      bool compacted = brw_inst_cmpt_control(devinfo, insn);
      if (0)
         fprintf(out, "0x%08x: ", offset);

      if (compacted) {
         brw_compact_inst *compacted = (brw_compact_inst *)insn;
         if (dump_hex) {
            unsigned char *insn_ptr = ((unsigned char *)&insn[0]);
            const unsigned int blank_spaces = 24;
            for (int i = 0; i < 8; i = i + 4) {
               fprintf(out, "%02x %02x %02x %02x ",
                       insn_ptr[i],
                       insn_ptr[i + 1],
                       insn_ptr[i + 2],
                       insn_ptr[i + 3]);
            }
            /* Keep the hex output of compacted instructions vertically
             * aligned with that of uncompacted instructions.
             */
            fprintf(out, "%*c", blank_spaces, ' ');
         }

         brw_uncompact_instruction(devinfo, &uncompacted, compacted);
         insn = &uncompacted;
      } else {
         if (dump_hex) {
            unsigned char *insn_ptr = ((unsigned char *)&insn[0]);
            for (int i = 0; i < 16; i = i + 4) {
               fprintf(out, "%02x %02x %02x %02x ",
                       insn_ptr[i],
                       insn_ptr[i + 1],
                       insn_ptr[i + 2],
                       insn_ptr[i + 3]);
            }
         }
      }

      brw_disassemble_inst(out, devinfo, insn, compacted, offset, root_label);

      if (compacted) {
         offset += sizeof(brw_compact_inst);
      } else {
         offset += sizeof(brw_inst);
      }
   }
}

static const struct opcode_desc opcode_descs[] = {
   /* IR, HW, name, nsrc, ndst, gfx_vers */
   { BRW_OPCODE_ILLEGAL, 0, "illegal", 0, 0, GFX_ALL },
   { BRW_OPCODE_SYNC, 1, "sync", 1, 0, GFX_GE(GFX12) },
   { BRW_OPCODE_MOV, 1, "mov", 1, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_MOV, 97, "mov", 1, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_SEL, 2, "sel", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_SEL, 98, "sel", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_MOVI, 3, "movi", 2, 1, GFX_GE(GFX45) & GFX_LT(GFX12) },
   { BRW_OPCODE_MOVI, 99, "movi", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_NOT, 4, "not", 1, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_NOT, 100, "not", 1, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_AND, 5, "and", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_AND, 101, "and", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_OR, 6, "or", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_OR, 102, "or", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_XOR, 7, "xor", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_XOR, 103, "xor", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_SHR, 8, "shr", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_SHR, 104, "shr", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_SHL, 9, "shl", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_SHL, 105, "shl", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_DIM, 10, "dim", 1, 1, GFX75 },
   { BRW_OPCODE_SMOV, 10, "smov", 0, 0, GFX_GE(GFX8) & GFX_LT(GFX12) },
   { BRW_OPCODE_SMOV, 106, "smov", 0, 0, GFX_GE(GFX12) },
   { BRW_OPCODE_ASR, 12, "asr", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_ASR, 108, "asr", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_ROR, 14, "ror", 2, 1, GFX11 },
   { BRW_OPCODE_ROR, 110, "ror", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_ROL, 15, "rol", 2, 1, GFX11 },
   { BRW_OPCODE_ROL, 111, "rol", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_CMP, 16, "cmp", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_CMP, 112, "cmp", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_CMPN, 17, "cmpn", 2, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_CMPN, 113, "cmpn", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_CSEL, 18, "csel", 3, 1, GFX_GE(GFX8) & GFX_LT(GFX12) },
   { BRW_OPCODE_CSEL, 114, "csel", 3, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_F32TO16, 19, "f32to16", 1, 1, GFX7 | GFX75 },
   { BRW_OPCODE_F16TO32, 20, "f16to32", 1, 1, GFX7 | GFX75 },
   { BRW_OPCODE_BFREV, 23, "bfrev", 1, 1, GFX_GE(GFX7) & GFX_LT(GFX12) },
   { BRW_OPCODE_BFREV, 119, "bfrev", 1, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_BFE, 24, "bfe", 3, 1, GFX_GE(GFX7) & GFX_LT(GFX12) },
   { BRW_OPCODE_BFE, 120, "bfe", 3, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_BFI1, 25, "bfi1", 2, 1, GFX_GE(GFX7) & GFX_LT(GFX12) },
   { BRW_OPCODE_BFI1, 121, "bfi1", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_BFI2, 26, "bfi2", 3, 1, GFX_GE(GFX7) & GFX_LT(GFX12) },
   { BRW_OPCODE_BFI2, 122, "bfi2", 3, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_JMPI, 32, "jmpi", 0, 0, GFX_ALL },
   { BRW_OPCODE_BRD, 33, "brd", 0, 0, GFX_GE(GFX7) },
   { BRW_OPCODE_IF, 34, "if", 0, 0, GFX_ALL },
   { BRW_OPCODE_IFF, 35, "iff", 0, 0, GFX_LE(GFX5) },
   { BRW_OPCODE_BRC, 35, "brc", 0, 0, GFX_GE(GFX7) },
   { BRW_OPCODE_ELSE, 36, "else", 0, 0, GFX_ALL },
   { BRW_OPCODE_ENDIF, 37, "endif", 0, 0, GFX_ALL },
   { BRW_OPCODE_DO, 38, "do", 0, 0, GFX_LE(GFX5) },
   { BRW_OPCODE_CASE, 38, "case", 0, 0, GFX6 },
   { BRW_OPCODE_WHILE, 39, "while", 0, 0, GFX_ALL },
   { BRW_OPCODE_BREAK, 40, "break", 0, 0, GFX_ALL },
   { BRW_OPCODE_CONTINUE, 41, "cont", 0, 0, GFX_ALL },
   { BRW_OPCODE_HALT, 42, "halt", 0, 0, GFX_ALL },
   { BRW_OPCODE_CALLA, 43, "calla", 0, 0, GFX_GE(GFX75) },
   { BRW_OPCODE_MSAVE, 44, "msave", 0, 0, GFX_LE(GFX5) },
   { BRW_OPCODE_CALL, 44, "call", 0, 0, GFX_GE(GFX6) },
   { BRW_OPCODE_MREST, 45, "mrest", 0, 0, GFX_LE(GFX5) },
   { BRW_OPCODE_RET, 45, "ret", 0, 0, GFX_GE(GFX6) },
   { BRW_OPCODE_PUSH, 46, "push", 0, 0, GFX_LE(GFX5) },
   { BRW_OPCODE_FORK, 46, "fork", 0, 0, GFX6 },
   { BRW_OPCODE_GOTO, 46, "goto", 0, 0, GFX_GE(GFX8) },
   { BRW_OPCODE_POP, 47, "pop", 2, 0, GFX_LE(GFX5) },
   { BRW_OPCODE_WAIT, 48, "wait", 0, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_SEND, 49, "send", 1, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_SENDC, 50, "sendc", 1, 1, GFX_LT(GFX12) },
   { BRW_OPCODE_SEND, 49, "send", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_SENDC, 50, "sendc", 2, 1, GFX_GE(GFX12) },
   { BRW_OPCODE_SENDS, 51, "sends", 2, 1, GFX_GE(GFX9) & GFX_LT(GFX12) },
   { BRW_OPCODE_SENDSC, 52, "sendsc", 2, 1, GFX_GE(GFX9) & GFX_LT(GFX12) },
   { BRW_OPCODE_MATH, 56, "math", 2, 1, GFX_GE(GFX6) },
   { BRW_OPCODE_ADD, 64, "add", 2, 1, GFX_ALL },
   { BRW_OPCODE_MUL, 65, "mul", 2, 1, GFX_ALL },
   { BRW_OPCODE_AVG, 66, "avg", 2, 1, GFX_ALL },
   { BRW_OPCODE_FRC, 67, "frc", 1, 1, GFX_ALL },
   { BRW_OPCODE_RNDU, 68, "rndu", 1, 1, GFX_ALL },
   { BRW_OPCODE_RNDD, 69, "rndd", 1, 1, GFX_ALL },
   { BRW_OPCODE_RNDE, 70, "rnde", 1, 1, GFX_ALL },
   { BRW_OPCODE_RNDZ, 71, "rndz", 1, 1, GFX_ALL },
   { BRW_OPCODE_MAC, 72, "mac", 2, 1, GFX_ALL },
   { BRW_OPCODE_MACH, 73, "mach", 2, 1, GFX_ALL },
   { BRW_OPCODE_LZD, 74, "lzd", 1, 1, GFX_ALL },
   { BRW_OPCODE_FBH, 75, "fbh", 1, 1, GFX_GE(GFX7) },
   { BRW_OPCODE_FBL, 76, "fbl", 1, 1, GFX_GE(GFX7) },
   { BRW_OPCODE_CBIT, 77, "cbit", 1, 1, GFX_GE(GFX7) },
   { BRW_OPCODE_ADDC, 78, "addc", 2, 1, GFX_GE(GFX7) },
   { BRW_OPCODE_SUBB, 79, "subb", 2, 1, GFX_GE(GFX7) },
   { BRW_OPCODE_SAD2, 80, "sad2", 2, 1, GFX_ALL },
   { BRW_OPCODE_SADA2, 81, "sada2", 2, 1, GFX_ALL },
   { BRW_OPCODE_DP4, 84, "dp4", 2, 1, GFX_LT(GFX11) },
   { BRW_OPCODE_DPH, 85, "dph", 2, 1, GFX_LT(GFX11) },
   { BRW_OPCODE_DP3, 86, "dp3", 2, 1, GFX_LT(GFX11) },
   { BRW_OPCODE_DP2, 87, "dp2", 2, 1, GFX_LT(GFX11) },
   { BRW_OPCODE_LINE, 89, "line", 2, 1, GFX_LE(GFX10) },
   { BRW_OPCODE_PLN, 90, "pln", 2, 1, GFX_GE(GFX45) & GFX_LE(GFX10) },
   { BRW_OPCODE_MAD, 91, "mad", 3, 1, GFX_GE(GFX6) },
   { BRW_OPCODE_LRP, 92, "lrp", 3, 1, GFX_GE(GFX6) & GFX_LE(GFX10) },
   { BRW_OPCODE_MADM, 93, "madm", 3, 1, GFX_GE(GFX8) },
   { BRW_OPCODE_NENOP, 125, "nenop", 0, 0, GFX45 },
   { BRW_OPCODE_NOP, 126, "nop", 0, 0, GFX_LT(GFX12) },
   { BRW_OPCODE_NOP, 96, "nop", 0, 0, GFX_GE(GFX12) }
};

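/* The gfx_vers column is a bitset of hardware generations, so ranges
 * compose with plain bitwise operators: for example,
 * GFX_GE(GFX9) & GFX_LT(GFX12) covers Gfx9 through Gfx11 (sends/sendsc
 * above), and GFX7 | GFX75 matches only Ivybridge and Haswell
 * (f32to16/f16to32 above).  lookup_opcode_desc() below tests membership
 * with a single AND against the device's version bit.
 */
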
/**
 * Look up the opcode_descs[] entry with \p key member matching \p k which is
 * supported by the device specified by \p devinfo, or NULL if there is no
 * matching entry.
 *
 * This is implemented by using an index data structure (storage for which is
 * provided by the caller as \p index_ver and \p index_descs) in order to
 * provide efficient constant-time look-up.
 */
static const opcode_desc *
lookup_opcode_desc(gfx_ver *index_ver,
                   const opcode_desc **index_descs,
                   unsigned index_size,
                   unsigned opcode_desc::*key,
                   const intel_device_info *devinfo,
                   unsigned k)
{
   if (*index_ver != gfx_ver_from_devinfo(devinfo)) {
      *index_ver = gfx_ver_from_devinfo(devinfo);

      for (unsigned l = 0; l < index_size; l++)
         index_descs[l] = NULL;

      for (unsigned i = 0; i < ARRAY_SIZE(opcode_descs); i++) {
         if (opcode_descs[i].gfx_vers & *index_ver) {
            const unsigned l = opcode_descs[i].*key;
            assert(l < index_size && !index_descs[l]);
            index_descs[l] = &opcode_descs[i];
         }
      }
   }

   if (k < index_size)
      return index_descs[k];
   else
      return NULL;
}

/**
 * Return the matching opcode_desc for the specified IR opcode and hardware
 * generation, or NULL if the opcode is not supported by the device.
 */
const struct opcode_desc *
brw_opcode_desc(const struct intel_device_info *devinfo, enum opcode opcode)
{
   static __thread gfx_ver index_ver = {};
   static __thread const opcode_desc *index_descs[NUM_BRW_OPCODES];
   return lookup_opcode_desc(&index_ver, index_descs, ARRAY_SIZE(index_descs),
                             &opcode_desc::ir, devinfo, opcode);
}

/**
 * Return the matching opcode_desc for the specified HW opcode and hardware
 * generation, or NULL if the opcode is not supported by the device.
 */
const struct opcode_desc *
brw_opcode_desc_from_hw(const struct intel_device_info *devinfo, unsigned hw)
{
   static __thread gfx_ver index_ver = {};
   static __thread const opcode_desc *index_descs[128];
   return lookup_opcode_desc(&index_ver, index_descs, ARRAY_SIZE(index_descs),
                             &opcode_desc::hw, devinfo, hw);
}

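/* Usage sketch (illustrative): querying operand counts for an IR opcode on
 * the current device.  brw_opcode_desc() returns NULL when the opcode does
 * not exist on that generation, so callers must check:
 *
 *    const struct opcode_desc *desc = brw_opcode_desc(devinfo, BRW_OPCODE_MAD);
 *    if (desc != NULL)
 *       printf("%s takes %d sources\n", desc->name, desc->nsrc);
 */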