/* Path: blob/21.2-virgl/src/intel/compiler/brw_eu.cpp
 * 4550 views
 */
/*1Copyright (C) Intel Corp. 2006. All Rights Reserved.2Intel funded Tungsten Graphics to3develop this 3D driver.45Permission is hereby granted, free of charge, to any person obtaining6a copy of this software and associated documentation files (the7"Software"), to deal in the Software without restriction, including8without limitation the rights to use, copy, modify, merge, publish,9distribute, sublicense, and/or sell copies of the Software, and to10permit persons to whom the Software is furnished to do so, subject to11the following conditions:1213The above copyright notice and this permission notice (including the14next paragraph) shall be included in all copies or substantial15portions of the Software.1617THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,18EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.20IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE21LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION22OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION23WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.2425**********************************************************************/26/*27* Authors:28* Keith Whitwell <[email protected]>29*/3031#include <sys/stat.h>32#include <fcntl.h>3334#include "brw_eu_defines.h"35#include "brw_eu.h"36#include "brw_shader.h"37#include "brw_gfx_ver_enum.h"38#include "dev/intel_debug.h"3940#include "util/ralloc.h"4142/* Returns a conditional modifier that negates the condition. 
*/43enum brw_conditional_mod44brw_negate_cmod(enum brw_conditional_mod cmod)45{46switch (cmod) {47case BRW_CONDITIONAL_Z:48return BRW_CONDITIONAL_NZ;49case BRW_CONDITIONAL_NZ:50return BRW_CONDITIONAL_Z;51case BRW_CONDITIONAL_G:52return BRW_CONDITIONAL_LE;53case BRW_CONDITIONAL_GE:54return BRW_CONDITIONAL_L;55case BRW_CONDITIONAL_L:56return BRW_CONDITIONAL_GE;57case BRW_CONDITIONAL_LE:58return BRW_CONDITIONAL_G;59default:60unreachable("Can't negate this cmod");61}62}6364/* Returns the corresponding conditional mod for swapping src0 and65* src1 in e.g. CMP.66*/67enum brw_conditional_mod68brw_swap_cmod(enum brw_conditional_mod cmod)69{70switch (cmod) {71case BRW_CONDITIONAL_Z:72case BRW_CONDITIONAL_NZ:73return cmod;74case BRW_CONDITIONAL_G:75return BRW_CONDITIONAL_L;76case BRW_CONDITIONAL_GE:77return BRW_CONDITIONAL_LE;78case BRW_CONDITIONAL_L:79return BRW_CONDITIONAL_G;80case BRW_CONDITIONAL_LE:81return BRW_CONDITIONAL_GE;82default:83return BRW_CONDITIONAL_NONE;84}85}8687/**88* Get the least significant bit offset of the i+1-th component of immediate89* type \p type. For \p i equal to the two's complement of j, return the90* offset of the j-th component starting from the end of the vector. 
For91* scalar register types return zero.92*/93static unsigned94imm_shift(enum brw_reg_type type, unsigned i)95{96assert(type != BRW_REGISTER_TYPE_UV && type != BRW_REGISTER_TYPE_V &&97"Not implemented.");9899if (type == BRW_REGISTER_TYPE_VF)100return 8 * (i & 3);101else102return 0;103}104105/**106* Swizzle an arbitrary immediate \p x of the given type according to the107* permutation specified as \p swz.108*/109uint32_t110brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz)111{112if (imm_shift(type, 1)) {113const unsigned n = 32 / imm_shift(type, 1);114uint32_t y = 0;115116for (unsigned i = 0; i < n; i++) {117/* Shift the specified component all the way to the right and left to118* discard any undesired L/MSBs, then shift it right into component i.119*/120y |= x >> imm_shift(type, (i & ~3) + BRW_GET_SWZ(swz, i & 3))121<< imm_shift(type, ~0u)122>> imm_shift(type, ~0u - i);123}124125return y;126} else {127return x;128}129}130131unsigned132brw_get_default_exec_size(struct brw_codegen *p)133{134return p->current->exec_size;135}136137unsigned138brw_get_default_group(struct brw_codegen *p)139{140return p->current->group;141}142143unsigned144brw_get_default_access_mode(struct brw_codegen *p)145{146return p->current->access_mode;147}148149tgl_swsb150brw_get_default_swsb(struct brw_codegen *p)151{152return p->current->swsb;153}154155void156brw_set_default_exec_size(struct brw_codegen *p, unsigned value)157{158p->current->exec_size = value;159}160161void brw_set_default_predicate_control(struct brw_codegen *p, enum brw_predicate pc)162{163p->current->predicate = pc;164}165166void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse)167{168p->current->pred_inv = predicate_inverse;169}170171void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg)172{173assert(subreg < 2);174p->current->flag_subreg = reg * 2 + subreg;175}176177void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode 
)178{179p->current->access_mode = access_mode;180}181182void183brw_set_default_compression_control(struct brw_codegen *p,184enum brw_compression compression_control)185{186switch (compression_control) {187case BRW_COMPRESSION_NONE:188/* This is the "use the first set of bits of dmask/vmask/arf189* according to execsize" option.190*/191p->current->group = 0;192break;193case BRW_COMPRESSION_2NDHALF:194/* For SIMD8, this is "use the second set of 8 bits." */195p->current->group = 8;196break;197case BRW_COMPRESSION_COMPRESSED:198/* For SIMD16 instruction compression, use the first set of 16 bits199* since we don't do SIMD32 dispatch.200*/201p->current->group = 0;202break;203default:204unreachable("not reached");205}206207if (p->devinfo->ver <= 6) {208p->current->compressed =209(compression_control == BRW_COMPRESSION_COMPRESSED);210}211}212213/**214* Enable or disable instruction compression on the given instruction leaving215* the currently selected channel enable group untouched.216*/217void218brw_inst_set_compression(const struct intel_device_info *devinfo,219brw_inst *inst, bool on)220{221if (devinfo->ver >= 6) {222/* No-op, the EU will figure out for us whether the instruction needs to223* be compressed.224*/225} else {226/* The channel group and compression controls are non-orthogonal, there227* are two possible representations for uncompressed instructions and we228* may need to preserve the current one to avoid changing the selected229* channel group inadvertently.230*/231if (on)232brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_COMPRESSED);233else if (brw_inst_qtr_control(devinfo, inst)234== BRW_COMPRESSION_COMPRESSED)235brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);236}237}238239void240brw_set_default_compression(struct brw_codegen *p, bool on)241{242p->current->compressed = on;243}244245/**246* Apply the range of channel enable signals given by247* [group, group + exec_size) to the instruction passed as 
argument.248*/249void250brw_inst_set_group(const struct intel_device_info *devinfo,251brw_inst *inst, unsigned group)252{253if (devinfo->ver >= 7) {254assert(group % 4 == 0 && group < 32);255brw_inst_set_qtr_control(devinfo, inst, group / 8);256brw_inst_set_nib_control(devinfo, inst, (group / 4) % 2);257258} else if (devinfo->ver == 6) {259assert(group % 8 == 0 && group < 32);260brw_inst_set_qtr_control(devinfo, inst, group / 8);261262} else {263assert(group % 8 == 0 && group < 16);264/* The channel group and compression controls are non-orthogonal, there265* are two possible representations for group zero and we may need to266* preserve the current one to avoid changing the selected compression267* enable inadvertently.268*/269if (group == 8)270brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_2NDHALF);271else if (brw_inst_qtr_control(devinfo, inst) == BRW_COMPRESSION_2NDHALF)272brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);273}274}275276void277brw_set_default_group(struct brw_codegen *p, unsigned group)278{279p->current->group = group;280}281282void brw_set_default_mask_control( struct brw_codegen *p, unsigned value )283{284p->current->mask_control = value;285}286287void brw_set_default_saturate( struct brw_codegen *p, bool enable )288{289p->current->saturate = enable;290}291292void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value)293{294p->current->acc_wr_control = value;295}296297void brw_set_default_swsb(struct brw_codegen *p, tgl_swsb value)298{299p->current->swsb = value;300}301302void brw_push_insn_state( struct brw_codegen *p )303{304assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);305*(p->current + 1) = *p->current;306p->current++;307}308309void brw_pop_insn_state( struct brw_codegen *p )310{311assert(p->current != p->stack);312p->current--;313}314315316/***********************************************************************317*/318void319brw_init_codegen(const struct intel_device_info 
*devinfo,320struct brw_codegen *p, void *mem_ctx)321{322memset(p, 0, sizeof(*p));323324p->devinfo = devinfo;325p->automatic_exec_sizes = true;326/*327* Set the initial instruction store array size to 1024, if found that328* isn't enough, then it will double the store size at brw_next_insn()329* until out of memory.330*/331p->store_size = 1024;332p->store = rzalloc_array(mem_ctx, brw_inst, p->store_size);333p->nr_insn = 0;334p->current = p->stack;335memset(p->current, 0, sizeof(p->current[0]));336337p->mem_ctx = mem_ctx;338339/* Some defaults?340*/341brw_set_default_exec_size(p, BRW_EXECUTE_8);342brw_set_default_mask_control(p, BRW_MASK_ENABLE); /* what does this do? */343brw_set_default_saturate(p, 0);344brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);345346/* Set up control flow stack */347p->if_stack_depth = 0;348p->if_stack_array_size = 16;349p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size);350351p->loop_stack_depth = 0;352p->loop_stack_array_size = 16;353p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);354p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);355}356357358const unsigned *brw_get_program( struct brw_codegen *p,359unsigned *sz )360{361*sz = p->next_insn_offset;362return (const unsigned *)p->store;363}364365const brw_shader_reloc *366brw_get_shader_relocs(struct brw_codegen *p, unsigned *num_relocs)367{368*num_relocs = p->num_relocs;369return p->relocs;370}371372bool brw_try_override_assembly(struct brw_codegen *p, int start_offset,373const char *identifier)374{375const char *read_path = getenv("INTEL_SHADER_ASM_READ_PATH");376if (!read_path) {377return false;378}379380char *name = ralloc_asprintf(NULL, "%s/%s.bin", read_path, identifier);381382int fd = open(name, O_RDONLY);383ralloc_free(name);384385if (fd == -1) {386return false;387}388389struct stat sb;390if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) {391close(fd);392return false;393}394395p->nr_insn -= 
(p->next_insn_offset - start_offset) / sizeof(brw_inst);396p->nr_insn += sb.st_size / sizeof(brw_inst);397398p->next_insn_offset = start_offset + sb.st_size;399p->store_size = (start_offset + sb.st_size) / sizeof(brw_inst);400p->store = (brw_inst *)reralloc_size(p->mem_ctx, p->store, p->next_insn_offset);401assert(p->store);402403ssize_t ret = read(fd, (char *)p->store + start_offset, sb.st_size);404close(fd);405if (ret != sb.st_size) {406return false;407}408409ASSERTED bool valid =410brw_validate_instructions(p->devinfo, p->store,411start_offset, p->next_insn_offset,412NULL);413assert(valid);414415return true;416}417418const struct brw_label *419brw_find_label(const struct brw_label *root, int offset)420{421const struct brw_label *curr = root;422423if (curr != NULL)424{425do {426if (curr->offset == offset)427return curr;428429curr = curr->next;430} while (curr != NULL);431}432433return curr;434}435436void437brw_create_label(struct brw_label **labels, int offset, void *mem_ctx)438{439if (*labels != NULL) {440struct brw_label *curr = *labels;441struct brw_label *prev;442443do {444prev = curr;445446if (curr->offset == offset)447return;448449curr = curr->next;450} while (curr != NULL);451452curr = ralloc(mem_ctx, struct brw_label);453curr->offset = offset;454curr->number = prev->number + 1;455curr->next = NULL;456prev->next = curr;457} else {458struct brw_label *root = ralloc(mem_ctx, struct brw_label);459root->number = 0;460root->offset = offset;461root->next = NULL;462*labels = root;463}464}465466const struct brw_label *467brw_label_assembly(const struct intel_device_info *devinfo,468const void *assembly, int start, int end, void *mem_ctx)469{470struct brw_label *root_label = NULL;471472int to_bytes_scale = sizeof(brw_inst) / brw_jump_scale(devinfo);473474for (int offset = start; offset < end;) {475const brw_inst *inst = (const brw_inst *) ((const char *) assembly + offset);476brw_inst uncompacted;477478bool is_compact = brw_inst_cmpt_control(devinfo, inst);479480if 
(is_compact) {481brw_compact_inst *compacted = (brw_compact_inst *)inst;482brw_uncompact_instruction(devinfo, &uncompacted, compacted);483inst = &uncompacted;484}485486if (brw_has_uip(devinfo, brw_inst_opcode(devinfo, inst))) {487/* Instructions that have UIP also have JIP. */488brw_create_label(&root_label,489offset + brw_inst_uip(devinfo, inst) * to_bytes_scale, mem_ctx);490brw_create_label(&root_label,491offset + brw_inst_jip(devinfo, inst) * to_bytes_scale, mem_ctx);492} else if (brw_has_jip(devinfo, brw_inst_opcode(devinfo, inst))) {493int jip;494if (devinfo->ver >= 7) {495jip = brw_inst_jip(devinfo, inst);496} else {497jip = brw_inst_gfx6_jump_count(devinfo, inst);498}499500brw_create_label(&root_label, offset + jip * to_bytes_scale, mem_ctx);501}502503if (is_compact) {504offset += sizeof(brw_compact_inst);505} else {506offset += sizeof(brw_inst);507}508}509510return root_label;511}512513void514brw_disassemble_with_labels(const struct intel_device_info *devinfo,515const void *assembly, int start, int end, FILE *out)516{517void *mem_ctx = ralloc_context(NULL);518const struct brw_label *root_label =519brw_label_assembly(devinfo, assembly, start, end, mem_ctx);520521brw_disassemble(devinfo, assembly, start, end, root_label, out);522523ralloc_free(mem_ctx);524}525526void527brw_disassemble(const struct intel_device_info *devinfo,528const void *assembly, int start, int end,529const struct brw_label *root_label, FILE *out)530{531bool dump_hex = (INTEL_DEBUG & DEBUG_HEX) != 0;532533for (int offset = start; offset < end;) {534const brw_inst *insn = (const brw_inst *)((char *)assembly + offset);535brw_inst uncompacted;536537if (root_label != NULL) {538const struct brw_label *label = brw_find_label(root_label, offset);539if (label != NULL) {540fprintf(out, "\nLABEL%d:\n", label->number);541}542}543544bool compacted = brw_inst_cmpt_control(devinfo, insn);545if (0)546fprintf(out, "0x%08x: ", offset);547548if (compacted) {549brw_compact_inst *compacted = (brw_compact_inst 
*)insn;550if (dump_hex) {551unsigned char * insn_ptr = ((unsigned char *)&insn[0]);552const unsigned int blank_spaces = 24;553for (int i = 0 ; i < 8; i = i + 4) {554fprintf(out, "%02x %02x %02x %02x ",555insn_ptr[i],556insn_ptr[i + 1],557insn_ptr[i + 2],558insn_ptr[i + 3]);559}560/* Make compacted instructions hex value output vertically aligned561* with uncompacted instructions hex value562*/563fprintf(out, "%*c", blank_spaces, ' ');564}565566brw_uncompact_instruction(devinfo, &uncompacted, compacted);567insn = &uncompacted;568} else {569if (dump_hex) {570unsigned char * insn_ptr = ((unsigned char *)&insn[0]);571for (int i = 0 ; i < 16; i = i + 4) {572fprintf(out, "%02x %02x %02x %02x ",573insn_ptr[i],574insn_ptr[i + 1],575insn_ptr[i + 2],576insn_ptr[i + 3]);577}578}579}580581brw_disassemble_inst(out, devinfo, insn, compacted, offset, root_label);582583if (compacted) {584offset += sizeof(brw_compact_inst);585} else {586offset += sizeof(brw_inst);587}588}589}590591static const struct opcode_desc opcode_descs[] = {592/* IR, HW, name, nsrc, ndst, gfx_vers */593{ BRW_OPCODE_ILLEGAL, 0, "illegal", 0, 0, GFX_ALL },594{ BRW_OPCODE_SYNC, 1, "sync", 1, 0, GFX_GE(GFX12) },595{ BRW_OPCODE_MOV, 1, "mov", 1, 1, GFX_LT(GFX12) },596{ BRW_OPCODE_MOV, 97, "mov", 1, 1, GFX_GE(GFX12) },597{ BRW_OPCODE_SEL, 2, "sel", 2, 1, GFX_LT(GFX12) },598{ BRW_OPCODE_SEL, 98, "sel", 2, 1, GFX_GE(GFX12) },599{ BRW_OPCODE_MOVI, 3, "movi", 2, 1, GFX_GE(GFX45) & GFX_LT(GFX12) },600{ BRW_OPCODE_MOVI, 99, "movi", 2, 1, GFX_GE(GFX12) },601{ BRW_OPCODE_NOT, 4, "not", 1, 1, GFX_LT(GFX12) },602{ BRW_OPCODE_NOT, 100, "not", 1, 1, GFX_GE(GFX12) },603{ BRW_OPCODE_AND, 5, "and", 2, 1, GFX_LT(GFX12) },604{ BRW_OPCODE_AND, 101, "and", 2, 1, GFX_GE(GFX12) },605{ BRW_OPCODE_OR, 6, "or", 2, 1, GFX_LT(GFX12) },606{ BRW_OPCODE_OR, 102, "or", 2, 1, GFX_GE(GFX12) },607{ BRW_OPCODE_XOR, 7, "xor", 2, 1, GFX_LT(GFX12) },608{ BRW_OPCODE_XOR, 103, "xor", 2, 1, GFX_GE(GFX12) },609{ BRW_OPCODE_SHR, 8, "shr", 2, 1, 
GFX_LT(GFX12) },610{ BRW_OPCODE_SHR, 104, "shr", 2, 1, GFX_GE(GFX12) },611{ BRW_OPCODE_SHL, 9, "shl", 2, 1, GFX_LT(GFX12) },612{ BRW_OPCODE_SHL, 105, "shl", 2, 1, GFX_GE(GFX12) },613{ BRW_OPCODE_DIM, 10, "dim", 1, 1, GFX75 },614{ BRW_OPCODE_SMOV, 10, "smov", 0, 0, GFX_GE(GFX8) & GFX_LT(GFX12) },615{ BRW_OPCODE_SMOV, 106, "smov", 0, 0, GFX_GE(GFX12) },616{ BRW_OPCODE_ASR, 12, "asr", 2, 1, GFX_LT(GFX12) },617{ BRW_OPCODE_ASR, 108, "asr", 2, 1, GFX_GE(GFX12) },618{ BRW_OPCODE_ROR, 14, "ror", 2, 1, GFX11 },619{ BRW_OPCODE_ROR, 110, "ror", 2, 1, GFX_GE(GFX12) },620{ BRW_OPCODE_ROL, 15, "rol", 2, 1, GFX11 },621{ BRW_OPCODE_ROL, 111, "rol", 2, 1, GFX_GE(GFX12) },622{ BRW_OPCODE_CMP, 16, "cmp", 2, 1, GFX_LT(GFX12) },623{ BRW_OPCODE_CMP, 112, "cmp", 2, 1, GFX_GE(GFX12) },624{ BRW_OPCODE_CMPN, 17, "cmpn", 2, 1, GFX_LT(GFX12) },625{ BRW_OPCODE_CMPN, 113, "cmpn", 2, 1, GFX_GE(GFX12) },626{ BRW_OPCODE_CSEL, 18, "csel", 3, 1, GFX_GE(GFX8) & GFX_LT(GFX12) },627{ BRW_OPCODE_CSEL, 114, "csel", 3, 1, GFX_GE(GFX12) },628{ BRW_OPCODE_F32TO16, 19, "f32to16", 1, 1, GFX7 | GFX75 },629{ BRW_OPCODE_F16TO32, 20, "f16to32", 1, 1, GFX7 | GFX75 },630{ BRW_OPCODE_BFREV, 23, "bfrev", 1, 1, GFX_GE(GFX7) & GFX_LT(GFX12) },631{ BRW_OPCODE_BFREV, 119, "bfrev", 1, 1, GFX_GE(GFX12) },632{ BRW_OPCODE_BFE, 24, "bfe", 3, 1, GFX_GE(GFX7) & GFX_LT(GFX12) },633{ BRW_OPCODE_BFE, 120, "bfe", 3, 1, GFX_GE(GFX12) },634{ BRW_OPCODE_BFI1, 25, "bfi1", 2, 1, GFX_GE(GFX7) & GFX_LT(GFX12) },635{ BRW_OPCODE_BFI1, 121, "bfi1", 2, 1, GFX_GE(GFX12) },636{ BRW_OPCODE_BFI2, 26, "bfi2", 3, 1, GFX_GE(GFX7) & GFX_LT(GFX12) },637{ BRW_OPCODE_BFI2, 122, "bfi2", 3, 1, GFX_GE(GFX12) },638{ BRW_OPCODE_JMPI, 32, "jmpi", 0, 0, GFX_ALL },639{ BRW_OPCODE_BRD, 33, "brd", 0, 0, GFX_GE(GFX7) },640{ BRW_OPCODE_IF, 34, "if", 0, 0, GFX_ALL },641{ BRW_OPCODE_IFF, 35, "iff", 0, 0, GFX_LE(GFX5) },642{ BRW_OPCODE_BRC, 35, "brc", 0, 0, GFX_GE(GFX7) },643{ BRW_OPCODE_ELSE, 36, "else", 0, 0, GFX_ALL },644{ BRW_OPCODE_ENDIF, 37, "endif", 0, 0, 
GFX_ALL },645{ BRW_OPCODE_DO, 38, "do", 0, 0, GFX_LE(GFX5) },646{ BRW_OPCODE_CASE, 38, "case", 0, 0, GFX6 },647{ BRW_OPCODE_WHILE, 39, "while", 0, 0, GFX_ALL },648{ BRW_OPCODE_BREAK, 40, "break", 0, 0, GFX_ALL },649{ BRW_OPCODE_CONTINUE, 41, "cont", 0, 0, GFX_ALL },650{ BRW_OPCODE_HALT, 42, "halt", 0, 0, GFX_ALL },651{ BRW_OPCODE_CALLA, 43, "calla", 0, 0, GFX_GE(GFX75) },652{ BRW_OPCODE_MSAVE, 44, "msave", 0, 0, GFX_LE(GFX5) },653{ BRW_OPCODE_CALL, 44, "call", 0, 0, GFX_GE(GFX6) },654{ BRW_OPCODE_MREST, 45, "mrest", 0, 0, GFX_LE(GFX5) },655{ BRW_OPCODE_RET, 45, "ret", 0, 0, GFX_GE(GFX6) },656{ BRW_OPCODE_PUSH, 46, "push", 0, 0, GFX_LE(GFX5) },657{ BRW_OPCODE_FORK, 46, "fork", 0, 0, GFX6 },658{ BRW_OPCODE_GOTO, 46, "goto", 0, 0, GFX_GE(GFX8) },659{ BRW_OPCODE_POP, 47, "pop", 2, 0, GFX_LE(GFX5) },660{ BRW_OPCODE_WAIT, 48, "wait", 0, 1, GFX_LT(GFX12) },661{ BRW_OPCODE_SEND, 49, "send", 1, 1, GFX_LT(GFX12) },662{ BRW_OPCODE_SENDC, 50, "sendc", 1, 1, GFX_LT(GFX12) },663{ BRW_OPCODE_SEND, 49, "send", 2, 1, GFX_GE(GFX12) },664{ BRW_OPCODE_SENDC, 50, "sendc", 2, 1, GFX_GE(GFX12) },665{ BRW_OPCODE_SENDS, 51, "sends", 2, 1, GFX_GE(GFX9) & GFX_LT(GFX12) },666{ BRW_OPCODE_SENDSC, 52, "sendsc", 2, 1, GFX_GE(GFX9) & GFX_LT(GFX12) },667{ BRW_OPCODE_MATH, 56, "math", 2, 1, GFX_GE(GFX6) },668{ BRW_OPCODE_ADD, 64, "add", 2, 1, GFX_ALL },669{ BRW_OPCODE_MUL, 65, "mul", 2, 1, GFX_ALL },670{ BRW_OPCODE_AVG, 66, "avg", 2, 1, GFX_ALL },671{ BRW_OPCODE_FRC, 67, "frc", 1, 1, GFX_ALL },672{ BRW_OPCODE_RNDU, 68, "rndu", 1, 1, GFX_ALL },673{ BRW_OPCODE_RNDD, 69, "rndd", 1, 1, GFX_ALL },674{ BRW_OPCODE_RNDE, 70, "rnde", 1, 1, GFX_ALL },675{ BRW_OPCODE_RNDZ, 71, "rndz", 1, 1, GFX_ALL },676{ BRW_OPCODE_MAC, 72, "mac", 2, 1, GFX_ALL },677{ BRW_OPCODE_MACH, 73, "mach", 2, 1, GFX_ALL },678{ BRW_OPCODE_LZD, 74, "lzd", 1, 1, GFX_ALL },679{ BRW_OPCODE_FBH, 75, "fbh", 1, 1, GFX_GE(GFX7) },680{ BRW_OPCODE_FBL, 76, "fbl", 1, 1, GFX_GE(GFX7) },681{ BRW_OPCODE_CBIT, 77, "cbit", 1, 1, GFX_GE(GFX7) },682{ 
BRW_OPCODE_ADDC, 78, "addc", 2, 1, GFX_GE(GFX7) },683{ BRW_OPCODE_SUBB, 79, "subb", 2, 1, GFX_GE(GFX7) },684{ BRW_OPCODE_SAD2, 80, "sad2", 2, 1, GFX_ALL },685{ BRW_OPCODE_SADA2, 81, "sada2", 2, 1, GFX_ALL },686{ BRW_OPCODE_DP4, 84, "dp4", 2, 1, GFX_LT(GFX11) },687{ BRW_OPCODE_DPH, 85, "dph", 2, 1, GFX_LT(GFX11) },688{ BRW_OPCODE_DP3, 86, "dp3", 2, 1, GFX_LT(GFX11) },689{ BRW_OPCODE_DP2, 87, "dp2", 2, 1, GFX_LT(GFX11) },690{ BRW_OPCODE_LINE, 89, "line", 2, 1, GFX_LE(GFX10) },691{ BRW_OPCODE_PLN, 90, "pln", 2, 1, GFX_GE(GFX45) & GFX_LE(GFX10) },692{ BRW_OPCODE_MAD, 91, "mad", 3, 1, GFX_GE(GFX6) },693{ BRW_OPCODE_LRP, 92, "lrp", 3, 1, GFX_GE(GFX6) & GFX_LE(GFX10) },694{ BRW_OPCODE_MADM, 93, "madm", 3, 1, GFX_GE(GFX8) },695{ BRW_OPCODE_NENOP, 125, "nenop", 0, 0, GFX45 },696{ BRW_OPCODE_NOP, 126, "nop", 0, 0, GFX_LT(GFX12) },697{ BRW_OPCODE_NOP, 96, "nop", 0, 0, GFX_GE(GFX12) }698};699700/**701* Look up the opcode_descs[] entry with \p key member matching \p k which is702* supported by the device specified by \p devinfo, or NULL if there is no703* matching entry.704*705* This is implemented by using an index data structure (storage for which is706* provided by the caller as \p index_ver and \p index_descs) in order to707* provide efficient constant-time look-up.708*/709static const opcode_desc *710lookup_opcode_desc(gfx_ver *index_ver,711const opcode_desc **index_descs,712unsigned index_size,713unsigned opcode_desc::*key,714const intel_device_info *devinfo,715unsigned k)716{717if (*index_ver != gfx_ver_from_devinfo(devinfo)) {718*index_ver = gfx_ver_from_devinfo(devinfo);719720for (unsigned l = 0; l < index_size; l++)721index_descs[l] = NULL;722723for (unsigned i = 0; i < ARRAY_SIZE(opcode_descs); i++) {724if (opcode_descs[i].gfx_vers & *index_ver) {725const unsigned l = opcode_descs[i].*key;726assert(l < index_size && !index_descs[l]);727index_descs[l] = &opcode_descs[i];728}729}730}731732if (k < index_size)733return index_descs[k];734else735return 
NULL;736}737738/**739* Return the matching opcode_desc for the specified IR opcode and hardware740* generation, or NULL if the opcode is not supported by the device.741*/742const struct opcode_desc *743brw_opcode_desc(const struct intel_device_info *devinfo, enum opcode opcode)744{745static __thread gfx_ver index_ver = {};746static __thread const opcode_desc *index_descs[NUM_BRW_OPCODES];747return lookup_opcode_desc(&index_ver, index_descs, ARRAY_SIZE(index_descs),748&opcode_desc::ir, devinfo, opcode);749}750751/**752* Return the matching opcode_desc for the specified HW opcode and hardware753* generation, or NULL if the opcode is not supported by the device.754*/755const struct opcode_desc *756brw_opcode_desc_from_hw(const struct intel_device_info *devinfo, unsigned hw)757{758static __thread gfx_ver index_ver = {};759static __thread const opcode_desc *index_descs[128];760return lookup_opcode_desc(&index_ver, index_descs, ARRAY_SIZE(index_descs),761&opcode_desc::hw, devinfo, hw);762}763764765