Path: blob/21.2-virgl/src/intel/compiler/brw_eu_compact.c
4550 views
/*
 * Copyright © 2012-2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file brw_eu_compact.c
 *
 * Instruction compaction is a feature of G45 and newer hardware that allows
 * for a smaller instruction encoding.
 *
 * The instruction cache is on the order of 32KB, and many programs generate
 * far more instructions than that. The instruction cache is built to barely
 * keep up with instruction dispatch ability in cache hit cases -- L1
 * instruction cache misses that still hit in the next level could limit
 * throughput by around 50%.
 *
 * The idea of instruction compaction is that most instructions use a tiny
 * subset of the GPU functionality, so we can encode what would be a 16 byte
 * instruction in 8 bytes using some lookup tables for various fields.
 *
 *
 * Instruction compaction capabilities vary subtly by generation.
 *
 * G45's support for instruction compaction is very limited. Jump counts on
 * this generation are in units of 16-byte uncompacted instructions. As such,
 * all jump targets must be 16-byte aligned. Also, all instructions must be
 * naturally aligned, i.e. uncompacted instructions must be 16-byte aligned.
 * A G45-only instruction, NENOP, must be used to provide padding to align
 * uncompacted instructions.
 *
 * Gfx5 removes these restrictions and changes jump counts to be in units of
 * 8-byte compacted instructions, allowing jump targets to be only 8-byte
 * aligned. Uncompacted instructions can also be placed on 8-byte boundaries.
 *
 * Gfx6 adds the ability to compact instructions with a limited range of
 * immediate values. Compactable immediates have 12 unrestricted bits, and a
 * 13th bit that's replicated through the high 20 bits, to create the 32-bit
 * value of DW3 in the uncompacted instruction word.
 *
 * On Gfx7 we can compact some control flow instructions with a small positive
 * immediate in the low bits of DW3, like ENDIF with the JIP field. Other
 * control flow instructions with UIP cannot be compacted, because of the
 * replicated 13th bit.
No control flow instructions can be compacted on Gfx661* since the jump count field is not in DW3.62*63* break JIP/UIP64* cont JIP/UIP65* halt JIP/UIP66* if JIP/UIP67* else JIP (plus UIP on BDW+)68* endif JIP69* while JIP (must be negative)70*71* Gen 8 adds support for compacting 3-src instructions.72*73* Gfx12 reduces the number of bits that available to compacted immediates from74* 13 to 12, but improves the compaction of floating-point immediates by75* allowing the high bits to be encoded (the sign, 8-bit exponent, and the76* three most significant bits of the mantissa), rather than the lowest bits of77* the mantissa.78*/7980#include "brw_eu.h"81#include "brw_shader.h"82#include "brw_disasm_info.h"83#include "dev/intel_debug.h"8485static const uint32_t g45_control_index_table[32] = {860b00000000000000000,870b01000000000000000,880b00110000000000000,890b00000000000000010,900b00100000000000000,910b00010000000000000,920b01000000000100000,930b01000000100000000,940b01010000000100000,950b00000000100000010,960b11000000000000000,970b00001000100000010,980b01001000100000000,990b00000000100000000,1000b11000000000100000,1010b00001000100000000,1020b10110000000000000,1030b11010000000100000,1040b00110000100000000,1050b00100000100000000,1060b01000000000001000,1070b01000000000000100,1080b00111100000000000,1090b00101011000000000,1100b00110000000010000,1110b00010000100000000,1120b01000000000100100,1130b01000000000101000,1140b00110000000000110,1150b00000000000001010,1160b01010000000101000,1170b01010000000100100,118};119120static const uint32_t g45_datatype_table[32] = 
{1210b001000000000100001,1220b001011010110101101,1230b001000001000110001,1240b001111011110111101,1250b001011010110101100,1260b001000000110101101,1270b001000000000100000,1280b010100010110110001,1290b001100011000101101,1300b001000000000100010,1310b001000001000110110,1320b010000001000110001,1330b001000001000110010,1340b011000001000110010,1350b001111011110111100,1360b001000000100101000,1370b010100011000110001,1380b001010010100101001,1390b001000001000101001,1400b010000001000110110,1410b101000001000110001,1420b001011011000101101,1430b001000000100001001,1440b001011011000101100,1450b110100011000110001,1460b001000001110111101,1470b110000001000110001,1480b011000000100101010,1490b101000001000101001,1500b001011010110001100,1510b001000000110100001,1520b001010010100001000,153};154155static const uint16_t g45_subreg_table[32] = {1560b000000000000000,1570b000000010000000,1580b000001000000000,1590b000100000000000,1600b000000000100000,1610b100000000000000,1620b000000000010000,1630b001100000000000,1640b001010000000000,1650b000000100000000,1660b001000000000000,1670b000000000001000,1680b000000001000000,1690b000000000000001,1700b000010000000000,1710b000000010100000,1720b000000000000111,1730b000001000100000,1740b011000000000000,1750b000000110000000,1760b000000000000010,1770b000000000000100,1780b000000001100000,1790b000100000000010,1800b001110011000110,1810b001110100001000,1820b000110011000110,1830b000001000011000,1840b000110010000100,1850b001100000000110,1860b000000010000110,1870b000001000110000,188};189190static const uint16_t g45_src_index_table[32] = 
{1910b000000000000,1920b010001101000,1930b010110001000,1940b011010010000,1950b001101001000,1960b010110001010,1970b010101110000,1980b011001111000,1990b001000101000,2000b000000101000,2010b010001010000,2020b111101101100,2030b010110001100,2040b010001101100,2050b011010010100,2060b010001001100,2070b001100101000,2080b000000000010,2090b111101001100,2100b011001101000,2110b010101001000,2120b000000000100,2130b000000101100,2140b010001101010,2150b000000111000,2160b010101011000,2170b000100100000,2180b010110000000,2190b010000000100,2200b010000111000,2210b000101100000,2220b111101110100,223};224225static const uint32_t gfx6_control_index_table[32] = {2260b00000000000000000,2270b01000000000000000,2280b00110000000000000,2290b00000000100000000,2300b00010000000000000,2310b00001000100000000,2320b00000000100000010,2330b00000000000000010,2340b01000000100000000,2350b01010000000000000,2360b10110000000000000,2370b00100000000000000,2380b11010000000000000,2390b11000000000000000,2400b01001000100000000,2410b01000000000001000,2420b01000000000000100,2430b00000000000001000,2440b00000000000000100,2450b00111000100000000,2460b00001000100000010,2470b00110000100000000,2480b00110000000000001,2490b00100000000000001,2500b00110000000000010,2510b00110000000000101,2520b00110000000001001,2530b00110000000010000,2540b00110000000000011,2550b00110000000000100,2560b00110000100001000,2570b00100000000001001,258};259260static const uint32_t gfx6_datatype_table[32] = 
{2610b001001110000000000,2620b001000110000100000,2630b001001110000000001,2640b001000000001100000,2650b001010110100101001,2660b001000000110101101,2670b001100011000101100,2680b001011110110101101,2690b001000000111101100,2700b001000000001100001,2710b001000110010100101,2720b001000000001000001,2730b001000001000110001,2740b001000001000101001,2750b001000000000100000,2760b001000001000110010,2770b001010010100101001,2780b001011010010100101,2790b001000000110100101,2800b001100011000101001,2810b001011011000101100,2820b001011010110100101,2830b001011110110100101,2840b001111011110111101,2850b001111011110111100,2860b001111011110111101,2870b001111011110011101,2880b001111011110111110,2890b001000000000100001,2900b001000000000100010,2910b001001111111011101,2920b001000001110111110,293};294295static const uint16_t gfx6_subreg_table[32] = {2960b000000000000000,2970b000000000000100,2980b000000110000000,2990b111000000000000,3000b011110000001000,3010b000010000000000,3020b000000000010000,3030b000110000001100,3040b001000000000000,3050b000001000000000,3060b000001010010100,3070b000000001010110,3080b010000000000000,3090b110000000000000,3100b000100000000000,3110b000000010000000,3120b000000000001000,3130b100000000000000,3140b000001010000000,3150b001010000000000,3160b001100000000000,3170b000000001010100,3180b101101010010100,3190b010100000000000,3200b000000010001111,3210b011000000000000,3220b111110000000000,3230b101000000000000,3240b000000000001111,3250b000100010001111,3260b001000010001111,3270b000110000000000,328};329330static const uint16_t gfx6_src_index_table[32] = 
{3310b000000000000,3320b010110001000,3330b010001101000,3340b001000101000,3350b011010010000,3360b000100100000,3370b010001101100,3380b010101110000,3390b011001111000,3400b001100101000,3410b010110001100,3420b001000100000,3430b010110001010,3440b000000000010,3450b010101010000,3460b010101101000,3470b111101001100,3480b111100101100,3490b011001110000,3500b010110001001,3510b010101011000,3520b001101001000,3530b010000101100,3540b010000000000,3550b001101110000,3560b001100010000,3570b001100000000,3580b010001101010,3590b001101111000,3600b000001110000,3610b001100100000,3620b001101010000,363};364365static const uint32_t gfx7_control_index_table[32] = {3660b0000000000000000010,3670b0000100000000000000,3680b0000100000000000001,3690b0000100000000000010,3700b0000100000000000011,3710b0000100000000000100,3720b0000100000000000101,3730b0000100000000000111,3740b0000100000000001000,3750b0000100000000001001,3760b0000100000000001101,3770b0000110000000000000,3780b0000110000000000001,3790b0000110000000000010,3800b0000110000000000011,3810b0000110000000000100,3820b0000110000000000101,3830b0000110000000000111,3840b0000110000000001001,3850b0000110000000001101,3860b0000110000000010000,3870b0000110000100000000,3880b0001000000000000000,3890b0001000000000000010,3900b0001000000000000100,3910b0001000000100000000,3920b0010110000000000000,3930b0010110000000010000,3940b0011000000000000000,3950b0011000000100000000,3960b0101000000000000000,3970b0101000000100000000,398};399400static const uint32_t gfx7_datatype_table[32] = 
{4010b001000000000000001,4020b001000000000100000,4030b001000000000100001,4040b001000000001100001,4050b001000000010111101,4060b001000001011111101,4070b001000001110100001,4080b001000001110100101,4090b001000001110111101,4100b001000010000100001,4110b001000110000100000,4120b001000110000100001,4130b001001010010100101,4140b001001110010100100,4150b001001110010100101,4160b001111001110111101,4170b001111011110011101,4180b001111011110111100,4190b001111011110111101,4200b001111111110111100,4210b000000001000001100,4220b001000000000111101,4230b001000000010100101,4240b001000010000100000,4250b001001010010100100,4260b001001110010000100,4270b001010010100001001,4280b001101111110111101,4290b001111111110111101,4300b001011110110101100,4310b001010010100101000,4320b001010110100101000,433};434435static const uint16_t gfx7_subreg_table[32] = {4360b000000000000000,4370b000000000000001,4380b000000000001000,4390b000000000001111,4400b000000000010000,4410b000000010000000,4420b000000100000000,4430b000000110000000,4440b000001000000000,4450b000001000010000,4460b000010100000000,4470b001000000000000,4480b001000000000001,4490b001000010000001,4500b001000010000010,4510b001000010000011,4520b001000010000100,4530b001000010000111,4540b001000010001000,4550b001000010001110,4560b001000010001111,4570b001000110000000,4580b001000111101000,4590b010000000000000,4600b010000110000000,4610b011000000000000,4620b011110010000111,4630b100000000000000,4640b101000000000000,4650b110000000000000,4660b111000000000000,4670b111000000011100,468};469470static const uint16_t gfx7_src_index_table[32] = 
{4710b000000000000,4720b000000000010,4730b000000010000,4740b000000010010,4750b000000011000,4760b000000100000,4770b000000101000,4780b000001001000,4790b000001010000,4800b000001110000,4810b000001111000,4820b001100000000,4830b001100000010,4840b001100001000,4850b001100010000,4860b001100010010,4870b001100100000,4880b001100101000,4890b001100111000,4900b001101000000,4910b001101000010,4920b001101001000,4930b001101010000,4940b001101100000,4950b001101101000,4960b001101110000,4970b001101110001,4980b001101111000,4990b010001101000,5000b010001101001,5010b010001101010,5020b010110001000,503};504505static const uint32_t gfx8_control_index_table[32] = {5060b0000000000000000010,5070b0000100000000000000,5080b0000100000000000001,5090b0000100000000000010,5100b0000100000000000011,5110b0000100000000000100,5120b0000100000000000101,5130b0000100000000000111,5140b0000100000000001000,5150b0000100000000001001,5160b0000100000000001101,5170b0000110000000000000,5180b0000110000000000001,5190b0000110000000000010,5200b0000110000000000011,5210b0000110000000000100,5220b0000110000000000101,5230b0000110000000000111,5240b0000110000000001001,5250b0000110000000001101,5260b0000110000000010000,5270b0000110000100000000,5280b0001000000000000000,5290b0001000000000000010,5300b0001000000000000100,5310b0001000000100000000,5320b0010110000000000000,5330b0010110000000010000,5340b0011000000000000000,5350b0011000000100000000,5360b0101000000000000000,5370b0101000000100000000,538};539540static const uint32_t gfx8_datatype_table[32] = 
{5410b001000000000000000001,5420b001000000000001000000,5430b001000000000001000001,5440b001000000000011000001,5450b001000000000101011101,5460b001000000010111011101,5470b001000000011101000001,5480b001000000011101000101,5490b001000000011101011101,5500b001000001000001000001,5510b001000011000001000000,5520b001000011000001000001,5530b001000101000101000101,5540b001000111000101000100,5550b001000111000101000101,5560b001011100011101011101,5570b001011101011100011101,5580b001011101011101011100,5590b001011101011101011101,5600b001011111011101011100,5610b000000000010000001100,5620b001000000000001011101,5630b001000000000101000101,5640b001000001000001000000,5650b001000101000101000100,5660b001000111000100000100,5670b001001001001000001001,5680b001010111011101011101,5690b001011111011101011101,5700b001001111001101001100,5710b001001001001001001000,5720b001001011001001001000,573};574575static const uint16_t gfx8_subreg_table[32] = {5760b000000000000000,5770b000000000000001,5780b000000000001000,5790b000000000001111,5800b000000000010000,5810b000000010000000,5820b000000100000000,5830b000000110000000,5840b000001000000000,5850b000001000010000,5860b000001010000000,5870b001000000000000,5880b001000000000001,5890b001000010000001,5900b001000010000010,5910b001000010000011,5920b001000010000100,5930b001000010000111,5940b001000010001000,5950b001000010001110,5960b001000010001111,5970b001000110000000,5980b001000111101000,5990b010000000000000,6000b010000110000000,6010b011000000000000,6020b011110010000111,6030b100000000000000,6040b101000000000000,6050b110000000000000,6060b111000000000000,6070b111000000011100,608};609610static const uint16_t gfx8_src_index_table[32] = 
{6110b000000000000,6120b000000000010,6130b000000010000,6140b000000010010,6150b000000011000,6160b000000100000,6170b000000101000,6180b000001001000,6190b000001010000,6200b000001110000,6210b000001111000,6220b001100000000,6230b001100000010,6240b001100001000,6250b001100010000,6260b001100010010,6270b001100100000,6280b001100101000,6290b001100111000,6300b001101000000,6310b001101000010,6320b001101001000,6330b001101010000,6340b001101100000,6350b001101101000,6360b001101110000,6370b001101110001,6380b001101111000,6390b010001101000,6400b010001101001,6410b010001101010,6420b010110001000,643};644645static const uint32_t gfx11_datatype_table[32] = {6460b001000000000000000001,6470b001000000000001000000,6480b001000000000001000001,6490b001000000000011000001,6500b001000000000101100101,6510b001000000101111100101,6520b001000000100101000001,6530b001000000100101000101,6540b001000000100101100101,6550b001000001000001000001,6560b001000011000001000000,6570b001000011000001000001,6580b001000101000101000101,6590b001000111000101000100,6600b001000111000101000101,6610b001100100100101100101,6620b001100101100100100101,6630b001100101100101100100,6640b001100101100101100101,6650b001100111100101100100,6660b000000000010000001100,6670b001000000000001100101,6680b001000000000101000101,6690b001000001000001000000,6700b001000101000101000100,6710b001000111000100000100,6720b001001001001000001001,6730b001101111100101100101,6740b001100111100101100101,6750b001001111001101001100,6760b001001001001001001000,6770b001001011001001001000,678};679680static const uint32_t gfx12_control_index_table[32] = {6810b000000000000000000100, /* (16|M0) */6820b000000000000000000011, /* (8|M0) */6830b000000010000000000000, /* (W) (1|M0) */6840b000000010000000000100, /* (W) (16|M0) */6850b000000010000000000011, /* (W) (8|M0) */6860b010000000000000000100, /* (16|M0) (ge)f0.0 */6870b000000000000000100100, /* (16|M16) */6880b010100000000000000100, /* (16|M0) (lt)f0.0 */6890b000000000000000000000, /* (1|M0) */6900b000010000000000000100, /* 
(16|M0) (sat) */6910b000000000000000010011, /* (8|M8) */6920b001100000000000000100, /* (16|M0) (gt)f0.0 */6930b000100000000000000100, /* (16|M0) (eq)f0.0 */6940b000100010000000000100, /* (W) (16|M0) (eq)f0.0 */6950b001000000000000000100, /* (16|M0) (ne)f0.0 */6960b000000000000100000100, /* (f0.0) (16|M0) */6970b010100000000000000011, /* (8|M0) (lt)f0.0 */6980b000000000000110000100, /* (f1.0) (16|M0) */6990b000000010000000000001, /* (W) (2|M0) */7000b000000000000101000100, /* (f0.1) (16|M0) */7010b000000000000111000100, /* (f1.1) (16|M0) */7020b010000010000000000100, /* (W) (16|M0) (ge)f0.0 */7030b000000000000000100011, /* (8|M16) */7040b000000000000000110011, /* (8|M24) */7050b010100010000000000100, /* (W) (16|M0) (lt)f0.0 */7060b010000000000000000011, /* (8|M0) (ge)f0.0 */7070b000100010000000000000, /* (W) (1|M0) (eq)f0.0 */7080b000010000000000000011, /* (8|M0) (sat) */7090b010100000000010000100, /* (16|M0) (lt)f1.0 */7100b000100000000000000011, /* (8|M0) (eq)f0.0 */7110b000001000000000000011, /* (8|M0) {AccWrEn} */7120b000000010000000100100, /* (W) (16|M16) */713};714715static const uint32_t gfx12_datatype_table[32] = {7160b11010110100101010100, /* grf<1>:f grf:f grf:f */7170b00000110100101010100, /* grf<1>:f grf:f arf:ub */7180b00000010101101010100, /* grf<1>:f imm:f arf:ub */7190b01010110110101010100, /* grf<1>:f grf:f imm:f */7200b11010100100101010100, /* arf<1>:f grf:f grf:f */7210b11010010100101010100, /* grf<1>:f arf:f grf:f */7220b01010100110101010100, /* arf<1>:f grf:f imm:f */7230b00000000100000000000, /* arf<1>:ub arf:ub arf:ub */7240b11010000100101010100, /* arf<1>:f arf:f grf:f */7250b00101110110011001100, /* grf<1>:d grf:d imm:w */7260b10110110100011001100, /* grf<1>:d grf:d grf:d */7270b01010010110101010100, /* grf<1>:f arf:f imm:f */7280b10010110100001000100, /* grf<1>:ud grf:ud grf:ud */7290b01010000110101010100, /* arf<1>:f arf:f imm:f */7300b00110110110011001100, /* grf<1>:d grf:d imm:d */7310b00010110110001000100, /* grf<1>:ud grf:ud imm:ud 
*/7320b00000111000101010100, /* grf<2>:f grf:f arf:ub */7330b00101100110011001100, /* arf<1>:d grf:d imm:w */7340b00000000100000100010, /* arf<1>:uw arf:uw arf:ub */7350b00000010100001000100, /* grf<1>:ud arf:ud arf:ub */7360b00100110110000101010, /* grf<1>:w grf:uw imm:uv */7370b00001110110000100010, /* grf<1>:uw grf:uw imm:uw */7380b10010111000001000100, /* grf<2>:ud grf:ud grf:ud */7390b00000110100101001100, /* grf<1>:d grf:f arf:ub */7400b10001100100011001100, /* arf<1>:d grf:d grf:uw */7410b00000110100001010100, /* grf<1>:f grf:ud arf:ub */7420b00101110110001001100, /* grf<1>:d grf:ud imm:w */7430b00000010100000100010, /* grf<1>:uw arf:uw arf:ub */7440b00000110100000110100, /* grf<1>:f grf:uw arf:ub */7450b00000110100000010100, /* grf<1>:f grf:ub arf:ub */7460b00000110100011010100, /* grf<1>:f grf:d arf:ub */7470b00000010100101010100, /* grf<1>:f arf:f arf:ub */748};749750static const uint16_t gfx12_subreg_table[32] = {7510b000000000000000, /* .0 .0 .0 */7520b100000000000000, /* .0 .0 .16 */7530b001000000000000, /* .0 .0 .4 */7540b011000000000000, /* .0 .0 .12 */7550b000000010000000, /* .0 .4 .0 */7560b010000000000000, /* .0 .0 .8 */7570b101000000000000, /* .0 .0 .20 */7580b000000000001000, /* .8 .0 .0 */7590b000000100000000, /* .0 .8 .0 */7600b110000000000000, /* .0 .0 .24 */7610b111000000000000, /* .0 .0 .28 */7620b000001000000000, /* .0 .16 .0 */7630b000000000000100, /* .4 .0 .0 */7640b000001100000000, /* .0 .24 .0 */7650b000001010000000, /* .0 .20 .0 */7660b000000110000000, /* .0 .12 .0 */7670b000001110000000, /* .0 .28 .0 */7680b000000000011100, /* .28 .0 .0 */7690b000000000010000, /* .16 .0 .0 */7700b000000000001100, /* .12 .0 .0 */7710b000000000011000, /* .24 .0 .0 */7720b000000000010100, /* .20 .0 .0 */7730b000000000000010, /* .2 .0 .0 */7740b000000101000000, /* .0 .10 .0 */7750b000000001000000, /* .0 .2 .0 */7760b000000010000100, /* .4 .4 .0 */7770b000000001011100, /* .28 .2 .0 */7780b000000001000010, /* .2 .2 .0 */7790b000000110001100, /* .12 .12 .0 
*/7800b000000000100000, /* .0 .1 .0 */7810b000000001100000, /* .0 .3 .0 */7820b110001100000000, /* .0 .24 .24 */783};784785static const uint16_t gfx12_src0_index_table[16] = {7860b010001100100, /* r<8;8,1> */7870b000000000000, /* r<0;1,0> */7880b010001100110, /* -r<8;8,1> */7890b010001100101, /* (abs)r<8;8,1> */7900b000000000010, /* -r<0;1,0> */7910b001000000000, /* r<2;1,0> */7920b001001000000, /* r<2;4,0> */7930b001101000000, /* r<4;4,0> */7940b001000100100, /* r<2;2,1> */7950b001100000000, /* r<4;1,0> */7960b001000100110, /* -r<2;2,1> */7970b001101000100, /* r<4;4,1> */7980b010001100111, /* -(abs)r<8;8,1> */7990b000100000000, /* r<1;1,0> */8000b000000000001, /* (abs)r<0;1,0> */8010b111100010000, /* r[a]<1,0> */802};803804static const uint16_t gfx12_src1_index_table[16] = {8050b000100011001, /* r<8;8,1> */8060b000000000000, /* r<0;1,0> */8070b100100011001, /* -r<8;8,1> */8080b100000000000, /* -r<0;1,0> */8090b010100011001, /* (abs)r<8;8,1> */8100b100011010000, /* -r<4;4,0> */8110b000010000000, /* r<2;1,0> */8120b000010001001, /* r<2;2,1> */8130b100010001001, /* -r<2;2,1> */8140b000011010000, /* r<4;4,0> */8150b000011010001, /* r<4;4,1> */8160b000011000000, /* r<4;1,0> */8170b110100011001, /* -(abs)r<8;8,1> */8180b010000000000, /* (abs)r<0;1,0> */8190b110000000000, /* -(abs)r<0;1,0> */8200b100011010001, /* -r<4;4,1> */821};822823static const uint16_t xehp_src0_index_table[16] = {8240b000100000000, /* r<1;1,0> */8250b000000000000, /* r<0;1,0> */8260b000100000010, /* -r<1;1,0> */8270b000100000001, /* (abs)r<1;1,0> */8280b000000000010, /* -r<0;1,0> */8290b001000000000, /* r<2;1,0> */8300b001001000000, /* r<2;4,0> */8310b001101000000, /* r<4;4,0> */8320b001100000000, /* r<4;1,0> */8330b000100000011, /* -(abs)r<1;1,0> */8340b000000000001, /* (abs)r<0;1,0> */8350b111100010000, /* r[a]<1,0> */8360b010001100000, /* r<8;8,0> */8370b000101000000, /* r<1;4,0> */8380b010001001000, /* r<8;4,2> */8390b001000000010, /* -r<2;1,0> */840};841842static const uint16_t 
xehp_src1_index_table[16] = {8430b000001000000, /* r<1;1,0> */8440b000000000000, /* r<0;1,0> */8450b100001000000, /* -r<1;1,0> */8460b100000000000, /* -r<0;1,0> */8470b010001000000, /* (abs)r<1;1,0> */8480b100011010000, /* -r<4;4,0> */8490b000010000000, /* r<2;1,0> */8500b000011010000, /* r<4;4,0> */8510b000011000000, /* r<4;1,0> */8520b110001000000, /* -(abs)r<1;1,0> */8530b010000000000, /* (abs)r<0;1,0> */8540b110000000000, /* -(abs)r<0;1,0> */8550b000100011000, /* r<8;8,0> */8560b100010000000, /* -r<2;1,0> */8570b100000001001, /* -r<0;2,1> */8580b100001000100, /* -r[a]<1;1,0> */859};860861/* This is actually the control index table for Cherryview (26 bits), but the862* only difference from Broadwell (24 bits) is that it has two extra 0-bits at863* the start.864*865* The low 24 bits have the same mappings on both hardware.866*/867static const uint32_t gfx8_3src_control_index_table[4] = {8680b00100000000110000000000001,8690b00000000000110000000000001,8700b00000000001000000000000001,8710b00000000001000000000100001,872};873874/* This is actually the control index table for Cherryview (49 bits), but the875* only difference from Broadwell (46 bits) is that it has three extra 0-bits876* at the start.877*878* The low 44 bits have the same mappings on both hardware, and since the high879* three bits on Broadwell are zero, we can reuse Cherryview's table.880*/881static const uint64_t gfx8_3src_source_index_table[4] = {8820b0000001110010011100100111001000001111000000000000,8830b0000001110010011100100111001000001111000000000010,8840b0000001110010011100100111001000001111000000001000,8850b0000001110010011100100111001000001111000000100000,886};887888static const uint64_t gfx12_3src_control_index_table[32] = {8890b000001001010010101000000000000000100, /* (16|M0) grf<1>:f :f :f :f */8900b000001001010010101000000000000000011, /* (8|M0) grf<1>:f :f :f :f */8910b000001001000010101000000000000000011, /* (8|M0) arf<1>:f :f :f :f */8920b000001001010010101000010000000000011, /* (W) 
(8|M0) grf<1>:f :f :f :f */8930b000001001000010101000010000000000011, /* (W) (8|M0) arf<1>:f :f :f :f */8940b000001001000010101000000000000010011, /* (8|M8) arf<1>:f :f :f :f */8950b000001001010010101000000000000010011, /* (8|M8) grf<1>:f :f :f :f */8960b000001001000010101000010000000010011, /* (W) (8|M8) arf<1>:f :f :f :f */8970b000001001010010101000010000000010011, /* (W) (8|M8) grf<1>:f :f :f :f */8980b000001001010010101000010000000000100, /* (W) (16|M0) grf<1>:f :f :f :f */8990b000001001000010101000000000000000100, /* (16|M0) arf<1>:f :f :f :f */9000b000001001010010101010000000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */9010b000001001010010101000000000000100100, /* (16|M16) grf<1>:f :f :f :f */9020b000001001000010101000010000000000100, /* (W) (16|M0) arf<1>:f :f :f :f */9030b000001001010010101000010000000000000, /* (W) (1|M0) grf<1>:f :f :f :f */9040b000001001010010101010000000000000011, /* (8|M0) (sat)grf<1>:f :f :f :f */9050b000001001000010101000010000000110011, /* (W) (8|M24) arf<1>:f :f :f :f */9060b000001001000010101000010000000100011, /* (W) (8|M16) arf<1>:f :f :f :f */9070b000001001010010101000010000000110011, /* (W) (8|M24) grf<1>:f :f :f :f */9080b000001001010010101000010000000100011, /* (W) (8|M16) grf<1>:f :f :f :f */9090b000001001000010101000000000000100011, /* (8|M16) arf<1>:f :f :f :f */9100b000001001000010101000000000000110011, /* (8|M24) arf<1>:f :f :f :f */9110b000001001010010101000000000000100011, /* (8|M16) grf<1>:f :f :f :f */9120b000001001010010101000000000000110011, /* (8|M24) grf<1>:f :f :f :f */9130b000001001000010101010000000000000100, /* (16|M0) (sat)arf<1>:f :f :f :f */9140b000001001010010101010010000000000100, /* (W) (16|M0) (sat)grf<1>:f :f :f :f */9150b000001001010010101000010000000100100, /* (W) (16|M16) grf<1>:f :f :f :f */9160b000001001010010001000010000000000000, /* (W) (1|M0) grf<1>:ud :ud :ud :ud */9170b000001001000010101000000000000100100, /* (16|M16) arf<1>:f :f :f :f */9180b000001001010010101010000000000100100, /* 
(16|M16) (sat)grf<1>:f :f :f :f */9190b000001001010010101000010000000000010, /* (W) (4|M0) grf<1>:f :f :f :f */9200b000001001000010101010000000000000011, /* (8|M0) (sat)arf<1>:f :f :f :f */921};922923static const uint64_t xehp_3src_control_index_table[32] = {9240b0000010010100010101000000000000000100, /* (16|M0) grf<1>:f :f :f :f */9250b0000010010100010101000000000000000011, /* (8|M0) grf<1>:f :f :f :f */9260b0000010010000010101000000000000000011, /* (8|M0) arf<1>:f :f :f :f */9270b0000010010100010101000010000000000011, /* (W) (8|M0) grf<1>:f :f :f :f */9280b0000010010000010101000010000000000011, /* (W) (8|M0) arf<1>:f :f :f :f */9290b0000010010000010101000000000000010011, /* (8|M8) arf<1>:f :f :f :f */9300b0000010010100010101000000000000010011, /* (8|M8) grf<1>:f :f :f :f */9310b0000010010000010101000010000000010011, /* (W) (8|M8) arf<1>:f :f :f :f */9320b0000010010100010101000010000000010011, /* (W) (8|M8) grf<1>:f :f :f :f */9330b0000010010100010101000010000000000100, /* (W) (16|M0) grf<1>:f :f :f :f */9340b0000010010000010101000000000000000100, /* (16|M0) arf<1>:f :f :f :f */9350b0000010010100010101010000000000000100, /* (16|M0) (sat)grf<1>:f :f :f :f */9360b0000010010100010101000000000000100100, /* (16|M16) grf<1>:f :f :f :f */9370b0000010010000010101000010000000000100, /* (W) (16|M0) arf<1>:f :f :f :f */9380b0000010010100010101000010000000000000, /* (W) (1|M0) grf<1>:f :f :f :f */9390b0000010010100010101010000000000000011, /* (8|M0) (sat)grf<1>:f :f :f :f */9400b0000010010000010101000010000000100011, /* (W) (8|M16) arf<1>:f :f :f :f */9410b0000010010000010101000010000000110011, /* (W) (8|M24) arf<1>:f :f :f :f */9420b0000010010100010101000010000000100011, /* (W) (8|M16) grf<1>:f :f :f :f */9430b0000010010100010101000010000000110011, /* (W) (8|M24) grf<1>:f :f :f :f */9440b0000010010000010101000000000000110011, /* (8|M24) arf<1>:f :f :f :f */9450b0000010010000010101000000000000100011, /* (8|M16) arf<1>:f :f :f :f */9460b0000000100111110011000000000000000011, 
/* dpas.8x* (8|M0) grf<1>:d :d :ub :b */9470b0000000000111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :ub {Atomic} */9480b0000100100111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :b {Atomic} */9490b0000100000111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :ub {Atomic} */9500b0000100100111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :b */9510b0000000000111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :ub */9520b0000000100111110011000100000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :ub :b {Atomic} */9530b0000100000111110011000000000000000011, /* dpas.8x* (8|M0) grf<1>:d :d :b :ub */9540b0000101101111010101000100000000000011, /* dpas.8x* (8|M0) grf<1>:f :f :bf :bf {Atomic} */9550b0000101101111010101000000000000000011, /* dpas.8x* (8|M0) grf<1>:f :f :bf :bf */956};957958static const uint32_t gfx12_3src_source_index_table[32] = {9590b100101100001100000000, /* grf<0;0> grf<8;1> grf<0> */9600b100101100001001000010, /* arf<4;1> grf<8;1> grf<0> */9610b101101100001101000011, /* grf<8;1> grf<8;1> grf<1> */9620b100101100001101000011, /* grf<8;1> grf<8;1> grf<0> */9630b101100000000101000011, /* grf<8;1> grf<0;0> grf<1> */9640b101101100001101001011, /* -grf<8;1> grf<8;1> grf<1> */9650b101001100001101000011, /* grf<8;1> arf<8;1> grf<1> */9660b100001100001100000000, /* grf<0;0> arf<8;1> grf<0> */9670b101101100001100000000, /* grf<0;0> grf<8;1> grf<1> */9680b101101100101101000011, /* grf<8;1> grf<8;1> -grf<1> */9690b101101110001101000011, /* grf<8;1> -grf<8;1> grf<1> */9700b101100000000100000000, /* grf<0;0> grf<0;0> grf<1> */9710b100001100001101000011, /* grf<8;1> arf<8;1> grf<0> */9720b100101110001100000000, /* grf<0;0> -grf<8;1> grf<0> */9730b100101110001101000011, /* grf<8;1> -grf<8;1> grf<0> */9740b100101100001101001011, /* -grf<8;1> grf<8;1> grf<0> */9750b100100000000101000011, /* grf<8;1> grf<0;0> grf<0> */9760b100101100001100001000, /* -grf<0;0> grf<8;1> grf<0> */9770b100100000000100000000, /* 
grf<0;0> grf<0;0> grf<0> */9780b101101110001100000000, /* grf<0;0> -grf<8;1> grf<1> */9790b100101100101100000000, /* grf<0;0> grf<8;1> -grf<0> */9800b101001100001100000000, /* grf<0;0> arf<8;1> grf<1> */9810b100101100101101000011, /* grf<8;1> grf<8;1> -grf<0> */9820b101101100101101001011, /* -grf<8;1> grf<8;1> -grf<1> */9830b101001100001101001011, /* -grf<8;1> arf<8;1> grf<1> */9840b101101110001101001011, /* -grf<8;1> -grf<8;1> grf<1> */9850b101100010000101000011, /* grf<8;1> -grf<0;0> grf<1> */9860b101100000100101000011, /* grf<8;1> grf<0;0> -grf<1> */9870b101101100001100001000, /* -grf<0;0> grf<8;1> grf<1> */9880b101101100101100000000, /* grf<0;0> grf<8;1> -grf<1> */9890b100100000100101000011, /* grf<8;1> grf<0;0> -grf<0> */9900b101001100101101000011, /* grf<8;1> arf<8;1> -grf<1> */991};992993static const uint32_t xehp_3src_source_index_table[32] = {9940b100100000001100000000, /* grf<0;0> grf<1;0> grf<0> */9950b100100000001000000001, /* arf<1;0> grf<1;0> grf<0> */9960b101100000001100000001, /* grf<1;0> grf<1;0> grf<1> */9970b100100000001100000001, /* grf<1;0> grf<1;0> grf<0> */9980b101100000000100000001, /* grf<1;0> grf<0;0> grf<1> */9990b101100000001100001001, /* -grf<1;0> grf<1;0> grf<1> */10000b101000000001100000001, /* grf<1;0> arf<1;0> grf<1> */10010b101100000001100000000, /* grf<0;0> grf<1;0> grf<1> */10020b100000000001100000000, /* grf<0;0> arf<1;0> grf<0> */10030b101100000101100000001, /* grf<1;0> grf<1;0> -grf<1> */10040b101100010001100000001, /* grf<1;0> -grf<1;0> grf<1> */10050b101100000000100000000, /* grf<0;0> grf<0;0> grf<1> */10060b100000000001100000001, /* grf<1;0> arf<1;0> grf<0> */10070b100100010001100000000, /* grf<0;0> -grf<1;0> grf<0> */10080b100100010001100000001, /* grf<1;0> -grf<1;0> grf<0> */10090b100100000001100001001, /* -grf<1;0> grf<1;0> grf<0> */10100b100100000000100000001, /* grf<1;0> grf<0;0> grf<0> */10110b100100000001100001000, /* -grf<0;0> grf<1;0> grf<0> */10120b100100000000100000000, /* grf<0;0> grf<0;0> grf<0>1013* dpas.*x1 
grf:d grf:[ub,b] grf:[ub,b]1014* dpas.*x1 grf:f grf:bf grf:bf1015*/10160b101100010001100000000, /* grf<0;0> -grf<1;0> grf<1> */10170b100100000101100000000, /* grf<0;0> grf<1;0> -grf<0> */10180b101000000001100000000, /* grf<0;0> arf<1;0> grf<1> */10190b100100000101100000001, /* grf<1;0> grf<1;0> -grf<0> */10200b101100000101100001001, /* -grf<1;0> grf<1;0> -grf<1> */10210b100100010000100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[ub,b] */10220b100100000100100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u2,s2] */10230b100100010100100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u2,s2] */10240b100100001000100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[ub,b] */10250b100100001100100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u2,s2] */10260b100100000010100000000, /* dpas.*x1 grf:d grf:[ub,b] grf:[u4,s4] */10270b100100001010100000000, /* dpas.*x1 grf:d grf:[u4,s4] grf:[u4,s4] */10280b100100010010100000000, /* dpas.*x1 grf:d grf:[u2,s2] grf:[u4,s4] */1029};10301031static const uint32_t gfx12_3src_subreg_table[32] = {10320b00000000000000000000, /* .0 .0 .0 .0 */10330b00100000000000000000, /* .0 .0 .0 .4 */10340b00000000000110000000, /* .0 .12 .0 .0 */10350b10100000000000000000, /* .0 .0 .0 .20 */10360b10000000001110000000, /* .0 .28 .0 .16 */10370b01100000000000000000, /* .0 .0 .0 .12 */10380b01000000000000000000, /* .0 .0 .0 .8 */10390b00000010000000000000, /* .0 .0 .8 .0 */10400b00000001000000000000, /* .0 .0 .4 .0 */10410b11000000000000000000, /* .0 .0 .0 .24 */10420b10000000000000000000, /* .0 .0 .0 .16 */10430b11100000000000000000, /* .0 .0 .0 .28 */10440b00000110000000000000, /* .0 .0 .24 .0 */10450b00000000000010000000, /* .0 .4 .0 .0 */10460b00000100000000000000, /* .0 .0 .16 .0 */10470b00000011000000000000, /* .0 .0 .12 .0 */10480b00000101000000000000, /* .0 .0 .20 .0 */10490b00000111000000000000, /* .0 .0 .28 .0 */10500b00000000000100000000, /* .0 .8 .0 .0 */10510b00000000001000000000, /* .0 .16 .0 .0 */10520b00000000001100000000, /* .0 .24 .0 .0 
/**
 * Bundle of the device description and the lookup tables consulted while
 * compacting and uncompacting ordinary (non-3src) instructions.
 *
 * NOTE(review): presumably filled in by compaction_state_init() according
 * to the device generation -- confirm against that function's body.
 */
struct compaction_state {
   /* Device the tables below were selected for. */
   const struct intel_device_info *devinfo;
   /* Searched by set_control_index(), indexed by set_uncompacted_control(). */
   const uint32_t *control_index_table;
   /* Searched by set_datatype_index(), indexed by set_uncompacted_datatype(). */
   const uint32_t *datatype_table;
   /* Searched by set_subreg_index(), indexed by set_uncompacted_subreg(). */
   const uint16_t *subreg_table;
   /* Searched by set_src0_index(), indexed by set_uncompacted_src0(). */
   const uint16_t *src0_index_table;
   /* Searched by set_src1_index(), indexed by set_uncompacted_src1(). */
   const uint16_t *src1_index_table;
};
31) << 16) | /* 1b */1104(brw_inst_bits(src, 23, 8)); /* 16b */11051106/* On gfx7, the flag register and subregister numbers are integrated into1107* the control index.1108*/1109if (devinfo->ver == 7)1110uncompacted |= brw_inst_bits(src, 90, 89) << 17; /* 2b */1111}11121113for (int i = 0; i < 32; i++) {1114if (c->control_index_table[i] == uncompacted) {1115brw_compact_inst_set_control_index(devinfo, dst, i);1116return true;1117}1118}11191120return false;1121}11221123static bool1124set_datatype_index(const struct compaction_state *c, brw_compact_inst *dst,1125const brw_inst *src, bool is_immediate)1126{1127const struct intel_device_info *devinfo = c->devinfo;1128uint32_t uncompacted; /* 18b/G45+; 21b/BDW+; 20b/TGL+ */11291130if (devinfo->ver >= 12) {1131uncompacted = (brw_inst_bits(src, 91, 88) << 15) | /* 4b */1132(brw_inst_bits(src, 66, 66) << 14) | /* 1b */1133(brw_inst_bits(src, 50, 50) << 13) | /* 1b */1134(brw_inst_bits(src, 49, 48) << 11) | /* 2b */1135(brw_inst_bits(src, 47, 47) << 10) | /* 1b */1136(brw_inst_bits(src, 46, 46) << 9) | /* 1b */1137(brw_inst_bits(src, 43, 40) << 5) | /* 4b */1138(brw_inst_bits(src, 39, 36) << 1) | /* 4b */1139(brw_inst_bits(src, 35, 35)); /* 1b */11401141/* Src1.RegFile overlaps with the immediate, so ignore it if an immediate1142* is present1143*/1144if (!is_immediate) {1145uncompacted |= brw_inst_bits(src, 98, 98) << 19; /* 1b */1146}1147} else if (devinfo->ver >= 8) {1148uncompacted = (brw_inst_bits(src, 63, 61) << 18) | /* 3b */1149(brw_inst_bits(src, 94, 89) << 12) | /* 6b */1150(brw_inst_bits(src, 46, 35)); /* 12b */1151} else {1152uncompacted = (brw_inst_bits(src, 63, 61) << 15) | /* 3b */1153(brw_inst_bits(src, 46, 32)); /* 15b */1154}11551156for (int i = 0; i < 32; i++) {1157if (c->datatype_table[i] == uncompacted) {1158brw_compact_inst_set_datatype_index(devinfo, dst, i);1159return true;1160}1161}11621163return false;1164}11651166static bool1167set_subreg_index(const struct compaction_state *c, brw_compact_inst 
*dst,1168const brw_inst *src, bool is_immediate)1169{1170const struct intel_device_info *devinfo = c->devinfo;1171uint16_t uncompacted; /* 15b */11721173if (devinfo->ver >= 12) {1174uncompacted = (brw_inst_bits(src, 55, 51) << 0) | /* 5b */1175(brw_inst_bits(src, 71, 67) << 5); /* 5b */11761177if (!is_immediate)1178uncompacted |= brw_inst_bits(src, 103, 99) << 10; /* 5b */1179} else {1180uncompacted = (brw_inst_bits(src, 52, 48) << 0) | /* 5b */1181(brw_inst_bits(src, 68, 64) << 5); /* 5b */11821183if (!is_immediate)1184uncompacted |= brw_inst_bits(src, 100, 96) << 10; /* 5b */1185}11861187for (int i = 0; i < 32; i++) {1188if (c->subreg_table[i] == uncompacted) {1189brw_compact_inst_set_subreg_index(devinfo, dst, i);1190return true;1191}1192}11931194return false;1195}11961197static bool1198set_src0_index(const struct compaction_state *c, brw_compact_inst *dst,1199const brw_inst *src)1200{1201const struct intel_device_info *devinfo = c->devinfo;1202uint16_t uncompacted; /* 12b */1203int table_len;12041205if (devinfo->ver >= 12) {1206table_len = ARRAY_SIZE(gfx12_src0_index_table);1207uncompacted = (brw_inst_bits(src, 87, 84) << 8) | /* 4b */1208(brw_inst_bits(src, 83, 81) << 5) | /* 3b */1209(brw_inst_bits(src, 80, 80) << 4) | /* 1b */1210(brw_inst_bits(src, 65, 64) << 2) | /* 2b */1211(brw_inst_bits(src, 45, 44)); /* 2b */1212} else {1213table_len = ARRAY_SIZE(gfx8_src_index_table);1214uncompacted = brw_inst_bits(src, 88, 77); /* 12b */1215}12161217for (int i = 0; i < table_len; i++) {1218if (c->src0_index_table[i] == uncompacted) {1219brw_compact_inst_set_src0_index(devinfo, dst, i);1220return true;1221}1222}12231224return false;1225}12261227static bool1228set_src1_index(const struct compaction_state *c, brw_compact_inst *dst,1229const brw_inst *src, bool is_immediate, unsigned imm)1230{1231const struct intel_device_info *devinfo = c->devinfo;1232if (is_immediate) {1233if (devinfo->ver >= 12) {1234/* src1 index takes the low 4 bits of the 12-bit compacted value 
*/1235brw_compact_inst_set_src1_index(devinfo, dst, imm & 0xf);1236} else {1237/* src1 index takes the high 5 bits of the 13-bit compacted value */1238brw_compact_inst_set_src1_index(devinfo, dst, imm >> 8);1239}1240return true;1241} else {1242uint16_t uncompacted; /* 12b */1243int table_len;12441245if (devinfo->ver >= 12) {1246table_len = ARRAY_SIZE(gfx12_src0_index_table);1247uncompacted = (brw_inst_bits(src, 121, 120) << 10) | /* 2b */1248(brw_inst_bits(src, 119, 116) << 6) | /* 4b */1249(brw_inst_bits(src, 115, 113) << 3) | /* 3b */1250(brw_inst_bits(src, 112, 112) << 2) | /* 1b */1251(brw_inst_bits(src, 97, 96)); /* 2b */1252} else {1253table_len = ARRAY_SIZE(gfx8_src_index_table);1254uncompacted = brw_inst_bits(src, 120, 109); /* 12b */1255}12561257for (int i = 0; i < table_len; i++) {1258if (c->src1_index_table[i] == uncompacted) {1259brw_compact_inst_set_src1_index(devinfo, dst, i);1260return true;1261}1262}1263}12641265return false;1266}12671268static bool1269set_3src_control_index(const struct intel_device_info *devinfo,1270brw_compact_inst *dst, const brw_inst *src)1271{1272assert(devinfo->ver >= 8);12731274if (devinfo->verx10 >= 125) {1275uint64_t uncompacted = /* 37b/XeHP+ */1276(brw_inst_bits(src, 95, 92) << 33) | /* 4b */1277(brw_inst_bits(src, 90, 88) << 30) | /* 3b */1278(brw_inst_bits(src, 82, 80) << 27) | /* 3b */1279(brw_inst_bits(src, 50, 50) << 26) | /* 1b */1280(brw_inst_bits(src, 49, 48) << 24) | /* 2b */1281(brw_inst_bits(src, 42, 40) << 21) | /* 3b */1282(brw_inst_bits(src, 39, 39) << 20) | /* 1b */1283(brw_inst_bits(src, 38, 36) << 17) | /* 3b */1284(brw_inst_bits(src, 34, 34) << 16) | /* 1b */1285(brw_inst_bits(src, 33, 33) << 15) | /* 1b */1286(brw_inst_bits(src, 32, 32) << 14) | /* 1b */1287(brw_inst_bits(src, 31, 31) << 13) | /* 1b */1288(brw_inst_bits(src, 28, 28) << 12) | /* 1b */1289(brw_inst_bits(src, 27, 24) << 8) | /* 4b */1290(brw_inst_bits(src, 23, 23) << 7) | /* 1b */1291(brw_inst_bits(src, 22, 22) << 6) | /* 1b 
*/1292(brw_inst_bits(src, 21, 19) << 3) | /* 3b */1293(brw_inst_bits(src, 18, 16)); /* 3b */12941295for (unsigned i = 0; i < ARRAY_SIZE(xehp_3src_control_index_table); i++) {1296if (xehp_3src_control_index_table[i] == uncompacted) {1297brw_compact_inst_set_3src_control_index(devinfo, dst, i);1298return true;1299}1300}1301} else if (devinfo->ver >= 12) {1302uint64_t uncompacted = /* 36b/TGL+ */1303(brw_inst_bits(src, 95, 92) << 32) | /* 4b */1304(brw_inst_bits(src, 90, 88) << 29) | /* 3b */1305(brw_inst_bits(src, 82, 80) << 26) | /* 3b */1306(brw_inst_bits(src, 50, 50) << 25) | /* 1b */1307(brw_inst_bits(src, 48, 48) << 24) | /* 1b */1308(brw_inst_bits(src, 42, 40) << 21) | /* 3b */1309(brw_inst_bits(src, 39, 39) << 20) | /* 1b */1310(brw_inst_bits(src, 38, 36) << 17) | /* 3b */1311(brw_inst_bits(src, 34, 34) << 16) | /* 1b */1312(brw_inst_bits(src, 33, 33) << 15) | /* 1b */1313(brw_inst_bits(src, 32, 32) << 14) | /* 1b */1314(brw_inst_bits(src, 31, 31) << 13) | /* 1b */1315(brw_inst_bits(src, 28, 28) << 12) | /* 1b */1316(brw_inst_bits(src, 27, 24) << 8) | /* 4b */1317(brw_inst_bits(src, 23, 23) << 7) | /* 1b */1318(brw_inst_bits(src, 22, 22) << 6) | /* 1b */1319(brw_inst_bits(src, 21, 19) << 3) | /* 3b */1320(brw_inst_bits(src, 18, 16)); /* 3b */13211322for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_control_index_table); i++) {1323if (gfx12_3src_control_index_table[i] == uncompacted) {1324brw_compact_inst_set_3src_control_index(devinfo, dst, i);1325return true;1326}1327}1328} else {1329uint32_t uncompacted = /* 24b/BDW; 26b/CHV/SKL+ */1330(brw_inst_bits(src, 34, 32) << 21) | /* 3b */1331(brw_inst_bits(src, 28, 8)); /* 21b */13321333if (devinfo->ver >= 9 || devinfo->is_cherryview) {1334uncompacted |=1335brw_inst_bits(src, 36, 35) << 24; /* 2b */1336}13371338for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_control_index_table); i++) {1339if (gfx8_3src_control_index_table[i] == uncompacted) {1340brw_compact_inst_set_3src_control_index(devinfo, dst, i);1341return 
true;1342}1343}1344}13451346return false;1347}13481349static bool1350set_3src_source_index(const struct intel_device_info *devinfo,1351brw_compact_inst *dst, const brw_inst *src)1352{1353assert(devinfo->ver >= 8);13541355if (devinfo->ver >= 12) {1356uint32_t uncompacted = /* 21b/TGL+ */1357(brw_inst_bits(src, 114, 114) << 20) | /* 1b */1358(brw_inst_bits(src, 113, 112) << 18) | /* 2b */1359(brw_inst_bits(src, 98, 98) << 17) | /* 1b */1360(brw_inst_bits(src, 97, 96) << 15) | /* 2b */1361(brw_inst_bits(src, 91, 91) << 14) | /* 1b */1362(brw_inst_bits(src, 87, 86) << 12) | /* 2b */1363(brw_inst_bits(src, 85, 84) << 10) | /* 2b */1364(brw_inst_bits(src, 83, 83) << 9) | /* 1b */1365(brw_inst_bits(src, 66, 66) << 8) | /* 1b */1366(brw_inst_bits(src, 65, 64) << 6) | /* 2b */1367(brw_inst_bits(src, 47, 47) << 5) | /* 1b */1368(brw_inst_bits(src, 46, 46) << 4) | /* 1b */1369(brw_inst_bits(src, 45, 44) << 2) | /* 2b */1370(brw_inst_bits(src, 43, 43) << 1) | /* 1b */1371(brw_inst_bits(src, 35, 35)); /* 1b */13721373const uint32_t *three_src_source_index_table =1374devinfo->verx10 >= 125 ?1375xehp_3src_source_index_table : gfx12_3src_source_index_table;1376const uint32_t three_src_source_index_table_len =1377devinfo->verx10 >= 125 ? 
ARRAY_SIZE(xehp_3src_source_index_table) :1378ARRAY_SIZE(gfx12_3src_source_index_table);13791380for (unsigned i = 0; i < three_src_source_index_table_len; i++) {1381if (three_src_source_index_table[i] == uncompacted) {1382brw_compact_inst_set_3src_source_index(devinfo, dst, i);1383return true;1384}1385}1386} else {1387uint64_t uncompacted = /* 46b/BDW; 49b/CHV/SKL+ */1388(brw_inst_bits(src, 83, 83) << 43) | /* 1b */1389(brw_inst_bits(src, 114, 107) << 35) | /* 8b */1390(brw_inst_bits(src, 93, 86) << 27) | /* 8b */1391(brw_inst_bits(src, 72, 65) << 19) | /* 8b */1392(brw_inst_bits(src, 55, 37)); /* 19b */13931394if (devinfo->ver >= 9 || devinfo->is_cherryview) {1395uncompacted |=1396(brw_inst_bits(src, 126, 125) << 47) | /* 2b */1397(brw_inst_bits(src, 105, 104) << 45) | /* 2b */1398(brw_inst_bits(src, 84, 84) << 44); /* 1b */1399} else {1400uncompacted |=1401(brw_inst_bits(src, 125, 125) << 45) | /* 1b */1402(brw_inst_bits(src, 104, 104) << 44); /* 1b */1403}14041405for (unsigned i = 0; i < ARRAY_SIZE(gfx8_3src_source_index_table); i++) {1406if (gfx8_3src_source_index_table[i] == uncompacted) {1407brw_compact_inst_set_3src_source_index(devinfo, dst, i);1408return true;1409}1410}1411}14121413return false;1414}14151416static bool1417set_3src_subreg_index(const struct intel_device_info *devinfo,1418brw_compact_inst *dst, const brw_inst *src)1419{1420assert(devinfo->ver >= 12);14211422uint32_t uncompacted = /* 20b/TGL+ */1423(brw_inst_bits(src, 119, 115) << 15) | /* 5b */1424(brw_inst_bits(src, 103, 99) << 10) | /* 5b */1425(brw_inst_bits(src, 71, 67) << 5) | /* 5b */1426(brw_inst_bits(src, 55, 51)); /* 5b */14271428for (unsigned i = 0; i < ARRAY_SIZE(gfx12_3src_subreg_table); i++) {1429if (gfx12_3src_subreg_table[i] == uncompacted) {1430brw_compact_inst_set_3src_subreg_index(devinfo, dst, i);1431return true;1432}1433}14341435return false;1436}14371438static bool1439has_unmapped_bits(const struct intel_device_info *devinfo, const brw_inst *src)1440{1441/* EOT can only 
be mapped on a send if the src1 is an immediate */1442if ((brw_inst_opcode(devinfo, src) == BRW_OPCODE_SENDC ||1443brw_inst_opcode(devinfo, src) == BRW_OPCODE_SEND) &&1444brw_inst_eot(devinfo, src))1445return true;14461447/* Check for instruction bits that don't map to any of the fields of the1448* compacted instruction. The instruction cannot be compacted if any of1449* them are set. They overlap with:1450* - NibCtrl (bit 47 on Gfx7, bit 11 on Gfx8)1451* - Dst.AddrImm[9] (bit 47 on Gfx8)1452* - Src0.AddrImm[9] (bit 95 on Gfx8)1453* - Imm64[27:31] (bits 91-95 on Gfx7, bit 95 on Gfx8)1454* - UIP[31] (bit 95 on Gfx8)1455*/1456if (devinfo->ver >= 12) {1457assert(!brw_inst_bits(src, 7, 7));1458return false;1459} else if (devinfo->ver >= 8) {1460assert(!brw_inst_bits(src, 7, 7));1461return brw_inst_bits(src, 95, 95) ||1462brw_inst_bits(src, 47, 47) ||1463brw_inst_bits(src, 11, 11);1464} else {1465assert(!brw_inst_bits(src, 7, 7) &&1466!(devinfo->ver < 7 && brw_inst_bits(src, 90, 90)));1467return brw_inst_bits(src, 95, 91) ||1468brw_inst_bits(src, 47, 47);1469}1470}14711472static bool1473has_3src_unmapped_bits(const struct intel_device_info *devinfo,1474const brw_inst *src)1475{1476/* Check for three-source instruction bits that don't map to any of the1477* fields of the compacted instruction. 
All of them seem to be reserved1478* bits currently.1479*/1480if (devinfo->ver >= 12) {1481assert(!brw_inst_bits(src, 7, 7));1482} else if (devinfo->ver >= 9 || devinfo->is_cherryview) {1483assert(!brw_inst_bits(src, 127, 127) &&1484!brw_inst_bits(src, 7, 7));1485} else {1486assert(devinfo->ver >= 8);1487assert(!brw_inst_bits(src, 127, 126) &&1488!brw_inst_bits(src, 105, 105) &&1489!brw_inst_bits(src, 84, 84) &&1490!brw_inst_bits(src, 7, 7));14911492/* Src1Type and Src2Type, used for mixed-precision floating point */1493if (brw_inst_bits(src, 36, 35))1494return true;1495}14961497return false;1498}14991500static bool1501brw_try_compact_3src_instruction(const struct intel_device_info *devinfo,1502brw_compact_inst *dst, const brw_inst *src)1503{1504assert(devinfo->ver >= 8);15051506if (has_3src_unmapped_bits(devinfo, src))1507return false;15081509#define compact(field) \1510brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_##field(devinfo, src))1511#define compact_a16(field) \1512brw_compact_inst_set_3src_##field(devinfo, dst, brw_inst_3src_a16_##field(devinfo, src))15131514compact(hw_opcode);15151516if (!set_3src_control_index(devinfo, dst, src))1517return false;15181519if (!set_3src_source_index(devinfo, dst, src))1520return false;15211522if (devinfo->ver >= 12) {1523if (!set_3src_subreg_index(devinfo, dst, src))1524return false;15251526compact(swsb);1527compact(debug_control);1528compact(dst_reg_nr);1529compact(src0_reg_nr);1530compact(src1_reg_nr);1531compact(src2_reg_nr);1532} else {1533compact(dst_reg_nr);1534compact_a16(src0_rep_ctrl);1535compact(debug_control);1536compact(saturate);1537compact_a16(src1_rep_ctrl);1538compact_a16(src2_rep_ctrl);1539compact(src0_reg_nr);1540compact(src1_reg_nr);1541compact(src2_reg_nr);1542compact_a16(src0_subreg_nr);1543compact_a16(src1_subreg_nr);1544compact_a16(src2_subreg_nr);1545}1546brw_compact_inst_set_3src_cmpt_control(devinfo, dst, true);15471548#undef compact1549#undef compact_a1615501551return 
true;1552}15531554/* On SNB through ICL, compacted instructions have 12-bits for immediate1555* sources, and a 13th bit that's replicated through the high 20 bits.1556*1557* Effectively this means we get 12-bit integers, 0.0f, and some limited uses1558* of packed vectors as compactable immediates.1559*1560* On TGL+, the high 12-bits of floating-point values (:f and :hf) are encoded1561* rather than the low 12-bits. For signed integer the 12th bit is replicated,1562* while for unsigned integers it is not.1563*1564* Returns the compacted immediate, or -1 if immediate cannot be compacted1565*/1566static int1567compact_immediate(const struct intel_device_info *devinfo,1568enum brw_reg_type type, unsigned imm)1569{1570if (devinfo->ver >= 12) {1571/* 16-bit immediates need to be replicated through the 32-bit immediate1572* field1573*/1574switch (type) {1575case BRW_REGISTER_TYPE_W:1576case BRW_REGISTER_TYPE_UW:1577case BRW_REGISTER_TYPE_HF:1578if ((imm >> 16) != (imm & 0xffff))1579return -1;1580break;1581default:1582break;1583}15841585switch (type) {1586case BRW_REGISTER_TYPE_F:1587/* We get the high 12-bits as-is; rest must be zero */1588if ((imm & 0xfffff) == 0)1589return (imm >> 20) & 0xfff;1590break;1591case BRW_REGISTER_TYPE_HF:1592/* We get the high 12-bits as-is; rest must be zero */1593if ((imm & 0xf) == 0)1594return (imm >> 4) & 0xfff;1595break;1596case BRW_REGISTER_TYPE_UD:1597case BRW_REGISTER_TYPE_VF:1598case BRW_REGISTER_TYPE_UV:1599case BRW_REGISTER_TYPE_V:1600/* We get the low 12-bits as-is; rest must be zero */1601if ((imm & 0xfffff000) == 0)1602return imm & 0xfff;1603break;1604case BRW_REGISTER_TYPE_UW:1605/* We get the low 12-bits as-is; rest must be zero */1606if ((imm & 0xf000) == 0)1607return imm & 0xfff;1608break;1609case BRW_REGISTER_TYPE_D:1610/* We get the low 11-bits as-is; 12th is replicated */1611if (((int)imm >> 11) == 0 || ((int)imm >> 11) == -1)1612return imm & 0xfff;1613break;1614case BRW_REGISTER_TYPE_W:1615/* We get the low 11-bits 
as-is; 12th is replicated */1616if (((short)imm >> 11) == 0 || ((short)imm >> 11) == -1)1617return imm & 0xfff;1618break;1619case BRW_REGISTER_TYPE_NF:1620case BRW_REGISTER_TYPE_DF:1621case BRW_REGISTER_TYPE_Q:1622case BRW_REGISTER_TYPE_UQ:1623case BRW_REGISTER_TYPE_B:1624case BRW_REGISTER_TYPE_UB:1625return -1;1626}1627} else {1628/* We get the low 12 bits as-is; 13th is replicated */1629if (((int)imm >> 12) == 0 || ((int)imm >> 12 == -1)) {1630return imm & 0x1fff;1631}1632}16331634return -1;1635}16361637static int1638uncompact_immediate(const struct intel_device_info *devinfo,1639enum brw_reg_type type, unsigned compact_imm)1640{1641if (devinfo->ver >= 12) {1642switch (type) {1643case BRW_REGISTER_TYPE_F:1644return compact_imm << 20;1645case BRW_REGISTER_TYPE_HF:1646return (compact_imm << 20) | (compact_imm << 4);1647case BRW_REGISTER_TYPE_UD:1648case BRW_REGISTER_TYPE_VF:1649case BRW_REGISTER_TYPE_UV:1650case BRW_REGISTER_TYPE_V:1651return compact_imm;1652case BRW_REGISTER_TYPE_UW:1653/* Replicate */1654return compact_imm << 16 | compact_imm;1655case BRW_REGISTER_TYPE_D:1656/* Extend the 12th bit into the high 20 bits */1657return (int)(compact_imm << 20) >> 20;1658case BRW_REGISTER_TYPE_W:1659/* Extend the 12th bit into the high 4 bits and replicate */1660return ( (int)(compact_imm << 20) >> 4) |1661((short)(compact_imm << 4) >> 4);1662case BRW_REGISTER_TYPE_NF:1663case BRW_REGISTER_TYPE_DF:1664case BRW_REGISTER_TYPE_Q:1665case BRW_REGISTER_TYPE_UQ:1666case BRW_REGISTER_TYPE_B:1667case BRW_REGISTER_TYPE_UB:1668unreachable("not reached");1669}1670} else {1671/* Replicate the 13th bit into the high 19 bits */1672return (int)(compact_imm << 19) >> 19;1673}16741675unreachable("not reached");1676}16771678static bool1679has_immediate(const struct intel_device_info *devinfo, const brw_inst *inst,1680enum brw_reg_type *type)1681{1682if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {1683*type = brw_inst_src0_type(devinfo, inst);1684return *type != 
INVALID_REG_TYPE;1685} else if (brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) {1686*type = brw_inst_src1_type(devinfo, inst);1687return *type != INVALID_REG_TYPE;1688}16891690return false;1691}16921693/**1694* Applies some small changes to instruction types to increase chances of1695* compaction.1696*/1697static brw_inst1698precompact(const struct intel_device_info *devinfo, brw_inst inst)1699{1700if (brw_inst_src0_reg_file(devinfo, &inst) != BRW_IMMEDIATE_VALUE)1701return inst;17021703/* The Bspec's section titled "Non-present Operands" claims that if src01704* is an immediate that src1's type must be the same as that of src0.1705*1706* The SNB+ DataTypeIndex instruction compaction tables contain mappings1707* that do not follow this rule. E.g., from the IVB/HSW table:1708*1709* DataTypeIndex 18-Bit Mapping Mapped Meaning1710* 3 001000001011111101 r:f | i:vf | a:ud | <1> | dir |1711*1712* And from the SNB table:1713*1714* DataTypeIndex 18-Bit Mapping Mapped Meaning1715* 8 001000000111101100 a:w | i:w | a:ud | <1> | dir |1716*1717* Neither of these cause warnings from the simulator when used,1718* compacted or otherwise. In fact, all compaction mappings that have an1719* immediate in src0 use a:ud for src1.1720*1721* The GM45 instruction compaction tables do not contain mapped meanings1722* so it's not clear whether it has the restriction. We'll assume it was1723* lifted on SNB. 
(FINISHME: decode the GM45 tables and check.)1724*1725* Don't do any of this for 64-bit immediates, since the src1 fields1726* overlap with the immediate and setting them would overwrite the1727* immediate we set.1728*/1729if (devinfo->ver >= 6 &&1730!(devinfo->is_haswell &&1731brw_inst_opcode(devinfo, &inst) == BRW_OPCODE_DIM) &&1732!(devinfo->ver >= 8 &&1733(brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_DF ||1734brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_UQ ||1735brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_Q))) {1736brw_inst_set_src1_reg_hw_type(devinfo, &inst, 0);1737}17381739/* Compacted instructions only have 12-bits (plus 1 for the other 20)1740* for immediate values. Presumably the hardware engineers realized1741* that the only useful floating-point value that could be represented1742* in this format is 0.0, which can also be represented as a VF-typed1743* immediate, so they gave us the previously mentioned mapping on IVB+.1744*1745* Strangely, we do have a mapping for imm:f in src1, so we don't need1746* to do this there.1747*1748* If we see a 0.0:F, change the type to VF so that it can be compacted.1749*1750* Compaction of floating-point immediates is improved on Gfx12, thus1751* removing the need for this.1752*/1753if (devinfo->ver < 12 &&1754brw_inst_imm_ud(devinfo, &inst) == 0x0 &&1755brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&1756brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_F &&1757brw_inst_dst_hstride(devinfo, &inst) == BRW_HORIZONTAL_STRIDE_1) {1758enum brw_reg_file file = brw_inst_src0_reg_file(devinfo, &inst);1759brw_inst_set_src0_file_type(devinfo, &inst, file, BRW_REGISTER_TYPE_VF);1760}17611762/* There are no mappings for dst:d | i:d, so if the immediate is suitable1763* set the types to :UD so the instruction can be compacted.1764*1765* FINISHME: Use dst:f | imm:f on Gfx121766*/1767if (devinfo->ver < 12 &&1768compact_immediate(devinfo, BRW_REGISTER_TYPE_D,1769brw_inst_imm_ud(devinfo, 
&inst)) != -1 &&1770brw_inst_cond_modifier(devinfo, &inst) == BRW_CONDITIONAL_NONE &&1771brw_inst_src0_type(devinfo, &inst) == BRW_REGISTER_TYPE_D &&1772brw_inst_dst_type(devinfo, &inst) == BRW_REGISTER_TYPE_D) {1773enum brw_reg_file src_file = brw_inst_src0_reg_file(devinfo, &inst);1774enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, &inst);17751776brw_inst_set_src0_file_type(devinfo, &inst, src_file, BRW_REGISTER_TYPE_UD);1777brw_inst_set_dst_file_type(devinfo, &inst, dst_file, BRW_REGISTER_TYPE_UD);1778}17791780return inst;1781}17821783/**1784* Tries to compact instruction src into dst.1785*1786* It doesn't modify dst unless src is compactable, which is relied on by1787* brw_compact_instructions().1788*/1789static bool1790try_compact_instruction(const struct compaction_state *c,1791brw_compact_inst *dst, const brw_inst *src)1792{1793const struct intel_device_info *devinfo = c->devinfo;1794brw_compact_inst temp;17951796assert(brw_inst_cmpt_control(devinfo, src) == 0);17971798if (is_3src(devinfo, brw_inst_opcode(devinfo, src))) {1799if (devinfo->ver >= 8) {1800memset(&temp, 0, sizeof(temp));1801if (brw_try_compact_3src_instruction(devinfo, &temp, src)) {1802*dst = temp;1803return true;1804} else {1805return false;1806}1807} else {1808return false;1809}1810}18111812enum brw_reg_type type;1813bool is_immediate = has_immediate(devinfo, src, &type);18141815unsigned compacted_imm = 0;18161817if (is_immediate) {1818/* Instructions with immediates cannot be compacted on Gen < 6 */1819if (devinfo->ver < 6)1820return false;18211822compacted_imm = compact_immediate(devinfo, type,1823brw_inst_imm_ud(devinfo, src));1824if (compacted_imm == -1)1825return false;1826}18271828if (has_unmapped_bits(devinfo, src))1829return false;18301831memset(&temp, 0, sizeof(temp));18321833#define compact(field) \1834brw_compact_inst_set_##field(devinfo, &temp, brw_inst_##field(devinfo, src))1835#define compact_reg(field) \1836brw_compact_inst_set_##field##_reg_nr(devinfo, &temp, 
\1837brw_inst_##field##_da_reg_nr(devinfo, src))18381839compact(hw_opcode);1840compact(debug_control);18411842if (!set_control_index(c, &temp, src))1843return false;1844if (!set_datatype_index(c, &temp, src, is_immediate))1845return false;1846if (!set_subreg_index(c, &temp, src, is_immediate))1847return false;1848if (!set_src0_index(c, &temp, src))1849return false;1850if (!set_src1_index(c, &temp, src, is_immediate, compacted_imm))1851return false;18521853if (devinfo->ver >= 12) {1854compact(swsb);1855compact_reg(dst);1856compact_reg(src0);18571858if (is_immediate) {1859/* src1 reg takes the high 8 bits (of the 12-bit compacted value) */1860brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm >> 4);1861} else {1862compact_reg(src1);1863}1864} else {1865if (devinfo->ver >= 6) {1866compact(acc_wr_control);1867} else {1868compact(mask_control_ex);1869}18701871if (devinfo->ver <= 6)1872compact(flag_subreg_nr);18731874compact(cond_modifier);18751876compact_reg(dst);1877compact_reg(src0);18781879if (is_immediate) {1880/* src1 reg takes the low 8 bits (of the 13-bit compacted value) */1881brw_compact_inst_set_src1_reg_nr(devinfo, &temp, compacted_imm & 0xff);1882} else {1883compact_reg(src1);1884}1885}1886brw_compact_inst_set_cmpt_control(devinfo, &temp, true);18871888#undef compact1889#undef compact_reg18901891*dst = temp;18921893return true;1894}18951896bool1897brw_try_compact_instruction(const struct intel_device_info *devinfo,1898brw_compact_inst *dst, const brw_inst *src)1899{1900struct compaction_state c;1901compaction_state_init(&c, devinfo);1902return try_compact_instruction(&c, dst, src);1903}19041905static void1906set_uncompacted_control(const struct compaction_state *c, brw_inst *dst,1907brw_compact_inst *src)1908{1909const struct intel_device_info *devinfo = c->devinfo;1910uint32_t uncompacted =1911c->control_index_table[brw_compact_inst_control_index(devinfo, src)];19121913if (devinfo->ver >= 12) {1914brw_inst_set_bits(dst, 95, 92, (uncompacted >> 
17));1915brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);1916brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);1917brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);1918brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);1919brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);1920brw_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf);1921brw_inst_set_bits(dst, 23, 22, (uncompacted >> 6) & 0x3);1922brw_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7);1923brw_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7);1924} else if (devinfo->ver >= 8) {1925brw_inst_set_bits(dst, 33, 31, (uncompacted >> 16));1926brw_inst_set_bits(dst, 23, 12, (uncompacted >> 4) & 0xfff);1927brw_inst_set_bits(dst, 10, 9, (uncompacted >> 2) & 0x3);1928brw_inst_set_bits(dst, 34, 34, (uncompacted >> 1) & 0x1);1929brw_inst_set_bits(dst, 8, 8, (uncompacted >> 0) & 0x1);1930} else {1931brw_inst_set_bits(dst, 31, 31, (uncompacted >> 16) & 0x1);1932brw_inst_set_bits(dst, 23, 8, (uncompacted & 0xffff));19331934if (devinfo->ver == 7)1935brw_inst_set_bits(dst, 90, 89, uncompacted >> 17);1936}1937}19381939static void1940set_uncompacted_datatype(const struct compaction_state *c, brw_inst *dst,1941brw_compact_inst *src)1942{1943const struct intel_device_info *devinfo = c->devinfo;1944uint32_t uncompacted =1945c->datatype_table[brw_compact_inst_datatype_index(devinfo, src)];19461947if (devinfo->ver >= 12) {1948brw_inst_set_bits(dst, 98, 98, (uncompacted >> 19));1949brw_inst_set_bits(dst, 91, 88, (uncompacted >> 15) & 0xf);1950brw_inst_set_bits(dst, 66, 66, (uncompacted >> 14) & 0x1);1951brw_inst_set_bits(dst, 50, 50, (uncompacted >> 13) & 0x1);1952brw_inst_set_bits(dst, 49, 48, (uncompacted >> 11) & 0x3);1953brw_inst_set_bits(dst, 47, 47, (uncompacted >> 10) & 0x1);1954brw_inst_set_bits(dst, 46, 46, (uncompacted >> 9) & 0x1);1955brw_inst_set_bits(dst, 43, 40, (uncompacted >> 5) & 0xf);1956brw_inst_set_bits(dst, 39, 36, (uncompacted >> 1) & 
0xf);1957brw_inst_set_bits(dst, 35, 35, (uncompacted >> 0) & 0x1);1958} else if (devinfo->ver >= 8) {1959brw_inst_set_bits(dst, 63, 61, (uncompacted >> 18));1960brw_inst_set_bits(dst, 94, 89, (uncompacted >> 12) & 0x3f);1961brw_inst_set_bits(dst, 46, 35, (uncompacted >> 0) & 0xfff);1962} else {1963brw_inst_set_bits(dst, 63, 61, (uncompacted >> 15));1964brw_inst_set_bits(dst, 46, 32, (uncompacted & 0x7fff));1965}1966}19671968static void1969set_uncompacted_subreg(const struct compaction_state *c, brw_inst *dst,1970brw_compact_inst *src)1971{1972const struct intel_device_info *devinfo = c->devinfo;1973uint16_t uncompacted =1974c->subreg_table[brw_compact_inst_subreg_index(devinfo, src)];19751976if (devinfo->ver >= 12) {1977brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10));1978brw_inst_set_bits(dst, 71, 67, (uncompacted >> 5) & 0x1f);1979brw_inst_set_bits(dst, 55, 51, (uncompacted >> 0) & 0x1f);1980} else {1981brw_inst_set_bits(dst, 100, 96, (uncompacted >> 10));1982brw_inst_set_bits(dst, 68, 64, (uncompacted >> 5) & 0x1f);1983brw_inst_set_bits(dst, 52, 48, (uncompacted >> 0) & 0x1f);1984}1985}19861987static void1988set_uncompacted_src0(const struct compaction_state *c, brw_inst *dst,1989brw_compact_inst *src)1990{1991const struct intel_device_info *devinfo = c->devinfo;1992uint32_t compacted = brw_compact_inst_src0_index(devinfo, src);1993uint16_t uncompacted = c->src0_index_table[compacted];19941995if (devinfo->ver >= 12) {1996brw_inst_set_bits(dst, 87, 84, (uncompacted >> 8));1997brw_inst_set_bits(dst, 83, 81, (uncompacted >> 5) & 0x7);1998brw_inst_set_bits(dst, 80, 80, (uncompacted >> 4) & 0x1);1999brw_inst_set_bits(dst, 65, 64, (uncompacted >> 2) & 0x3);2000brw_inst_set_bits(dst, 45, 44, (uncompacted >> 0) & 0x3);2001} else {2002brw_inst_set_bits(dst, 88, 77, uncompacted);2003}2004}20052006static void2007set_uncompacted_src1(const struct compaction_state *c, brw_inst *dst,2008brw_compact_inst *src)2009{2010const struct intel_device_info *devinfo = 
c->devinfo;2011uint16_t uncompacted =2012c->src1_index_table[brw_compact_inst_src1_index(devinfo, src)];20132014if (devinfo->ver >= 12) {2015brw_inst_set_bits(dst, 121, 120, (uncompacted >> 10));2016brw_inst_set_bits(dst, 119, 116, (uncompacted >> 6) & 0xf);2017brw_inst_set_bits(dst, 115, 113, (uncompacted >> 3) & 0x7);2018brw_inst_set_bits(dst, 112, 112, (uncompacted >> 2) & 0x1);2019brw_inst_set_bits(dst, 97, 96, (uncompacted >> 0) & 0x3);2020} else {2021brw_inst_set_bits(dst, 120, 109, uncompacted);2022}2023}20242025static void2026set_uncompacted_3src_control_index(const struct compaction_state *c,2027brw_inst *dst, brw_compact_inst *src)2028{2029const struct intel_device_info *devinfo = c->devinfo;2030assert(devinfo->ver >= 8);20312032if (devinfo->verx10 >= 125) {2033uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);2034uint64_t uncompacted = xehp_3src_control_index_table[compacted];20352036brw_inst_set_bits(dst, 95, 92, (uncompacted >> 33));2037brw_inst_set_bits(dst, 90, 88, (uncompacted >> 30) & 0x7);2038brw_inst_set_bits(dst, 82, 80, (uncompacted >> 27) & 0x7);2039brw_inst_set_bits(dst, 50, 50, (uncompacted >> 26) & 0x1);2040brw_inst_set_bits(dst, 49, 48, (uncompacted >> 24) & 0x3);2041brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);2042brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);2043brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);2044brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);2045brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);2046brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);2047brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);2048brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);2049brw_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf);2050brw_inst_set_bits(dst, 23, 23, (uncompacted >> 7) & 0x1);2051brw_inst_set_bits(dst, 22, 22, (uncompacted >> 6) & 0x1);2052brw_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7);2053brw_inst_set_bits(dst, 18, 
16, (uncompacted >> 0) & 0x7);20542055} else if (devinfo->ver >= 12) {2056uint64_t compacted = brw_compact_inst_3src_control_index(devinfo, src);2057uint64_t uncompacted = gfx12_3src_control_index_table[compacted];20582059brw_inst_set_bits(dst, 95, 92, (uncompacted >> 32));2060brw_inst_set_bits(dst, 90, 88, (uncompacted >> 29) & 0x7);2061brw_inst_set_bits(dst, 82, 80, (uncompacted >> 26) & 0x7);2062brw_inst_set_bits(dst, 50, 50, (uncompacted >> 25) & 0x1);2063brw_inst_set_bits(dst, 48, 48, (uncompacted >> 24) & 0x1);2064brw_inst_set_bits(dst, 42, 40, (uncompacted >> 21) & 0x7);2065brw_inst_set_bits(dst, 39, 39, (uncompacted >> 20) & 0x1);2066brw_inst_set_bits(dst, 38, 36, (uncompacted >> 17) & 0x7);2067brw_inst_set_bits(dst, 34, 34, (uncompacted >> 16) & 0x1);2068brw_inst_set_bits(dst, 33, 33, (uncompacted >> 15) & 0x1);2069brw_inst_set_bits(dst, 32, 32, (uncompacted >> 14) & 0x1);2070brw_inst_set_bits(dst, 31, 31, (uncompacted >> 13) & 0x1);2071brw_inst_set_bits(dst, 28, 28, (uncompacted >> 12) & 0x1);2072brw_inst_set_bits(dst, 27, 24, (uncompacted >> 8) & 0xf);2073brw_inst_set_bits(dst, 23, 23, (uncompacted >> 7) & 0x1);2074brw_inst_set_bits(dst, 22, 22, (uncompacted >> 6) & 0x1);2075brw_inst_set_bits(dst, 21, 19, (uncompacted >> 3) & 0x7);2076brw_inst_set_bits(dst, 18, 16, (uncompacted >> 0) & 0x7);2077} else {2078uint32_t compacted = brw_compact_inst_3src_control_index(devinfo, src);2079uint32_t uncompacted = gfx8_3src_control_index_table[compacted];20802081brw_inst_set_bits(dst, 34, 32, (uncompacted >> 21) & 0x7);2082brw_inst_set_bits(dst, 28, 8, (uncompacted >> 0) & 0x1fffff);20832084if (devinfo->ver >= 9 || devinfo->is_cherryview)2085brw_inst_set_bits(dst, 36, 35, (uncompacted >> 24) & 0x3);2086}2087}20882089static void2090set_uncompacted_3src_source_index(const struct intel_device_info *devinfo,2091brw_inst *dst, brw_compact_inst *src)2092{2093assert(devinfo->ver >= 8);20942095uint32_t compacted = brw_compact_inst_3src_source_index(devinfo, src);20962097if 
(devinfo->ver >= 12) {2098const uint32_t *three_src_source_index_table =2099devinfo->verx10 >= 125 ?2100xehp_3src_source_index_table : gfx12_3src_source_index_table;2101uint32_t uncompacted = three_src_source_index_table[compacted];21022103brw_inst_set_bits(dst, 114, 114, (uncompacted >> 20));2104brw_inst_set_bits(dst, 113, 112, (uncompacted >> 18) & 0x3);2105brw_inst_set_bits(dst, 98, 98, (uncompacted >> 17) & 0x1);2106brw_inst_set_bits(dst, 97, 96, (uncompacted >> 15) & 0x3);2107brw_inst_set_bits(dst, 91, 91, (uncompacted >> 14) & 0x1);2108brw_inst_set_bits(dst, 87, 86, (uncompacted >> 12) & 0x3);2109brw_inst_set_bits(dst, 85, 84, (uncompacted >> 10) & 0x3);2110brw_inst_set_bits(dst, 83, 83, (uncompacted >> 9) & 0x1);2111brw_inst_set_bits(dst, 66, 66, (uncompacted >> 8) & 0x1);2112brw_inst_set_bits(dst, 65, 64, (uncompacted >> 6) & 0x3);2113brw_inst_set_bits(dst, 47, 47, (uncompacted >> 5) & 0x1);2114brw_inst_set_bits(dst, 46, 46, (uncompacted >> 4) & 0x1);2115brw_inst_set_bits(dst, 45, 44, (uncompacted >> 2) & 0x3);2116brw_inst_set_bits(dst, 43, 43, (uncompacted >> 1) & 0x1);2117brw_inst_set_bits(dst, 35, 35, (uncompacted >> 0) & 0x1);2118} else {2119uint64_t uncompacted = gfx8_3src_source_index_table[compacted];21202121brw_inst_set_bits(dst, 83, 83, (uncompacted >> 43) & 0x1);2122brw_inst_set_bits(dst, 114, 107, (uncompacted >> 35) & 0xff);2123brw_inst_set_bits(dst, 93, 86, (uncompacted >> 27) & 0xff);2124brw_inst_set_bits(dst, 72, 65, (uncompacted >> 19) & 0xff);2125brw_inst_set_bits(dst, 55, 37, (uncompacted >> 0) & 0x7ffff);21262127if (devinfo->ver >= 9 || devinfo->is_cherryview) {2128brw_inst_set_bits(dst, 126, 125, (uncompacted >> 47) & 0x3);2129brw_inst_set_bits(dst, 105, 104, (uncompacted >> 45) & 0x3);2130brw_inst_set_bits(dst, 84, 84, (uncompacted >> 44) & 0x1);2131} else {2132brw_inst_set_bits(dst, 125, 125, (uncompacted >> 45) & 0x1);2133brw_inst_set_bits(dst, 104, 104, (uncompacted >> 44) & 0x1);2134}2135}2136}21372138static 
void2139set_uncompacted_3src_subreg_index(const struct intel_device_info *devinfo,2140brw_inst *dst, brw_compact_inst *src)2141{2142assert(devinfo->ver >= 12);21432144uint32_t compacted = brw_compact_inst_3src_subreg_index(devinfo, src);2145uint32_t uncompacted = gfx12_3src_subreg_table[compacted];21462147brw_inst_set_bits(dst, 119, 115, (uncompacted >> 15));2148brw_inst_set_bits(dst, 103, 99, (uncompacted >> 10) & 0x1f);2149brw_inst_set_bits(dst, 71, 67, (uncompacted >> 5) & 0x1f);2150brw_inst_set_bits(dst, 55, 51, (uncompacted >> 0) & 0x1f);2151}21522153static void2154brw_uncompact_3src_instruction(const struct compaction_state *c,2155brw_inst *dst, brw_compact_inst *src)2156{2157const struct intel_device_info *devinfo = c->devinfo;2158assert(devinfo->ver >= 8);21592160#define uncompact(field) \2161brw_inst_set_3src_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))2162#define uncompact_a16(field) \2163brw_inst_set_3src_a16_##field(devinfo, dst, brw_compact_inst_3src_##field(devinfo, src))21642165uncompact(hw_opcode);21662167if (devinfo->ver >= 12) {2168set_uncompacted_3src_control_index(c, dst, src);2169set_uncompacted_3src_source_index(devinfo, dst, src);2170set_uncompacted_3src_subreg_index(devinfo, dst, src);21712172uncompact(debug_control);2173uncompact(swsb);2174uncompact(dst_reg_nr);2175uncompact(src0_reg_nr);2176uncompact(src1_reg_nr);2177uncompact(src2_reg_nr);2178} else {2179set_uncompacted_3src_control_index(c, dst, src);2180set_uncompacted_3src_source_index(devinfo, dst, src);21812182uncompact(dst_reg_nr);2183uncompact_a16(src0_rep_ctrl);2184uncompact(debug_control);2185uncompact(saturate);2186uncompact_a16(src1_rep_ctrl);2187uncompact_a16(src2_rep_ctrl);2188uncompact(src0_reg_nr);2189uncompact(src1_reg_nr);2190uncompact(src2_reg_nr);2191uncompact_a16(src0_subreg_nr);2192uncompact_a16(src1_subreg_nr);2193uncompact_a16(src2_subreg_nr);2194}2195brw_inst_set_3src_cmpt_control(devinfo, dst, false);21962197#undef uncompact2198#undef 
uncompact_a162199}22002201static void2202uncompact_instruction(const struct compaction_state *c, brw_inst *dst,2203brw_compact_inst *src)2204{2205const struct intel_device_info *devinfo = c->devinfo;2206memset(dst, 0, sizeof(*dst));22072208if (devinfo->ver >= 8 &&2209is_3src(devinfo, brw_opcode_decode(2210devinfo, brw_compact_inst_3src_hw_opcode(devinfo, src)))) {2211brw_uncompact_3src_instruction(c, dst, src);2212return;2213}22142215#define uncompact(field) \2216brw_inst_set_##field(devinfo, dst, brw_compact_inst_##field(devinfo, src))2217#define uncompact_reg(field) \2218brw_inst_set_##field##_da_reg_nr(devinfo, dst, \2219brw_compact_inst_##field##_reg_nr(devinfo, src))22202221uncompact(hw_opcode);2222uncompact(debug_control);22232224set_uncompacted_control(c, dst, src);2225set_uncompacted_datatype(c, dst, src);2226set_uncompacted_subreg(c, dst, src);2227set_uncompacted_src0(c, dst, src);22282229enum brw_reg_type type;2230if (has_immediate(devinfo, dst, &type)) {2231unsigned imm = uncompact_immediate(devinfo, type,2232brw_compact_inst_imm(devinfo, src));2233brw_inst_set_imm_ud(devinfo, dst, imm);2234} else {2235set_uncompacted_src1(c, dst, src);2236uncompact_reg(src1);2237}22382239if (devinfo->ver >= 12) {2240uncompact(swsb);2241uncompact_reg(dst);2242uncompact_reg(src0);2243} else {2244if (devinfo->ver >= 6) {2245uncompact(acc_wr_control);2246} else {2247uncompact(mask_control_ex);2248}22492250uncompact(cond_modifier);22512252if (devinfo->ver <= 6)2253uncompact(flag_subreg_nr);22542255uncompact_reg(dst);2256uncompact_reg(src0);2257}2258brw_inst_set_cmpt_control(devinfo, dst, false);22592260#undef uncompact2261#undef uncompact_reg2262}22632264void2265brw_uncompact_instruction(const struct intel_device_info *devinfo,2266brw_inst *dst, brw_compact_inst *src)2267{2268struct compaction_state c;2269compaction_state_init(&c, devinfo);2270uncompact_instruction(&c, dst, src);2271}22722273void brw_debug_compact_uncompact(const struct intel_device_info 
*devinfo,2274brw_inst *orig,2275brw_inst *uncompacted)2276{2277fprintf(stderr, "Instruction compact/uncompact changed (gen%d):\n",2278devinfo->ver);22792280fprintf(stderr, " before: ");2281brw_disassemble_inst(stderr, devinfo, orig, true, 0, NULL);22822283fprintf(stderr, " after: ");2284brw_disassemble_inst(stderr, devinfo, uncompacted, false, 0, NULL);22852286uint32_t *before_bits = (uint32_t *)orig;2287uint32_t *after_bits = (uint32_t *)uncompacted;2288fprintf(stderr, " changed bits:\n");2289for (int i = 0; i < 128; i++) {2290uint32_t before = before_bits[i / 32] & (1 << (i & 31));2291uint32_t after = after_bits[i / 32] & (1 << (i & 31));22922293if (before != after) {2294fprintf(stderr, " bit %d, %s to %s\n", i,2295before ? "set" : "unset",2296after ? "set" : "unset");2297}2298}2299}23002301static int2302compacted_between(int old_ip, int old_target_ip, int *compacted_counts)2303{2304int this_compacted_count = compacted_counts[old_ip];2305int target_compacted_count = compacted_counts[old_target_ip];2306return target_compacted_count - this_compacted_count;2307}23082309static void2310update_uip_jip(const struct intel_device_info *devinfo, brw_inst *insn,2311int this_old_ip, int *compacted_counts)2312{2313/* JIP and UIP are in units of:2314* - bytes on Gfx8+; and2315* - compacted instructions on Gfx6+.2316*/2317int shift = devinfo->ver >= 8 ? 
3 : 0;23182319int32_t jip_compacted = brw_inst_jip(devinfo, insn) >> shift;2320jip_compacted -= compacted_between(this_old_ip,2321this_old_ip + (jip_compacted / 2),2322compacted_counts);2323brw_inst_set_jip(devinfo, insn, jip_compacted << shift);23242325if (brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ENDIF ||2326brw_inst_opcode(devinfo, insn) == BRW_OPCODE_WHILE ||2327(brw_inst_opcode(devinfo, insn) == BRW_OPCODE_ELSE && devinfo->ver <= 7))2328return;23292330int32_t uip_compacted = brw_inst_uip(devinfo, insn) >> shift;2331uip_compacted -= compacted_between(this_old_ip,2332this_old_ip + (uip_compacted / 2),2333compacted_counts);2334brw_inst_set_uip(devinfo, insn, uip_compacted << shift);2335}23362337static void2338update_gfx4_jump_count(const struct intel_device_info *devinfo, brw_inst *insn,2339int this_old_ip, int *compacted_counts)2340{2341assert(devinfo->ver == 5 || devinfo->is_g4x);23422343/* Jump Count is in units of:2344* - uncompacted instructions on G45; and2345* - compacted instructions on Gfx5.2346*/2347int shift = devinfo->is_g4x ? 
1 : 0;23482349int jump_count_compacted = brw_inst_gfx4_jump_count(devinfo, insn) << shift;23502351int target_old_ip = this_old_ip + (jump_count_compacted / 2);23522353int this_compacted_count = compacted_counts[this_old_ip];2354int target_compacted_count = compacted_counts[target_old_ip];23552356jump_count_compacted -= (target_compacted_count - this_compacted_count);2357brw_inst_set_gfx4_jump_count(devinfo, insn, jump_count_compacted >> shift);2358}23592360static void2361compaction_state_init(struct compaction_state *c,2362const struct intel_device_info *devinfo)2363{2364assert(g45_control_index_table[ARRAY_SIZE(g45_control_index_table) - 1] != 0);2365assert(g45_datatype_table[ARRAY_SIZE(g45_datatype_table) - 1] != 0);2366assert(g45_subreg_table[ARRAY_SIZE(g45_subreg_table) - 1] != 0);2367assert(g45_src_index_table[ARRAY_SIZE(g45_src_index_table) - 1] != 0);2368assert(gfx6_control_index_table[ARRAY_SIZE(gfx6_control_index_table) - 1] != 0);2369assert(gfx6_datatype_table[ARRAY_SIZE(gfx6_datatype_table) - 1] != 0);2370assert(gfx6_subreg_table[ARRAY_SIZE(gfx6_subreg_table) - 1] != 0);2371assert(gfx6_src_index_table[ARRAY_SIZE(gfx6_src_index_table) - 1] != 0);2372assert(gfx7_control_index_table[ARRAY_SIZE(gfx7_control_index_table) - 1] != 0);2373assert(gfx7_datatype_table[ARRAY_SIZE(gfx7_datatype_table) - 1] != 0);2374assert(gfx7_subreg_table[ARRAY_SIZE(gfx7_subreg_table) - 1] != 0);2375assert(gfx7_src_index_table[ARRAY_SIZE(gfx7_src_index_table) - 1] != 0);2376assert(gfx8_control_index_table[ARRAY_SIZE(gfx8_control_index_table) - 1] != 0);2377assert(gfx8_datatype_table[ARRAY_SIZE(gfx8_datatype_table) - 1] != 0);2378assert(gfx8_subreg_table[ARRAY_SIZE(gfx8_subreg_table) - 1] != 0);2379assert(gfx8_src_index_table[ARRAY_SIZE(gfx8_src_index_table) - 1] != 0);2380assert(gfx11_datatype_table[ARRAY_SIZE(gfx11_datatype_table) - 1] != 0);2381assert(gfx12_control_index_table[ARRAY_SIZE(gfx12_control_index_table) - 1] != 
0);2382assert(gfx12_datatype_table[ARRAY_SIZE(gfx12_datatype_table) - 1] != 0);2383assert(gfx12_subreg_table[ARRAY_SIZE(gfx12_subreg_table) - 1] != 0);2384assert(gfx12_src0_index_table[ARRAY_SIZE(gfx12_src0_index_table) - 1] != 0);2385assert(gfx12_src1_index_table[ARRAY_SIZE(gfx12_src1_index_table) - 1] != 0);2386assert(xehp_src0_index_table[ARRAY_SIZE(xehp_src0_index_table) - 1] != 0);2387assert(xehp_src1_index_table[ARRAY_SIZE(xehp_src1_index_table) - 1] != 0);23882389c->devinfo = devinfo;2390switch (devinfo->ver) {2391case 12:2392c->control_index_table = gfx12_control_index_table;;2393c->datatype_table = gfx12_datatype_table;2394c->subreg_table = gfx12_subreg_table;2395if (devinfo->verx10 >= 125) {2396c->src0_index_table = xehp_src0_index_table;2397c->src1_index_table = xehp_src1_index_table;2398} else {2399c->src0_index_table = gfx12_src0_index_table;2400c->src1_index_table = gfx12_src1_index_table;2401}2402break;2403case 11:2404c->control_index_table = gfx8_control_index_table;2405c->datatype_table = gfx11_datatype_table;2406c->subreg_table = gfx8_subreg_table;2407c->src0_index_table = gfx8_src_index_table;2408c->src1_index_table = gfx8_src_index_table;2409break;2410case 9:2411case 8:2412c->control_index_table = gfx8_control_index_table;2413c->datatype_table = gfx8_datatype_table;2414c->subreg_table = gfx8_subreg_table;2415c->src0_index_table = gfx8_src_index_table;2416c->src1_index_table = gfx8_src_index_table;2417break;2418case 7:2419c->control_index_table = gfx7_control_index_table;2420c->datatype_table = gfx7_datatype_table;2421c->subreg_table = gfx7_subreg_table;2422c->src0_index_table = gfx7_src_index_table;2423c->src1_index_table = gfx7_src_index_table;2424break;2425case 6:2426c->control_index_table = gfx6_control_index_table;2427c->datatype_table = gfx6_datatype_table;2428c->subreg_table = gfx6_subreg_table;2429c->src0_index_table = gfx6_src_index_table;2430c->src1_index_table = gfx6_src_index_table;2431break;2432case 5:2433case 
4:2434c->control_index_table = g45_control_index_table;2435c->datatype_table = g45_datatype_table;2436c->subreg_table = g45_subreg_table;2437c->src0_index_table = g45_src_index_table;2438c->src1_index_table = g45_src_index_table;2439break;2440default:2441unreachable("unknown generation");2442}2443}24442445void2446brw_compact_instructions(struct brw_codegen *p, int start_offset,2447struct disasm_info *disasm)2448{2449if (INTEL_DEBUG & DEBUG_NO_COMPACTION)2450return;24512452const struct intel_device_info *devinfo = p->devinfo;2453void *store = p->store + start_offset / 16;2454/* For an instruction at byte offset 16*i before compaction, this is the2455* number of compacted instructions minus the number of padding NOP/NENOPs2456* that preceded it.2457*/2458int compacted_counts[(p->next_insn_offset - start_offset) / sizeof(brw_inst)];2459/* For an instruction at byte offset 8*i after compaction, this was its IP2460* (in 16-byte units) before compaction.2461*/2462int old_ip[(p->next_insn_offset - start_offset) / sizeof(brw_compact_inst) + 1];24632464if (devinfo->ver == 4 && !devinfo->is_g4x)2465return;24662467struct compaction_state c;2468compaction_state_init(&c, devinfo);24692470int offset = 0;2471int compacted_count = 0;2472for (int src_offset = 0; src_offset < p->next_insn_offset - start_offset;2473src_offset += sizeof(brw_inst)) {2474brw_inst *src = store + src_offset;2475void *dst = store + offset;24762477old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);2478compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;24792480brw_inst inst = precompact(devinfo, *src);2481brw_inst saved = inst;24822483if (try_compact_instruction(&c, dst, &inst)) {2484compacted_count++;24852486if (INTEL_DEBUG) {2487brw_inst uncompacted;2488uncompact_instruction(&c, &uncompacted, dst);2489if (memcmp(&saved, &uncompacted, sizeof(uncompacted))) {2490brw_debug_compact_uncompact(devinfo, &saved, &uncompacted);2491}2492}24932494offset += 
sizeof(brw_compact_inst);2495} else {2496/* All uncompacted instructions need to be aligned on G45. */2497if ((offset & sizeof(brw_compact_inst)) != 0 && devinfo->is_g4x){2498brw_compact_inst *align = store + offset;2499memset(align, 0, sizeof(*align));2500brw_compact_inst_set_hw_opcode(2501devinfo, align, brw_opcode_encode(devinfo, BRW_OPCODE_NENOP));2502brw_compact_inst_set_cmpt_control(devinfo, align, true);2503offset += sizeof(brw_compact_inst);2504compacted_count--;2505compacted_counts[src_offset / sizeof(brw_inst)] = compacted_count;2506old_ip[offset / sizeof(brw_compact_inst)] = src_offset / sizeof(brw_inst);25072508dst = store + offset;2509}25102511/* If we didn't compact this intruction, we need to move it down into2512* place.2513*/2514if (offset != src_offset) {2515memmove(dst, src, sizeof(brw_inst));2516}2517offset += sizeof(brw_inst);2518}2519}25202521/* Add an entry for the ending offset of the program. This greatly2522* simplifies the linked list walk at the end of the function.2523*/2524old_ip[offset / sizeof(brw_compact_inst)] =2525(p->next_insn_offset - start_offset) / sizeof(brw_inst);25262527/* Fix up control flow offsets. 
*/2528p->next_insn_offset = start_offset + offset;2529for (offset = 0; offset < p->next_insn_offset - start_offset;2530offset = next_offset(devinfo, store, offset)) {2531brw_inst *insn = store + offset;2532int this_old_ip = old_ip[offset / sizeof(brw_compact_inst)];2533int this_compacted_count = compacted_counts[this_old_ip];25342535switch (brw_inst_opcode(devinfo, insn)) {2536case BRW_OPCODE_BREAK:2537case BRW_OPCODE_CONTINUE:2538case BRW_OPCODE_HALT:2539if (devinfo->ver >= 6) {2540update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);2541} else {2542update_gfx4_jump_count(devinfo, insn, this_old_ip,2543compacted_counts);2544}2545break;25462547case BRW_OPCODE_IF:2548case BRW_OPCODE_IFF:2549case BRW_OPCODE_ELSE:2550case BRW_OPCODE_ENDIF:2551case BRW_OPCODE_WHILE:2552if (devinfo->ver >= 7) {2553if (brw_inst_cmpt_control(devinfo, insn)) {2554brw_inst uncompacted;2555uncompact_instruction(&c, &uncompacted,2556(brw_compact_inst *)insn);25572558update_uip_jip(devinfo, &uncompacted, this_old_ip,2559compacted_counts);25602561bool ret = try_compact_instruction(&c, (brw_compact_inst *)insn,2562&uncompacted);2563assert(ret); (void)ret;2564} else {2565update_uip_jip(devinfo, insn, this_old_ip, compacted_counts);2566}2567} else if (devinfo->ver == 6) {2568assert(!brw_inst_cmpt_control(devinfo, insn));25692570/* Jump Count is in units of compacted instructions on Gfx6. 
*/2571int jump_count_compacted = brw_inst_gfx6_jump_count(devinfo, insn);25722573int target_old_ip = this_old_ip + (jump_count_compacted / 2);2574int target_compacted_count = compacted_counts[target_old_ip];2575jump_count_compacted -= (target_compacted_count - this_compacted_count);2576brw_inst_set_gfx6_jump_count(devinfo, insn, jump_count_compacted);2577} else {2578update_gfx4_jump_count(devinfo, insn, this_old_ip,2579compacted_counts);2580}2581break;25822583case BRW_OPCODE_ADD:2584/* Add instructions modifying the IP register use an immediate src1,2585* and Gens that use this cannot compact instructions with immediate2586* operands.2587*/2588if (brw_inst_cmpt_control(devinfo, insn))2589break;25902591if (brw_inst_dst_reg_file(devinfo, insn) == BRW_ARCHITECTURE_REGISTER_FILE &&2592brw_inst_dst_da_reg_nr(devinfo, insn) == BRW_ARF_IP) {2593assert(brw_inst_src1_reg_file(devinfo, insn) == BRW_IMMEDIATE_VALUE);25942595int shift = 3;2596int jump_compacted = brw_inst_imm_d(devinfo, insn) >> shift;25972598int target_old_ip = this_old_ip + (jump_compacted / 2);2599int target_compacted_count = compacted_counts[target_old_ip];2600jump_compacted -= (target_compacted_count - this_compacted_count);2601brw_inst_set_imm_ud(devinfo, insn, jump_compacted << shift);2602}2603break;26042605default:2606break;2607}2608}26092610/* p->nr_insn is counting the number of uncompacted instructions still, so2611* divide. 
We do want to be sure there's a valid instruction in any2612* alignment padding, so that the next compression pass (for the FS 8/162613* compile passes) parses correctly.2614*/2615if (p->next_insn_offset & sizeof(brw_compact_inst)) {2616brw_compact_inst *align = store + offset;2617memset(align, 0, sizeof(*align));2618brw_compact_inst_set_hw_opcode(2619devinfo, align, brw_opcode_encode(devinfo, BRW_OPCODE_NOP));2620brw_compact_inst_set_cmpt_control(devinfo, align, true);2621p->next_insn_offset += sizeof(brw_compact_inst);2622}2623p->nr_insn = p->next_insn_offset / sizeof(brw_inst);26242625for (int i = 0; i < p->num_relocs; i++) {2626if (p->relocs[i].offset < (uint32_t)start_offset)2627continue;26282629assert(p->relocs[i].offset % 16 == 0);2630unsigned idx = (p->relocs[i].offset - start_offset) / 16;2631p->relocs[i].offset -= compacted_counts[idx] * 8;2632}26332634/* Update the instruction offsets for each group. */2635if (disasm) {2636int offset = 0;26372638foreach_list_typed(struct inst_group, group, link, &disasm->group_list) {2639while (start_offset + old_ip[offset / sizeof(brw_compact_inst)] *2640sizeof(brw_inst) != group->offset) {2641assert(start_offset + old_ip[offset / sizeof(brw_compact_inst)] *2642sizeof(brw_inst) < group->offset);2643offset = next_offset(devinfo, store, offset);2644}26452646group->offset = start_offset + offset;26472648offset = next_offset(devinfo, store, offset);2649}2650}2651}265226532654