// Path: blob/main/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86EncodingOptimization.cpp
// (35293 views)
//===-- X86EncodingOptimization.cpp - X86 Encoding optimization -*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file contains the implementation of the X86 encoding optimization9//10//===----------------------------------------------------------------------===//1112#include "X86EncodingOptimization.h"13#include "X86BaseInfo.h"14#include "llvm/MC/MCExpr.h"15#include "llvm/MC/MCInst.h"16#include "llvm/MC/MCInstrDesc.h"17#include "llvm/Support/Casting.h"1819using namespace llvm;2021bool X86::optimizeInstFromVEX3ToVEX2(MCInst &MI, const MCInstrDesc &Desc) {22unsigned OpIdx1, OpIdx2;23unsigned Opcode = MI.getOpcode();24unsigned NewOpc = 0;25#define FROM_TO(FROM, TO, IDX1, IDX2) \26case X86::FROM: \27NewOpc = X86::TO; \28OpIdx1 = IDX1; \29OpIdx2 = IDX2; \30break;31#define TO_REV(FROM) FROM_TO(FROM, FROM##_REV, 0, 1)32switch (Opcode) {33default: {34// If the instruction is a commutable arithmetic instruction we might be35// able to commute the operands to get a 2 byte VEX prefix.36uint64_t TSFlags = Desc.TSFlags;37if (!Desc.isCommutable() || (TSFlags & X86II::EncodingMask) != X86II::VEX ||38(TSFlags & X86II::OpMapMask) != X86II::TB ||39(TSFlags & X86II::FormMask) != X86II::MRMSrcReg ||40(TSFlags & X86II::REX_W) || !(TSFlags & X86II::VEX_4V) ||41MI.getNumOperands() != 3)42return false;43// These two are not truly commutable.44if (Opcode == X86::VMOVHLPSrr || Opcode == X86::VUNPCKHPDrr)45return false;46OpIdx1 = 1;47OpIdx2 = 2;48break;49}50case X86::VCMPPDrri:51case X86::VCMPPDYrri:52case X86::VCMPPSrri:53case X86::VCMPPSYrri:54case X86::VCMPSDrri:55case X86::VCMPSSrri: {56switch (MI.getOperand(3).getImm() & 0x7) {57default:58return false;59case 0x00: // EQUAL60case 0x03: // UNORDERED61case 0x04: // NOT 
EQUAL62case 0x07: // ORDERED63OpIdx1 = 1;64OpIdx2 = 2;65break;66}67break;68}69// Commute operands to get a smaller encoding by using VEX.R instead of70// VEX.B if one of the registers is extended, but other isn't.71FROM_TO(VMOVZPQILo2PQIrr, VMOVPQI2QIrr, 0, 1)72TO_REV(VMOVAPDrr)73TO_REV(VMOVAPDYrr)74TO_REV(VMOVAPSrr)75TO_REV(VMOVAPSYrr)76TO_REV(VMOVDQArr)77TO_REV(VMOVDQAYrr)78TO_REV(VMOVDQUrr)79TO_REV(VMOVDQUYrr)80TO_REV(VMOVUPDrr)81TO_REV(VMOVUPDYrr)82TO_REV(VMOVUPSrr)83TO_REV(VMOVUPSYrr)84#undef TO_REV85#define TO_REV(FROM) FROM_TO(FROM, FROM##_REV, 0, 2)86TO_REV(VMOVSDrr)87TO_REV(VMOVSSrr)88#undef TO_REV89#undef FROM_TO90}91if (X86II::isX86_64ExtendedReg(MI.getOperand(OpIdx1).getReg()) ||92!X86II::isX86_64ExtendedReg(MI.getOperand(OpIdx2).getReg()))93return false;94if (NewOpc)95MI.setOpcode(NewOpc);96else97std::swap(MI.getOperand(OpIdx1), MI.getOperand(OpIdx2));98return true;99}100101// NOTE: We may write this as an InstAlias if it's only used by AsmParser. See102// validateTargetOperandClass.103bool X86::optimizeShiftRotateWithImmediateOne(MCInst &MI) {104unsigned NewOpc;105#define TO_IMM1(FROM) \106case X86::FROM##i: \107NewOpc = X86::FROM##1; \108break; \109case X86::FROM##i_EVEX: \110NewOpc = X86::FROM##1_EVEX; \111break; \112case X86::FROM##i_ND: \113NewOpc = X86::FROM##1_ND; \114break;115switch (MI.getOpcode()) {116default:117return false;118TO_IMM1(RCR8r)119TO_IMM1(RCR16r)120TO_IMM1(RCR32r)121TO_IMM1(RCR64r)122TO_IMM1(RCL8r)123TO_IMM1(RCL16r)124TO_IMM1(RCL32r)125TO_IMM1(RCL64r)126TO_IMM1(RCR8m)127TO_IMM1(RCR16m)128TO_IMM1(RCR32m)129TO_IMM1(RCR64m)130TO_IMM1(RCL8m)131TO_IMM1(RCL16m)132TO_IMM1(RCL32m)133TO_IMM1(RCL64m)134#undef TO_IMM1135#define TO_IMM1(FROM) \136case X86::FROM##i: \137NewOpc = X86::FROM##1; \138break; \139case X86::FROM##i_EVEX: \140NewOpc = X86::FROM##1_EVEX; \141break; \142case X86::FROM##i_NF: \143NewOpc = X86::FROM##1_NF; \144break; \145case X86::FROM##i_ND: \146NewOpc = X86::FROM##1_ND; \147break; \148case X86::FROM##i_NF_ND: 
\149NewOpc = X86::FROM##1_NF_ND; \150break;151TO_IMM1(ROR8r)152TO_IMM1(ROR16r)153TO_IMM1(ROR32r)154TO_IMM1(ROR64r)155TO_IMM1(ROL8r)156TO_IMM1(ROL16r)157TO_IMM1(ROL32r)158TO_IMM1(ROL64r)159TO_IMM1(SAR8r)160TO_IMM1(SAR16r)161TO_IMM1(SAR32r)162TO_IMM1(SAR64r)163TO_IMM1(SHR8r)164TO_IMM1(SHR16r)165TO_IMM1(SHR32r)166TO_IMM1(SHR64r)167TO_IMM1(SHL8r)168TO_IMM1(SHL16r)169TO_IMM1(SHL32r)170TO_IMM1(SHL64r)171TO_IMM1(ROR8m)172TO_IMM1(ROR16m)173TO_IMM1(ROR32m)174TO_IMM1(ROR64m)175TO_IMM1(ROL8m)176TO_IMM1(ROL16m)177TO_IMM1(ROL32m)178TO_IMM1(ROL64m)179TO_IMM1(SAR8m)180TO_IMM1(SAR16m)181TO_IMM1(SAR32m)182TO_IMM1(SAR64m)183TO_IMM1(SHR8m)184TO_IMM1(SHR16m)185TO_IMM1(SHR32m)186TO_IMM1(SHR64m)187TO_IMM1(SHL8m)188TO_IMM1(SHL16m)189TO_IMM1(SHL32m)190TO_IMM1(SHL64m)191#undef TO_IMM1192}193MCOperand &LastOp = MI.getOperand(MI.getNumOperands() - 1);194if (!LastOp.isImm() || LastOp.getImm() != 1)195return false;196MI.setOpcode(NewOpc);197MI.erase(&LastOp);198return true;199}200201bool X86::optimizeVPCMPWithImmediateOneOrSix(MCInst &MI) {202unsigned Opc1;203unsigned Opc2;204#define FROM_TO(FROM, TO1, TO2) \205case X86::FROM: \206Opc1 = X86::TO1; \207Opc2 = X86::TO2; \208break;209switch (MI.getOpcode()) {210default:211return false;212FROM_TO(VPCMPBZ128rmi, VPCMPEQBZ128rm, VPCMPGTBZ128rm)213FROM_TO(VPCMPBZ128rmik, VPCMPEQBZ128rmk, VPCMPGTBZ128rmk)214FROM_TO(VPCMPBZ128rri, VPCMPEQBZ128rr, VPCMPGTBZ128rr)215FROM_TO(VPCMPBZ128rrik, VPCMPEQBZ128rrk, VPCMPGTBZ128rrk)216FROM_TO(VPCMPBZ256rmi, VPCMPEQBZ256rm, VPCMPGTBZ256rm)217FROM_TO(VPCMPBZ256rmik, VPCMPEQBZ256rmk, VPCMPGTBZ256rmk)218FROM_TO(VPCMPBZ256rri, VPCMPEQBZ256rr, VPCMPGTBZ256rr)219FROM_TO(VPCMPBZ256rrik, VPCMPEQBZ256rrk, VPCMPGTBZ256rrk)220FROM_TO(VPCMPBZrmi, VPCMPEQBZrm, VPCMPGTBZrm)221FROM_TO(VPCMPBZrmik, VPCMPEQBZrmk, VPCMPGTBZrmk)222FROM_TO(VPCMPBZrri, VPCMPEQBZrr, VPCMPGTBZrr)223FROM_TO(VPCMPBZrrik, VPCMPEQBZrrk, VPCMPGTBZrrk)224FROM_TO(VPCMPDZ128rmi, VPCMPEQDZ128rm, VPCMPGTDZ128rm)225FROM_TO(VPCMPDZ128rmib, VPCMPEQDZ128rmb, 
VPCMPGTDZ128rmb)226FROM_TO(VPCMPDZ128rmibk, VPCMPEQDZ128rmbk, VPCMPGTDZ128rmbk)227FROM_TO(VPCMPDZ128rmik, VPCMPEQDZ128rmk, VPCMPGTDZ128rmk)228FROM_TO(VPCMPDZ128rri, VPCMPEQDZ128rr, VPCMPGTDZ128rr)229FROM_TO(VPCMPDZ128rrik, VPCMPEQDZ128rrk, VPCMPGTDZ128rrk)230FROM_TO(VPCMPDZ256rmi, VPCMPEQDZ256rm, VPCMPGTDZ256rm)231FROM_TO(VPCMPDZ256rmib, VPCMPEQDZ256rmb, VPCMPGTDZ256rmb)232FROM_TO(VPCMPDZ256rmibk, VPCMPEQDZ256rmbk, VPCMPGTDZ256rmbk)233FROM_TO(VPCMPDZ256rmik, VPCMPEQDZ256rmk, VPCMPGTDZ256rmk)234FROM_TO(VPCMPDZ256rri, VPCMPEQDZ256rr, VPCMPGTDZ256rr)235FROM_TO(VPCMPDZ256rrik, VPCMPEQDZ256rrk, VPCMPGTDZ256rrk)236FROM_TO(VPCMPDZrmi, VPCMPEQDZrm, VPCMPGTDZrm)237FROM_TO(VPCMPDZrmib, VPCMPEQDZrmb, VPCMPGTDZrmb)238FROM_TO(VPCMPDZrmibk, VPCMPEQDZrmbk, VPCMPGTDZrmbk)239FROM_TO(VPCMPDZrmik, VPCMPEQDZrmk, VPCMPGTDZrmk)240FROM_TO(VPCMPDZrri, VPCMPEQDZrr, VPCMPGTDZrr)241FROM_TO(VPCMPDZrrik, VPCMPEQDZrrk, VPCMPGTDZrrk)242FROM_TO(VPCMPQZ128rmi, VPCMPEQQZ128rm, VPCMPGTQZ128rm)243FROM_TO(VPCMPQZ128rmib, VPCMPEQQZ128rmb, VPCMPGTQZ128rmb)244FROM_TO(VPCMPQZ128rmibk, VPCMPEQQZ128rmbk, VPCMPGTQZ128rmbk)245FROM_TO(VPCMPQZ128rmik, VPCMPEQQZ128rmk, VPCMPGTQZ128rmk)246FROM_TO(VPCMPQZ128rri, VPCMPEQQZ128rr, VPCMPGTQZ128rr)247FROM_TO(VPCMPQZ128rrik, VPCMPEQQZ128rrk, VPCMPGTQZ128rrk)248FROM_TO(VPCMPQZ256rmi, VPCMPEQQZ256rm, VPCMPGTQZ256rm)249FROM_TO(VPCMPQZ256rmib, VPCMPEQQZ256rmb, VPCMPGTQZ256rmb)250FROM_TO(VPCMPQZ256rmibk, VPCMPEQQZ256rmbk, VPCMPGTQZ256rmbk)251FROM_TO(VPCMPQZ256rmik, VPCMPEQQZ256rmk, VPCMPGTQZ256rmk)252FROM_TO(VPCMPQZ256rri, VPCMPEQQZ256rr, VPCMPGTQZ256rr)253FROM_TO(VPCMPQZ256rrik, VPCMPEQQZ256rrk, VPCMPGTQZ256rrk)254FROM_TO(VPCMPQZrmi, VPCMPEQQZrm, VPCMPGTQZrm)255FROM_TO(VPCMPQZrmib, VPCMPEQQZrmb, VPCMPGTQZrmb)256FROM_TO(VPCMPQZrmibk, VPCMPEQQZrmbk, VPCMPGTQZrmbk)257FROM_TO(VPCMPQZrmik, VPCMPEQQZrmk, VPCMPGTQZrmk)258FROM_TO(VPCMPQZrri, VPCMPEQQZrr, VPCMPGTQZrr)259FROM_TO(VPCMPQZrrik, VPCMPEQQZrrk, VPCMPGTQZrrk)260FROM_TO(VPCMPWZ128rmi, VPCMPEQWZ128rm, 
VPCMPGTWZ128rm)261FROM_TO(VPCMPWZ128rmik, VPCMPEQWZ128rmk, VPCMPGTWZ128rmk)262FROM_TO(VPCMPWZ128rri, VPCMPEQWZ128rr, VPCMPGTWZ128rr)263FROM_TO(VPCMPWZ128rrik, VPCMPEQWZ128rrk, VPCMPGTWZ128rrk)264FROM_TO(VPCMPWZ256rmi, VPCMPEQWZ256rm, VPCMPGTWZ256rm)265FROM_TO(VPCMPWZ256rmik, VPCMPEQWZ256rmk, VPCMPGTWZ256rmk)266FROM_TO(VPCMPWZ256rri, VPCMPEQWZ256rr, VPCMPGTWZ256rr)267FROM_TO(VPCMPWZ256rrik, VPCMPEQWZ256rrk, VPCMPGTWZ256rrk)268FROM_TO(VPCMPWZrmi, VPCMPEQWZrm, VPCMPGTWZrm)269FROM_TO(VPCMPWZrmik, VPCMPEQWZrmk, VPCMPGTWZrmk)270FROM_TO(VPCMPWZrri, VPCMPEQWZrr, VPCMPGTWZrr)271FROM_TO(VPCMPWZrrik, VPCMPEQWZrrk, VPCMPGTWZrrk)272#undef FROM_TO273}274MCOperand &LastOp = MI.getOperand(MI.getNumOperands() - 1);275int64_t Imm = LastOp.getImm();276unsigned NewOpc;277if (Imm == 0)278NewOpc = Opc1;279else if(Imm == 6)280NewOpc = Opc2;281else282return false;283MI.setOpcode(NewOpc);284MI.erase(&LastOp);285return true;286}287288bool X86::optimizeMOVSX(MCInst &MI) {289unsigned NewOpc;290#define FROM_TO(FROM, TO, R0, R1) \291case X86::FROM: \292if (MI.getOperand(0).getReg() != X86::R0 || \293MI.getOperand(1).getReg() != X86::R1) \294return false; \295NewOpc = X86::TO; \296break;297switch (MI.getOpcode()) {298default:299return false;300FROM_TO(MOVSX16rr8, CBW, AX, AL) // movsbw %al, %ax --> cbtw301FROM_TO(MOVSX32rr16, CWDE, EAX, AX) // movswl %ax, %eax --> cwtl302FROM_TO(MOVSX64rr32, CDQE, RAX, EAX) // movslq %eax, %rax --> cltq303#undef FROM_TO304}305MI.clear();306MI.setOpcode(NewOpc);307return true;308}309310bool X86::optimizeINCDEC(MCInst &MI, bool In64BitMode) {311if (In64BitMode)312return false;313unsigned NewOpc;314// If we aren't in 64-bit mode we can use the 1-byte inc/dec instructions.315#define FROM_TO(FROM, TO) \316case X86::FROM: \317NewOpc = X86::TO; \318break;319switch (MI.getOpcode()) {320default:321return false;322FROM_TO(DEC16r, DEC16r_alt)323FROM_TO(DEC32r, DEC32r_alt)324FROM_TO(INC16r, INC16r_alt)325FROM_TO(INC32r, INC32r_alt)326}327MI.setOpcode(NewOpc);328return 
true;329}330331static bool isARegister(unsigned Reg) {332return Reg == X86::AL || Reg == X86::AX || Reg == X86::EAX || Reg == X86::RAX;333}334335/// Simplify things like MOV32rm to MOV32o32a.336bool X86::optimizeMOV(MCInst &MI, bool In64BitMode) {337// Don't make these simplifications in 64-bit mode; other assemblers don't338// perform them because they make the code larger.339if (In64BitMode)340return false;341unsigned NewOpc;342// We don't currently select the correct instruction form for instructions343// which have a short %eax, etc. form. Handle this by custom lowering, for344// now.345//346// Note, we are currently not handling the following instructions:347// MOV64ao8, MOV64o8a348// XCHG16ar, XCHG32ar, XCHG64ar349switch (MI.getOpcode()) {350default:351return false;352FROM_TO(MOV8mr_NOREX, MOV8o32a)353FROM_TO(MOV8mr, MOV8o32a)354FROM_TO(MOV8rm_NOREX, MOV8ao32)355FROM_TO(MOV8rm, MOV8ao32)356FROM_TO(MOV16mr, MOV16o32a)357FROM_TO(MOV16rm, MOV16ao32)358FROM_TO(MOV32mr, MOV32o32a)359FROM_TO(MOV32rm, MOV32ao32)360}361bool IsStore = MI.getOperand(0).isReg() && MI.getOperand(1).isReg();362unsigned AddrBase = IsStore;363unsigned RegOp = IsStore ? 
0 : 5;364unsigned AddrOp = AddrBase + 3;365// Check whether the destination register can be fixed.366unsigned Reg = MI.getOperand(RegOp).getReg();367if (!isARegister(Reg))368return false;369// Check whether this is an absolute address.370// FIXME: We know TLVP symbol refs aren't, but there should be a better way371// to do this here.372bool Absolute = true;373if (MI.getOperand(AddrOp).isExpr()) {374const MCExpr *MCE = MI.getOperand(AddrOp).getExpr();375if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))376if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)377Absolute = false;378}379if (Absolute && (MI.getOperand(AddrBase + X86::AddrBaseReg).getReg() != 0 ||380MI.getOperand(AddrBase + X86::AddrScaleAmt).getImm() != 1 ||381MI.getOperand(AddrBase + X86::AddrIndexReg).getReg() != 0))382return false;383// If so, rewrite the instruction.384MCOperand Saved = MI.getOperand(AddrOp);385MCOperand Seg = MI.getOperand(AddrBase + X86::AddrSegmentReg);386MI.clear();387MI.setOpcode(NewOpc);388MI.addOperand(Saved);389MI.addOperand(Seg);390return true;391}392393/// Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instruction with394/// a short fixed-register form.395static bool optimizeToFixedRegisterForm(MCInst &MI) {396unsigned NewOpc;397switch (MI.getOpcode()) {398default:399return false;400FROM_TO(ADC8ri, ADC8i8)401FROM_TO(ADC16ri, ADC16i16)402FROM_TO(ADC32ri, ADC32i32)403FROM_TO(ADC64ri32, ADC64i32)404FROM_TO(ADD8ri, ADD8i8)405FROM_TO(ADD16ri, ADD16i16)406FROM_TO(ADD32ri, ADD32i32)407FROM_TO(ADD64ri32, ADD64i32)408FROM_TO(AND8ri, AND8i8)409FROM_TO(AND16ri, AND16i16)410FROM_TO(AND32ri, AND32i32)411FROM_TO(AND64ri32, AND64i32)412FROM_TO(CMP8ri, CMP8i8)413FROM_TO(CMP16ri, CMP16i16)414FROM_TO(CMP32ri, CMP32i32)415FROM_TO(CMP64ri32, CMP64i32)416FROM_TO(OR8ri, OR8i8)417FROM_TO(OR16ri, OR16i16)418FROM_TO(OR32ri, OR32i32)419FROM_TO(OR64ri32, OR64i32)420FROM_TO(SBB8ri, SBB8i8)421FROM_TO(SBB16ri, SBB16i16)422FROM_TO(SBB32ri, SBB32i32)423FROM_TO(SBB64ri32, 
SBB64i32)424FROM_TO(SUB8ri, SUB8i8)425FROM_TO(SUB16ri, SUB16i16)426FROM_TO(SUB32ri, SUB32i32)427FROM_TO(SUB64ri32, SUB64i32)428FROM_TO(TEST8ri, TEST8i8)429FROM_TO(TEST16ri, TEST16i16)430FROM_TO(TEST32ri, TEST32i32)431FROM_TO(TEST64ri32, TEST64i32)432FROM_TO(XOR8ri, XOR8i8)433FROM_TO(XOR16ri, XOR16i16)434FROM_TO(XOR32ri, XOR32i32)435FROM_TO(XOR64ri32, XOR64i32)436}437// Check whether the destination register can be fixed.438unsigned Reg = MI.getOperand(0).getReg();439if (!isARegister(Reg))440return false;441442// If so, rewrite the instruction.443MCOperand Saved = MI.getOperand(MI.getNumOperands() - 1);444MI.clear();445MI.setOpcode(NewOpc);446MI.addOperand(Saved);447return true;448}449450unsigned X86::getOpcodeForShortImmediateForm(unsigned Opcode) {451#define ENTRY(LONG, SHORT) \452case X86::LONG: \453return X86::SHORT;454switch (Opcode) {455default:456return Opcode;457#include "X86EncodingOptimizationForImmediate.def"458}459}460461unsigned X86::getOpcodeForLongImmediateForm(unsigned Opcode) {462#define ENTRY(LONG, SHORT) \463case X86::SHORT: \464return X86::LONG;465switch (Opcode) {466default:467return Opcode;468#include "X86EncodingOptimizationForImmediate.def"469}470}471472static bool optimizeToShortImmediateForm(MCInst &MI) {473unsigned NewOpc;474#define ENTRY(LONG, SHORT) \475case X86::LONG: \476NewOpc = X86::SHORT; \477break;478switch (MI.getOpcode()) {479default:480return false;481#include "X86EncodingOptimizationForImmediate.def"482}483unsigned SkipOperands = X86::isCCMPCC(MI.getOpcode()) ? 
2 : 0;484MCOperand &LastOp = MI.getOperand(MI.getNumOperands() - 1 - SkipOperands);485if (LastOp.isExpr()) {486const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(LastOp.getExpr());487if (!SRE || SRE->getKind() != MCSymbolRefExpr::VK_X86_ABS8)488return false;489} else if (LastOp.isImm()) {490if (!isInt<8>(LastOp.getImm()))491return false;492}493MI.setOpcode(NewOpc);494return true;495}496497bool X86::optimizeToFixedRegisterOrShortImmediateForm(MCInst &MI) {498// We may optimize twice here.499bool ShortImm = optimizeToShortImmediateForm(MI);500bool FixedReg = optimizeToFixedRegisterForm(MI);501return ShortImm || FixedReg;502}503504505