Path: blob/main/contrib/llvm-project/llvm/lib/Target/X86/X86FixupInstTuning.cpp
35294 views
//===-- X86FixupInstTunings.cpp - replace instructions -----------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file does a tuning pass replacing slower machine instructions9// with faster ones. We do this here, as opposed to during normal ISel, as10// attempting to get the "right" instruction can break patterns. This pass11// is not meant search for special cases where an instruction can be transformed12// to another, it is only meant to do transformations where the old instruction13// is always replacable with the new instructions. For example:14//15// `vpermq ymm` -> `vshufd ymm`16// -- BAD, not always valid (lane cross/non-repeated mask)17//18// `vpermilps ymm` -> `vshufd ymm`19// -- GOOD, always replaceable20//21//===----------------------------------------------------------------------===//2223#include "X86.h"24#include "X86InstrInfo.h"25#include "X86Subtarget.h"26#include "llvm/ADT/Statistic.h"27#include "llvm/CodeGen/MachineFunctionPass.h"28#include "llvm/CodeGen/MachineInstrBuilder.h"29#include "llvm/CodeGen/MachineRegisterInfo.h"3031using namespace llvm;3233#define DEBUG_TYPE "x86-fixup-inst-tuning"3435STATISTIC(NumInstChanges, "Number of instructions changes");3637namespace {38class X86FixupInstTuningPass : public MachineFunctionPass {39public:40static char ID;4142X86FixupInstTuningPass() : MachineFunctionPass(ID) {}4344StringRef getPassName() const override { return "X86 Fixup Inst Tuning"; }4546bool runOnMachineFunction(MachineFunction &MF) override;47bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,48MachineBasicBlock::iterator &I);4950// This pass runs after regalloc and doesn't support VReg operands.51MachineFunctionProperties getRequiredProperties() const override {52return MachineFunctionProperties().set(53MachineFunctionProperties::Property::NoVRegs);54}5556private:57const X86InstrInfo *TII = nullptr;58const X86Subtarget *ST = nullptr;59const MCSchedModel *SM = nullptr;60};61} // end anonymous namespace6263char X86FixupInstTuningPass::ID = 0;6465INITIALIZE_PASS(X86FixupInstTuningPass, DEBUG_TYPE, DEBUG_TYPE, false, false)6667FunctionPass *llvm::createX86FixupInstTuning() {68return new X86FixupInstTuningPass();69}7071template <typename T>72static std::optional<bool> CmpOptionals(T NewVal, T CurVal) {73if (NewVal.has_value() && CurVal.has_value() && *NewVal != *CurVal)74return *NewVal < *CurVal;7576return std::nullopt;77}7879bool X86FixupInstTuningPass::processInstruction(80MachineFunction &MF, MachineBasicBlock &MBB,81MachineBasicBlock::iterator &I) {82MachineInstr &MI = *I;83unsigned Opc = MI.getOpcode();84unsigned NumOperands = MI.getDesc().getNumOperands();8586auto GetInstTput = [&](unsigned Opcode) -> std::optional<double> {87// We already checked that SchedModel exists in `NewOpcPreferable`.88return MCSchedModel::getReciprocalThroughput(89*ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass())));90};9192auto GetInstLat = [&](unsigned Opcode) -> std::optional<double> {93// We already checked that SchedModel exists in `NewOpcPreferable`.94return MCSchedModel::computeInstrLatency(95*ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass())));96};9798auto GetInstSize = [&](unsigned Opcode) -> std::optional<unsigned> {99if (unsigned Size = TII->get(Opcode).getSize())100return Size;101// Zero size means we where unable to compute it.102return std::nullopt;103};104105auto NewOpcPreferable = [&](unsigned NewOpc,106bool ReplaceInTie = true) -> bool {107std::optional<bool> Res;108if (SM->hasInstrSchedModel()) {109// Compare tput -> lat -> code size.110Res = CmpOptionals(GetInstTput(NewOpc), GetInstTput(Opc));111if (Res.has_value())112return *Res;113114Res = CmpOptionals(GetInstLat(NewOpc), GetInstLat(Opc));115if (Res.has_value())116return *Res;117}118119Res = CmpOptionals(GetInstSize(Opc), GetInstSize(NewOpc));120if (Res.has_value())121return *Res;122123// We either have either were unable to get tput/lat/codesize or all values124// were equal. Return specified option for a tie.125return ReplaceInTie;126};127128// `vpermilpd r, i` -> `vshufpd r, r, i`129// `vpermilpd r, i, k` -> `vshufpd r, r, i, k`130// `vshufpd` is always as fast or faster than `vpermilpd` and takes131// 1 less byte of code size for VEX and EVEX encoding.132auto ProcessVPERMILPDri = [&](unsigned NewOpc) -> bool {133if (!NewOpcPreferable(NewOpc))134return false;135unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();136MI.removeOperand(NumOperands - 1);137MI.addOperand(MI.getOperand(NumOperands - 2));138MI.setDesc(TII->get(NewOpc));139MI.addOperand(MachineOperand::CreateImm(MaskImm));140return true;141};142143// `vpermilps r, i` -> `vshufps r, r, i`144// `vpermilps r, i, k` -> `vshufps r, r, i, k`145// `vshufps` is always as fast or faster than `vpermilps` and takes146// 1 less byte of code size for VEX and EVEX encoding.147auto ProcessVPERMILPSri = [&](unsigned NewOpc) -> bool {148if (!NewOpcPreferable(NewOpc))149return false;150unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();151MI.removeOperand(NumOperands - 1);152MI.addOperand(MI.getOperand(NumOperands - 2));153MI.setDesc(TII->get(NewOpc));154MI.addOperand(MachineOperand::CreateImm(MaskImm));155return true;156};157158// `vpermilps m, i` -> `vpshufd m, i` iff no domain delay penalty on shuffles.159// `vpshufd` is always as fast or faster than `vpermilps` and takes 1 less160// byte of code size.161auto ProcessVPERMILPSmi = [&](unsigned NewOpc) -> bool {162// TODO: Might be work adding bypass delay if -Os/-Oz is enabled as163// `vpshufd` saves a byte of code size.164if (!ST->hasNoDomainDelayShuffle() ||165!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))166return false;167MI.setDesc(TII->get(NewOpc));168return true;169};170171// `vunpcklpd/vmovlhps r, r` -> `vunpcklqdq r, r`/`vshufpd r, r, 0x00`172// `vunpckhpd/vmovlhps r, r` -> `vunpckhqdq r, r`/`vshufpd r, r, 0xff`173// `vunpcklpd r, r, k` -> `vunpcklqdq r, r, k`/`vshufpd r, r, k, 0x00`174// `vunpckhpd r, r, k` -> `vunpckhqdq r, r, k`/`vshufpd r, r, k, 0xff`175// `vunpcklpd r, m` -> `vunpcklqdq r, m, k`176// `vunpckhpd r, m` -> `vunpckhqdq r, m, k`177// `vunpcklpd r, m, k` -> `vunpcklqdq r, m, k`178// `vunpckhpd r, m, k` -> `vunpckhqdq r, m, k`179// 1) If no bypass delay and `vunpck{l|h}qdq` faster than `vunpck{l|h}pd`180// -> `vunpck{l|h}qdq`181// 2) If `vshufpd` faster than `vunpck{l|h}pd`182// -> `vshufpd`183//184// `vunpcklps` -> `vunpckldq` (for all operand types if no bypass delay)185auto ProcessUNPCK = [&](unsigned NewOpc, unsigned MaskImm) -> bool {186if (!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))187return false;188189MI.setDesc(TII->get(NewOpc));190MI.addOperand(MachineOperand::CreateImm(MaskImm));191return true;192};193194auto ProcessUNPCKToIntDomain = [&](unsigned NewOpc) -> bool {195// TODO it may be worth it to set ReplaceInTie to `true` as there is no real196// downside to the integer unpck, but if someone doesn't specify exact197// target we won't find it faster.198if (!ST->hasNoDomainDelayShuffle() ||199!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))200return false;201MI.setDesc(TII->get(NewOpc));202return true;203};204205auto ProcessUNPCKLPDrr = [&](unsigned NewOpcIntDomain,206unsigned NewOpc) -> bool {207if (ProcessUNPCKToIntDomain(NewOpcIntDomain))208return true;209return ProcessUNPCK(NewOpc, 0x00);210};211auto ProcessUNPCKHPDrr = [&](unsigned NewOpcIntDomain,212unsigned NewOpc) -> bool {213if (ProcessUNPCKToIntDomain(NewOpcIntDomain))214return true;215return ProcessUNPCK(NewOpc, 0xff);216};217218auto ProcessUNPCKPDrm = [&](unsigned NewOpcIntDomain) -> bool {219return ProcessUNPCKToIntDomain(NewOpcIntDomain);220};221222auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool {223return ProcessUNPCKToIntDomain(NewOpc);224};225226switch (Opc) {227case X86::VPERMILPDri:228return ProcessVPERMILPDri(X86::VSHUFPDrri);229case X86::VPERMILPDYri:230return ProcessVPERMILPDri(X86::VSHUFPDYrri);231case X86::VPERMILPDZ128ri:232return ProcessVPERMILPDri(X86::VSHUFPDZ128rri);233case X86::VPERMILPDZ256ri:234return ProcessVPERMILPDri(X86::VSHUFPDZ256rri);235case X86::VPERMILPDZri:236return ProcessVPERMILPDri(X86::VSHUFPDZrri);237case X86::VPERMILPDZ128rikz:238return ProcessVPERMILPDri(X86::VSHUFPDZ128rrikz);239case X86::VPERMILPDZ256rikz:240return ProcessVPERMILPDri(X86::VSHUFPDZ256rrikz);241case X86::VPERMILPDZrikz:242return ProcessVPERMILPDri(X86::VSHUFPDZrrikz);243case X86::VPERMILPDZ128rik:244return ProcessVPERMILPDri(X86::VSHUFPDZ128rrik);245case X86::VPERMILPDZ256rik:246return ProcessVPERMILPDri(X86::VSHUFPDZ256rrik);247case X86::VPERMILPDZrik:248return ProcessVPERMILPDri(X86::VSHUFPDZrrik);249250case X86::VPERMILPSri:251return ProcessVPERMILPSri(X86::VSHUFPSrri);252case X86::VPERMILPSYri:253return ProcessVPERMILPSri(X86::VSHUFPSYrri);254case X86::VPERMILPSZ128ri:255return ProcessVPERMILPSri(X86::VSHUFPSZ128rri);256case X86::VPERMILPSZ256ri:257return ProcessVPERMILPSri(X86::VSHUFPSZ256rri);258case X86::VPERMILPSZri:259return ProcessVPERMILPSri(X86::VSHUFPSZrri);260case X86::VPERMILPSZ128rikz:261return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz);262case X86::VPERMILPSZ256rikz:263return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz);264case X86::VPERMILPSZrikz:265return ProcessVPERMILPSri(X86::VSHUFPSZrrikz);266case X86::VPERMILPSZ128rik:267return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik);268case X86::VPERMILPSZ256rik:269return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik);270case X86::VPERMILPSZrik:271return ProcessVPERMILPSri(X86::VSHUFPSZrrik);272case X86::VPERMILPSmi:273return ProcessVPERMILPSmi(X86::VPSHUFDmi);274case X86::VPERMILPSYmi:275// TODO: See if there is a more generic way we can test if the replacement276// instruction is supported.277return ST->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi) : false;278case X86::VPERMILPSZ128mi:279return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi);280case X86::VPERMILPSZ256mi:281return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi);282case X86::VPERMILPSZmi:283return ProcessVPERMILPSmi(X86::VPSHUFDZmi);284case X86::VPERMILPSZ128mikz:285return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz);286case X86::VPERMILPSZ256mikz:287return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz);288case X86::VPERMILPSZmikz:289return ProcessVPERMILPSmi(X86::VPSHUFDZmikz);290case X86::VPERMILPSZ128mik:291return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik);292case X86::VPERMILPSZ256mik:293return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik);294case X86::VPERMILPSZmik:295return ProcessVPERMILPSmi(X86::VPSHUFDZmik);296297case X86::MOVLHPSrr:298case X86::UNPCKLPDrr:299return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri);300case X86::VMOVLHPSrr:301case X86::VUNPCKLPDrr:302return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr, X86::VSHUFPDrri);303case X86::VUNPCKLPDYrr:304return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr, X86::VSHUFPDYrri);305// VMOVLHPS is always 128 bits.306case X86::VMOVLHPSZrr:307case X86::VUNPCKLPDZ128rr:308return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr, X86::VSHUFPDZ128rri);309case X86::VUNPCKLPDZ256rr:310return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr, X86::VSHUFPDZ256rri);311case X86::VUNPCKLPDZrr:312return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr, X86::VSHUFPDZrri);313case X86::VUNPCKLPDZ128rrk:314return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk, X86::VSHUFPDZ128rrik);315case X86::VUNPCKLPDZ256rrk:316return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk, X86::VSHUFPDZ256rrik);317case X86::VUNPCKLPDZrrk:318return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk, X86::VSHUFPDZrrik);319case X86::VUNPCKLPDZ128rrkz:320return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz, X86::VSHUFPDZ128rrikz);321case X86::VUNPCKLPDZ256rrkz:322return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz, X86::VSHUFPDZ256rrikz);323case X86::VUNPCKLPDZrrkz:324return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz, X86::VSHUFPDZrrikz);325case X86::UNPCKHPDrr:326return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr, X86::SHUFPDrri);327case X86::VUNPCKHPDrr:328return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr, X86::VSHUFPDrri);329case X86::VUNPCKHPDYrr:330return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr, X86::VSHUFPDYrri);331case X86::VUNPCKHPDZ128rr:332return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr, X86::VSHUFPDZ128rri);333case X86::VUNPCKHPDZ256rr:334return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr, X86::VSHUFPDZ256rri);335case X86::VUNPCKHPDZrr:336return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr, X86::VSHUFPDZrri);337case X86::VUNPCKHPDZ128rrk:338return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk, X86::VSHUFPDZ128rrik);339case X86::VUNPCKHPDZ256rrk:340return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk, X86::VSHUFPDZ256rrik);341case X86::VUNPCKHPDZrrk:342return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk, X86::VSHUFPDZrrik);343case X86::VUNPCKHPDZ128rrkz:344return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz, X86::VSHUFPDZ128rrikz);345case X86::VUNPCKHPDZ256rrkz:346return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz, X86::VSHUFPDZ256rrikz);347case X86::VUNPCKHPDZrrkz:348return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz, X86::VSHUFPDZrrikz);349case X86::UNPCKLPDrm:350return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm);351case X86::VUNPCKLPDrm:352return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm);353case X86::VUNPCKLPDYrm:354return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm);355case X86::VUNPCKLPDZ128rm:356return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm);357case X86::VUNPCKLPDZ256rm:358return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm);359case X86::VUNPCKLPDZrm:360return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm);361case X86::VUNPCKLPDZ128rmk:362return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk);363case X86::VUNPCKLPDZ256rmk:364return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk);365case X86::VUNPCKLPDZrmk:366return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk);367case X86::VUNPCKLPDZ128rmkz:368return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz);369case X86::VUNPCKLPDZ256rmkz:370return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz);371case X86::VUNPCKLPDZrmkz:372return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz);373case X86::UNPCKHPDrm:374return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm);375case X86::VUNPCKHPDrm:376return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm);377case X86::VUNPCKHPDYrm:378return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm);379case X86::VUNPCKHPDZ128rm:380return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm);381case X86::VUNPCKHPDZ256rm:382return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm);383case X86::VUNPCKHPDZrm:384return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm);385case X86::VUNPCKHPDZ128rmk:386return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk);387case X86::VUNPCKHPDZ256rmk:388return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk);389case X86::VUNPCKHPDZrmk:390return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk);391case X86::VUNPCKHPDZ128rmkz:392return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz);393case X86::VUNPCKHPDZ256rmkz:394return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz);395case X86::VUNPCKHPDZrmkz:396return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz);397398case X86::UNPCKLPSrr:399return ProcessUNPCKPS(X86::PUNPCKLDQrr);400case X86::VUNPCKLPSrr:401return ProcessUNPCKPS(X86::VPUNPCKLDQrr);402case X86::VUNPCKLPSYrr:403return ProcessUNPCKPS(X86::VPUNPCKLDQYrr);404case X86::VUNPCKLPSZ128rr:405return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr);406case X86::VUNPCKLPSZ256rr:407return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr);408case X86::VUNPCKLPSZrr:409return ProcessUNPCKPS(X86::VPUNPCKLDQZrr);410case X86::VUNPCKLPSZ128rrk:411return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk);412case X86::VUNPCKLPSZ256rrk:413return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk);414case X86::VUNPCKLPSZrrk:415return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk);416case X86::VUNPCKLPSZ128rrkz:417return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz);418case X86::VUNPCKLPSZ256rrkz:419return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz);420case X86::VUNPCKLPSZrrkz:421return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz);422case X86::UNPCKHPSrr:423return ProcessUNPCKPS(X86::PUNPCKHDQrr);424case X86::VUNPCKHPSrr:425return ProcessUNPCKPS(X86::VPUNPCKHDQrr);426case X86::VUNPCKHPSYrr:427return ProcessUNPCKPS(X86::VPUNPCKHDQYrr);428case X86::VUNPCKHPSZ128rr:429return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr);430case X86::VUNPCKHPSZ256rr:431return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr);432case X86::VUNPCKHPSZrr:433return ProcessUNPCKPS(X86::VPUNPCKHDQZrr);434case X86::VUNPCKHPSZ128rrk:435return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk);436case X86::VUNPCKHPSZ256rrk:437return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk);438case X86::VUNPCKHPSZrrk:439return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk);440case X86::VUNPCKHPSZ128rrkz:441return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz);442case X86::VUNPCKHPSZ256rrkz:443return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz);444case X86::VUNPCKHPSZrrkz:445return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz);446case X86::UNPCKLPSrm:447return ProcessUNPCKPS(X86::PUNPCKLDQrm);448case X86::VUNPCKLPSrm:449return ProcessUNPCKPS(X86::VPUNPCKLDQrm);450case X86::VUNPCKLPSYrm:451return ProcessUNPCKPS(X86::VPUNPCKLDQYrm);452case X86::VUNPCKLPSZ128rm:453return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm);454case X86::VUNPCKLPSZ256rm:455return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm);456case X86::VUNPCKLPSZrm:457return ProcessUNPCKPS(X86::VPUNPCKLDQZrm);458case X86::VUNPCKLPSZ128rmk:459return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk);460case X86::VUNPCKLPSZ256rmk:461return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk);462case X86::VUNPCKLPSZrmk:463return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk);464case X86::VUNPCKLPSZ128rmkz:465return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz);466case X86::VUNPCKLPSZ256rmkz:467return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz);468case X86::VUNPCKLPSZrmkz:469return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz);470case X86::UNPCKHPSrm:471return ProcessUNPCKPS(X86::PUNPCKHDQrm);472case X86::VUNPCKHPSrm:473return ProcessUNPCKPS(X86::VPUNPCKHDQrm);474case X86::VUNPCKHPSYrm:475return ProcessUNPCKPS(X86::VPUNPCKHDQYrm);476case X86::VUNPCKHPSZ128rm:477return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm);478case X86::VUNPCKHPSZ256rm:479return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm);480case X86::VUNPCKHPSZrm:481return ProcessUNPCKPS(X86::VPUNPCKHDQZrm);482case X86::VUNPCKHPSZ128rmk:483return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk);484case X86::VUNPCKHPSZ256rmk:485return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk);486case X86::VUNPCKHPSZrmk:487return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk);488case X86::VUNPCKHPSZ128rmkz:489return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz);490case X86::VUNPCKHPSZ256rmkz:491return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz);492case X86::VUNPCKHPSZrmkz:493return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz);494default:495return false;496}497}498499bool X86FixupInstTuningPass::runOnMachineFunction(MachineFunction &MF) {500LLVM_DEBUG(dbgs() << "Start X86FixupInstTuning\n";);501bool Changed = false;502ST = &MF.getSubtarget<X86Subtarget>();503TII = ST->getInstrInfo();504SM = &ST->getSchedModel();505506for (MachineBasicBlock &MBB : MF) {507for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {508if (processInstruction(MF, MBB, I)) {509++NumInstChanges;510Changed = true;511}512}513}514LLVM_DEBUG(dbgs() << "End X86FixupInstTuning\n";);515return Changed;516}517518519