Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/X86/X86FixupInstTuning.cpp
35294 views
1
//===-- X86FixupInstTuning.cpp - replace instructions ------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file does a tuning pass replacing slower machine instructions
10
// with faster ones. We do this here, as opposed to during normal ISel, as
11
// attempting to get the "right" instruction can break patterns. This pass
12
// is not meant to search for special cases where an instruction can be transformed
13
// to another, it is only meant to do transformations where the old instruction
14
// is always replaceable with the new instruction. For example:
15
//
16
// `vpermq ymm` -> `vpshufd ymm`
17
// -- BAD, not always valid (lane cross/non-repeated mask)
18
//
19
// `vpermilps ymm` -> `vpshufd ymm`
20
// -- GOOD, always replaceable
21
//
22
//===----------------------------------------------------------------------===//
23
24
#include "X86.h"
25
#include "X86InstrInfo.h"
26
#include "X86Subtarget.h"
27
#include "llvm/ADT/Statistic.h"
28
#include "llvm/CodeGen/MachineFunctionPass.h"
29
#include "llvm/CodeGen/MachineInstrBuilder.h"
30
#include "llvm/CodeGen/MachineRegisterInfo.h"
31
32
using namespace llvm;
33
34
#define DEBUG_TYPE "x86-fixup-inst-tuning"
35
36
STATISTIC(NumInstChanges, "Number of instructions changes");
37
38
namespace {
39
class X86FixupInstTuningPass : public MachineFunctionPass {
40
public:
41
static char ID;
42
43
X86FixupInstTuningPass() : MachineFunctionPass(ID) {}
44
45
StringRef getPassName() const override { return "X86 Fixup Inst Tuning"; }
46
47
bool runOnMachineFunction(MachineFunction &MF) override;
48
bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
49
MachineBasicBlock::iterator &I);
50
51
// This pass runs after regalloc and doesn't support VReg operands.
52
MachineFunctionProperties getRequiredProperties() const override {
53
return MachineFunctionProperties().set(
54
MachineFunctionProperties::Property::NoVRegs);
55
}
56
57
private:
58
const X86InstrInfo *TII = nullptr;
59
const X86Subtarget *ST = nullptr;
60
const MCSchedModel *SM = nullptr;
61
};
62
} // end anonymous namespace
63
64
// Out-of-class definition of the pass's static ID member, which the
// constructor hands to MachineFunctionPass.
char X86FixupInstTuningPass::ID = 0;
65
66
// Pass initialization boilerplate; DEBUG_TYPE is supplied for both string
// arguments of the macro.
INITIALIZE_PASS(X86FixupInstTuningPass, DEBUG_TYPE, DEBUG_TYPE, false, false)
67
68
/// Factory for the X86 instruction tuning pass. Returns a newly allocated
/// pass object.
FunctionPass *llvm::createX86FixupInstTuning() {
  FunctionPass *TuningPass = new X86FixupInstTuningPass();
  return TuningPass;
}
71
72
/// Compares two optional metrics where a smaller value is better.
///
/// \returns true if \p NewVal is strictly smaller than \p CurVal, false if it
/// is strictly larger, and std::nullopt when either value is missing or the
/// two compare equal (i.e. the comparison is inconclusive).
template <typename T>
static std::optional<bool> CmpOptionals(T NewVal, T CurVal) {
  // Without both values there is nothing to compare.
  if (!NewVal.has_value() || !CurVal.has_value())
    return std::nullopt;

  // Equal values give no preference either way.
  if (*NewVal == *CurVal)
    return std::nullopt;

  return *NewVal < *CurVal;
}
79
80
bool X86FixupInstTuningPass::processInstruction(
81
MachineFunction &MF, MachineBasicBlock &MBB,
82
MachineBasicBlock::iterator &I) {
83
MachineInstr &MI = *I;
84
unsigned Opc = MI.getOpcode();
85
unsigned NumOperands = MI.getDesc().getNumOperands();
86
87
auto GetInstTput = [&](unsigned Opcode) -> std::optional<double> {
88
// We already checked that SchedModel exists in `NewOpcPreferable`.
89
return MCSchedModel::getReciprocalThroughput(
90
*ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass())));
91
};
92
93
auto GetInstLat = [&](unsigned Opcode) -> std::optional<double> {
94
// We already checked that SchedModel exists in `NewOpcPreferable`.
95
return MCSchedModel::computeInstrLatency(
96
*ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass())));
97
};
98
99
auto GetInstSize = [&](unsigned Opcode) -> std::optional<unsigned> {
100
if (unsigned Size = TII->get(Opcode).getSize())
101
return Size;
102
// Zero size means we where unable to compute it.
103
return std::nullopt;
104
};
105
106
auto NewOpcPreferable = [&](unsigned NewOpc,
107
bool ReplaceInTie = true) -> bool {
108
std::optional<bool> Res;
109
if (SM->hasInstrSchedModel()) {
110
// Compare tput -> lat -> code size.
111
Res = CmpOptionals(GetInstTput(NewOpc), GetInstTput(Opc));
112
if (Res.has_value())
113
return *Res;
114
115
Res = CmpOptionals(GetInstLat(NewOpc), GetInstLat(Opc));
116
if (Res.has_value())
117
return *Res;
118
}
119
120
Res = CmpOptionals(GetInstSize(Opc), GetInstSize(NewOpc));
121
if (Res.has_value())
122
return *Res;
123
124
// We either have either were unable to get tput/lat/codesize or all values
125
// were equal. Return specified option for a tie.
126
return ReplaceInTie;
127
};
128
129
// `vpermilpd r, i` -> `vshufpd r, r, i`
130
// `vpermilpd r, i, k` -> `vshufpd r, r, i, k`
131
// `vshufpd` is always as fast or faster than `vpermilpd` and takes
132
// 1 less byte of code size for VEX and EVEX encoding.
133
auto ProcessVPERMILPDri = [&](unsigned NewOpc) -> bool {
134
if (!NewOpcPreferable(NewOpc))
135
return false;
136
unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();
137
MI.removeOperand(NumOperands - 1);
138
MI.addOperand(MI.getOperand(NumOperands - 2));
139
MI.setDesc(TII->get(NewOpc));
140
MI.addOperand(MachineOperand::CreateImm(MaskImm));
141
return true;
142
};
143
144
// `vpermilps r, i` -> `vshufps r, r, i`
145
// `vpermilps r, i, k` -> `vshufps r, r, i, k`
146
// `vshufps` is always as fast or faster than `vpermilps` and takes
147
// 1 less byte of code size for VEX and EVEX encoding.
148
auto ProcessVPERMILPSri = [&](unsigned NewOpc) -> bool {
149
if (!NewOpcPreferable(NewOpc))
150
return false;
151
unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();
152
MI.removeOperand(NumOperands - 1);
153
MI.addOperand(MI.getOperand(NumOperands - 2));
154
MI.setDesc(TII->get(NewOpc));
155
MI.addOperand(MachineOperand::CreateImm(MaskImm));
156
return true;
157
};
158
159
// `vpermilps m, i` -> `vpshufd m, i` iff no domain delay penalty on shuffles.
160
// `vpshufd` is always as fast or faster than `vpermilps` and takes 1 less
161
// byte of code size.
162
auto ProcessVPERMILPSmi = [&](unsigned NewOpc) -> bool {
163
// TODO: Might be work adding bypass delay if -Os/-Oz is enabled as
164
// `vpshufd` saves a byte of code size.
165
if (!ST->hasNoDomainDelayShuffle() ||
166
!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
167
return false;
168
MI.setDesc(TII->get(NewOpc));
169
return true;
170
};
171
172
// `vunpcklpd/vmovlhps r, r` -> `vunpcklqdq r, r`/`vshufpd r, r, 0x00`
173
// `vunpckhpd/vmovlhps r, r` -> `vunpckhqdq r, r`/`vshufpd r, r, 0xff`
174
// `vunpcklpd r, r, k` -> `vunpcklqdq r, r, k`/`vshufpd r, r, k, 0x00`
175
// `vunpckhpd r, r, k` -> `vunpckhqdq r, r, k`/`vshufpd r, r, k, 0xff`
176
// `vunpcklpd r, m` -> `vunpcklqdq r, m, k`
177
// `vunpckhpd r, m` -> `vunpckhqdq r, m, k`
178
// `vunpcklpd r, m, k` -> `vunpcklqdq r, m, k`
179
// `vunpckhpd r, m, k` -> `vunpckhqdq r, m, k`
180
// 1) If no bypass delay and `vunpck{l|h}qdq` faster than `vunpck{l|h}pd`
181
// -> `vunpck{l|h}qdq`
182
// 2) If `vshufpd` faster than `vunpck{l|h}pd`
183
// -> `vshufpd`
184
//
185
// `vunpcklps` -> `vunpckldq` (for all operand types if no bypass delay)
186
auto ProcessUNPCK = [&](unsigned NewOpc, unsigned MaskImm) -> bool {
187
if (!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
188
return false;
189
190
MI.setDesc(TII->get(NewOpc));
191
MI.addOperand(MachineOperand::CreateImm(MaskImm));
192
return true;
193
};
194
195
auto ProcessUNPCKToIntDomain = [&](unsigned NewOpc) -> bool {
196
// TODO it may be worth it to set ReplaceInTie to `true` as there is no real
197
// downside to the integer unpck, but if someone doesn't specify exact
198
// target we won't find it faster.
199
if (!ST->hasNoDomainDelayShuffle() ||
200
!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
201
return false;
202
MI.setDesc(TII->get(NewOpc));
203
return true;
204
};
205
206
auto ProcessUNPCKLPDrr = [&](unsigned NewOpcIntDomain,
207
unsigned NewOpc) -> bool {
208
if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
209
return true;
210
return ProcessUNPCK(NewOpc, 0x00);
211
};
212
auto ProcessUNPCKHPDrr = [&](unsigned NewOpcIntDomain,
213
unsigned NewOpc) -> bool {
214
if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
215
return true;
216
return ProcessUNPCK(NewOpc, 0xff);
217
};
218
219
auto ProcessUNPCKPDrm = [&](unsigned NewOpcIntDomain) -> bool {
220
return ProcessUNPCKToIntDomain(NewOpcIntDomain);
221
};
222
223
auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool {
224
return ProcessUNPCKToIntDomain(NewOpc);
225
};
226
227
switch (Opc) {
228
case X86::VPERMILPDri:
229
return ProcessVPERMILPDri(X86::VSHUFPDrri);
230
case X86::VPERMILPDYri:
231
return ProcessVPERMILPDri(X86::VSHUFPDYrri);
232
case X86::VPERMILPDZ128ri:
233
return ProcessVPERMILPDri(X86::VSHUFPDZ128rri);
234
case X86::VPERMILPDZ256ri:
235
return ProcessVPERMILPDri(X86::VSHUFPDZ256rri);
236
case X86::VPERMILPDZri:
237
return ProcessVPERMILPDri(X86::VSHUFPDZrri);
238
case X86::VPERMILPDZ128rikz:
239
return ProcessVPERMILPDri(X86::VSHUFPDZ128rrikz);
240
case X86::VPERMILPDZ256rikz:
241
return ProcessVPERMILPDri(X86::VSHUFPDZ256rrikz);
242
case X86::VPERMILPDZrikz:
243
return ProcessVPERMILPDri(X86::VSHUFPDZrrikz);
244
case X86::VPERMILPDZ128rik:
245
return ProcessVPERMILPDri(X86::VSHUFPDZ128rrik);
246
case X86::VPERMILPDZ256rik:
247
return ProcessVPERMILPDri(X86::VSHUFPDZ256rrik);
248
case X86::VPERMILPDZrik:
249
return ProcessVPERMILPDri(X86::VSHUFPDZrrik);
250
251
case X86::VPERMILPSri:
252
return ProcessVPERMILPSri(X86::VSHUFPSrri);
253
case X86::VPERMILPSYri:
254
return ProcessVPERMILPSri(X86::VSHUFPSYrri);
255
case X86::VPERMILPSZ128ri:
256
return ProcessVPERMILPSri(X86::VSHUFPSZ128rri);
257
case X86::VPERMILPSZ256ri:
258
return ProcessVPERMILPSri(X86::VSHUFPSZ256rri);
259
case X86::VPERMILPSZri:
260
return ProcessVPERMILPSri(X86::VSHUFPSZrri);
261
case X86::VPERMILPSZ128rikz:
262
return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz);
263
case X86::VPERMILPSZ256rikz:
264
return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz);
265
case X86::VPERMILPSZrikz:
266
return ProcessVPERMILPSri(X86::VSHUFPSZrrikz);
267
case X86::VPERMILPSZ128rik:
268
return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik);
269
case X86::VPERMILPSZ256rik:
270
return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik);
271
case X86::VPERMILPSZrik:
272
return ProcessVPERMILPSri(X86::VSHUFPSZrrik);
273
case X86::VPERMILPSmi:
274
return ProcessVPERMILPSmi(X86::VPSHUFDmi);
275
case X86::VPERMILPSYmi:
276
// TODO: See if there is a more generic way we can test if the replacement
277
// instruction is supported.
278
return ST->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi) : false;
279
case X86::VPERMILPSZ128mi:
280
return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi);
281
case X86::VPERMILPSZ256mi:
282
return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi);
283
case X86::VPERMILPSZmi:
284
return ProcessVPERMILPSmi(X86::VPSHUFDZmi);
285
case X86::VPERMILPSZ128mikz:
286
return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz);
287
case X86::VPERMILPSZ256mikz:
288
return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz);
289
case X86::VPERMILPSZmikz:
290
return ProcessVPERMILPSmi(X86::VPSHUFDZmikz);
291
case X86::VPERMILPSZ128mik:
292
return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik);
293
case X86::VPERMILPSZ256mik:
294
return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik);
295
case X86::VPERMILPSZmik:
296
return ProcessVPERMILPSmi(X86::VPSHUFDZmik);
297
298
case X86::MOVLHPSrr:
299
case X86::UNPCKLPDrr:
300
return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri);
301
case X86::VMOVLHPSrr:
302
case X86::VUNPCKLPDrr:
303
return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr, X86::VSHUFPDrri);
304
case X86::VUNPCKLPDYrr:
305
return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr, X86::VSHUFPDYrri);
306
// VMOVLHPS is always 128 bits.
307
case X86::VMOVLHPSZrr:
308
case X86::VUNPCKLPDZ128rr:
309
return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr, X86::VSHUFPDZ128rri);
310
case X86::VUNPCKLPDZ256rr:
311
return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr, X86::VSHUFPDZ256rri);
312
case X86::VUNPCKLPDZrr:
313
return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr, X86::VSHUFPDZrri);
314
case X86::VUNPCKLPDZ128rrk:
315
return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk, X86::VSHUFPDZ128rrik);
316
case X86::VUNPCKLPDZ256rrk:
317
return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk, X86::VSHUFPDZ256rrik);
318
case X86::VUNPCKLPDZrrk:
319
return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk, X86::VSHUFPDZrrik);
320
case X86::VUNPCKLPDZ128rrkz:
321
return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
322
case X86::VUNPCKLPDZ256rrkz:
323
return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
324
case X86::VUNPCKLPDZrrkz:
325
return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz, X86::VSHUFPDZrrikz);
326
case X86::UNPCKHPDrr:
327
return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr, X86::SHUFPDrri);
328
case X86::VUNPCKHPDrr:
329
return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr, X86::VSHUFPDrri);
330
case X86::VUNPCKHPDYrr:
331
return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr, X86::VSHUFPDYrri);
332
case X86::VUNPCKHPDZ128rr:
333
return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr, X86::VSHUFPDZ128rri);
334
case X86::VUNPCKHPDZ256rr:
335
return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr, X86::VSHUFPDZ256rri);
336
case X86::VUNPCKHPDZrr:
337
return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr, X86::VSHUFPDZrri);
338
case X86::VUNPCKHPDZ128rrk:
339
return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk, X86::VSHUFPDZ128rrik);
340
case X86::VUNPCKHPDZ256rrk:
341
return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk, X86::VSHUFPDZ256rrik);
342
case X86::VUNPCKHPDZrrk:
343
return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk, X86::VSHUFPDZrrik);
344
case X86::VUNPCKHPDZ128rrkz:
345
return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
346
case X86::VUNPCKHPDZ256rrkz:
347
return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
348
case X86::VUNPCKHPDZrrkz:
349
return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz, X86::VSHUFPDZrrikz);
350
case X86::UNPCKLPDrm:
351
return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm);
352
case X86::VUNPCKLPDrm:
353
return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm);
354
case X86::VUNPCKLPDYrm:
355
return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm);
356
case X86::VUNPCKLPDZ128rm:
357
return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm);
358
case X86::VUNPCKLPDZ256rm:
359
return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm);
360
case X86::VUNPCKLPDZrm:
361
return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm);
362
case X86::VUNPCKLPDZ128rmk:
363
return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk);
364
case X86::VUNPCKLPDZ256rmk:
365
return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk);
366
case X86::VUNPCKLPDZrmk:
367
return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk);
368
case X86::VUNPCKLPDZ128rmkz:
369
return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz);
370
case X86::VUNPCKLPDZ256rmkz:
371
return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz);
372
case X86::VUNPCKLPDZrmkz:
373
return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz);
374
case X86::UNPCKHPDrm:
375
return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm);
376
case X86::VUNPCKHPDrm:
377
return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm);
378
case X86::VUNPCKHPDYrm:
379
return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm);
380
case X86::VUNPCKHPDZ128rm:
381
return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm);
382
case X86::VUNPCKHPDZ256rm:
383
return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm);
384
case X86::VUNPCKHPDZrm:
385
return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm);
386
case X86::VUNPCKHPDZ128rmk:
387
return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk);
388
case X86::VUNPCKHPDZ256rmk:
389
return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk);
390
case X86::VUNPCKHPDZrmk:
391
return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk);
392
case X86::VUNPCKHPDZ128rmkz:
393
return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz);
394
case X86::VUNPCKHPDZ256rmkz:
395
return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz);
396
case X86::VUNPCKHPDZrmkz:
397
return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz);
398
399
case X86::UNPCKLPSrr:
400
return ProcessUNPCKPS(X86::PUNPCKLDQrr);
401
case X86::VUNPCKLPSrr:
402
return ProcessUNPCKPS(X86::VPUNPCKLDQrr);
403
case X86::VUNPCKLPSYrr:
404
return ProcessUNPCKPS(X86::VPUNPCKLDQYrr);
405
case X86::VUNPCKLPSZ128rr:
406
return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr);
407
case X86::VUNPCKLPSZ256rr:
408
return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr);
409
case X86::VUNPCKLPSZrr:
410
return ProcessUNPCKPS(X86::VPUNPCKLDQZrr);
411
case X86::VUNPCKLPSZ128rrk:
412
return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk);
413
case X86::VUNPCKLPSZ256rrk:
414
return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk);
415
case X86::VUNPCKLPSZrrk:
416
return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk);
417
case X86::VUNPCKLPSZ128rrkz:
418
return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz);
419
case X86::VUNPCKLPSZ256rrkz:
420
return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz);
421
case X86::VUNPCKLPSZrrkz:
422
return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz);
423
case X86::UNPCKHPSrr:
424
return ProcessUNPCKPS(X86::PUNPCKHDQrr);
425
case X86::VUNPCKHPSrr:
426
return ProcessUNPCKPS(X86::VPUNPCKHDQrr);
427
case X86::VUNPCKHPSYrr:
428
return ProcessUNPCKPS(X86::VPUNPCKHDQYrr);
429
case X86::VUNPCKHPSZ128rr:
430
return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr);
431
case X86::VUNPCKHPSZ256rr:
432
return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr);
433
case X86::VUNPCKHPSZrr:
434
return ProcessUNPCKPS(X86::VPUNPCKHDQZrr);
435
case X86::VUNPCKHPSZ128rrk:
436
return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk);
437
case X86::VUNPCKHPSZ256rrk:
438
return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk);
439
case X86::VUNPCKHPSZrrk:
440
return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk);
441
case X86::VUNPCKHPSZ128rrkz:
442
return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz);
443
case X86::VUNPCKHPSZ256rrkz:
444
return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz);
445
case X86::VUNPCKHPSZrrkz:
446
return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz);
447
case X86::UNPCKLPSrm:
448
return ProcessUNPCKPS(X86::PUNPCKLDQrm);
449
case X86::VUNPCKLPSrm:
450
return ProcessUNPCKPS(X86::VPUNPCKLDQrm);
451
case X86::VUNPCKLPSYrm:
452
return ProcessUNPCKPS(X86::VPUNPCKLDQYrm);
453
case X86::VUNPCKLPSZ128rm:
454
return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm);
455
case X86::VUNPCKLPSZ256rm:
456
return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm);
457
case X86::VUNPCKLPSZrm:
458
return ProcessUNPCKPS(X86::VPUNPCKLDQZrm);
459
case X86::VUNPCKLPSZ128rmk:
460
return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk);
461
case X86::VUNPCKLPSZ256rmk:
462
return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk);
463
case X86::VUNPCKLPSZrmk:
464
return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk);
465
case X86::VUNPCKLPSZ128rmkz:
466
return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz);
467
case X86::VUNPCKLPSZ256rmkz:
468
return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz);
469
case X86::VUNPCKLPSZrmkz:
470
return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz);
471
case X86::UNPCKHPSrm:
472
return ProcessUNPCKPS(X86::PUNPCKHDQrm);
473
case X86::VUNPCKHPSrm:
474
return ProcessUNPCKPS(X86::VPUNPCKHDQrm);
475
case X86::VUNPCKHPSYrm:
476
return ProcessUNPCKPS(X86::VPUNPCKHDQYrm);
477
case X86::VUNPCKHPSZ128rm:
478
return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm);
479
case X86::VUNPCKHPSZ256rm:
480
return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm);
481
case X86::VUNPCKHPSZrm:
482
return ProcessUNPCKPS(X86::VPUNPCKHDQZrm);
483
case X86::VUNPCKHPSZ128rmk:
484
return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk);
485
case X86::VUNPCKHPSZ256rmk:
486
return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk);
487
case X86::VUNPCKHPSZrmk:
488
return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk);
489
case X86::VUNPCKHPSZ128rmkz:
490
return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz);
491
case X86::VUNPCKHPSZ256rmkz:
492
return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz);
493
case X86::VUNPCKHPSZrmkz:
494
return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz);
495
default:
496
return false;
497
}
498
}
499
500
/// Pass entry point: caches the subtarget, instruction info and scheduling
/// model for \p MF, then offers every instruction to processInstruction().
///
/// \returns true if at least one instruction was rewritten.
bool X86FixupInstTuningPass::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "Start X86FixupInstTuning\n";);

  // Refresh the per-function state used by processInstruction().
  ST = &MF.getSubtarget<X86Subtarget>();
  TII = ST->getInstrInfo();
  SM = &ST->getSchedModel();

  bool MadeChange = false;
  for (MachineBasicBlock &Block : MF) {
    for (MachineBasicBlock::iterator InstIt = Block.begin();
         InstIt != Block.end(); ++InstIt) {
      if (!processInstruction(MF, Block, InstIt))
        continue;
      // Count each rewritten instruction for the pass statistic.
      ++NumInstChanges;
      MadeChange = true;
    }
  }

  LLVM_DEBUG(dbgs() << "End X86FixupInstTuning\n";);
  return MadeChange;
}
518
519