GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp
//===------- X86ExpandPseudo.cpp - Expand pseudo instructions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling, if-conversion, other late
// optimizations, or simply the encoding of the instructions.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

#define DEBUG_TYPE "x86-pseudo"
#define X86_EXPAND_PSEUDO_NAME "X86 pseudo instruction expansion pass"

namespace {
class X86ExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  X86ExpandPseudo() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreservedID(MachineLoopInfoID);
    AU.addPreservedID(MachineDominatorsID);
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  const X86Subtarget *STI = nullptr;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
  const X86MachineFunctionInfo *X86FI = nullptr;
  const X86FrameLowering *X86FL = nullptr;

  bool runOnMachineFunction(MachineFunction &MF) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override {
    return "X86 pseudo instruction expansion pass";
  }

private:
  void expandICallBranchFunnel(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator MBBI);
  void expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandMBB(MachineBasicBlock &MBB);

  /// This function expands pseudos which affect control flow.
  /// It is done in a separate pass to simplify block navigation in the
  /// main pass (which calls expandMBB).
  bool expandPseudosWhichAffectControlFlow(MachineFunction &MF);

  /// Expand X86::VASTART_SAVE_XMM_REGS into a set of XMM copying
  /// instructions, placed into a separate block guarded by a check of the
  /// %al register (for the System V ABI).
  void expandVastartSaveXmmRegs(
      MachineBasicBlock *EntryBlk,
      MachineBasicBlock::iterator VAStartPseudoInstr) const;
};
char X86ExpandPseudo::ID = 0;

} // End anonymous namespace.

INITIALIZE_PASS(X86ExpandPseudo, DEBUG_TYPE, X86_EXPAND_PSEUDO_NAME, false,
                false)

void X86ExpandPseudo::expandICallBranchFunnel(
    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) {
  MachineBasicBlock *JTMBB = MBB;
  MachineInstr *JTInst = &*MBBI;
  MachineFunction *MF = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();
  auto InsPt = MachineFunction::iterator(MBB);
  ++InsPt;
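
  // In outline: the pseudo carries a selector (operand 0), a combined global
  // (operand 1), and (offset, callee) operand pairs sorted by address. The
  // lambdas below emit a binary search over those addresses, comparing the
  // selector against a midpoint and branching on below/equal until a single
  // callee remains, which is tail-called.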

  std::vector<std::pair<MachineBasicBlock *, unsigned>> TargetMBBs;
  const DebugLoc &DL = JTInst->getDebugLoc();
  MachineOperand Selector = JTInst->getOperand(0);
  const GlobalValue *CombinedGlobal = JTInst->getOperand(1).getGlobal();

  auto CmpTarget = [&](unsigned Target) {
    if (Selector.isReg())
      MBB->addLiveIn(Selector.getReg());
    BuildMI(*MBB, MBBI, DL, TII->get(X86::LEA64r), X86::R11)
        .addReg(X86::RIP)
        .addImm(1)
        .addReg(0)
        .addGlobalAddress(CombinedGlobal,
                          JTInst->getOperand(2 + 2 * Target).getImm())
        .addReg(0);
    BuildMI(*MBB, MBBI, DL, TII->get(X86::CMP64rr))
        .add(Selector)
        .addReg(X86::R11);
  };

  auto CreateMBB = [&]() {
    auto *NewMBB = MF->CreateMachineBasicBlock(BB);
    MBB->addSuccessor(NewMBB);
    if (!MBB->isLiveIn(X86::EFLAGS))
      MBB->addLiveIn(X86::EFLAGS);
    return NewMBB;
  };

  auto EmitCondJump = [&](unsigned CC, MachineBasicBlock *ThenMBB) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::JCC_1)).addMBB(ThenMBB).addImm(CC);

    auto *ElseMBB = CreateMBB();
    MF->insert(InsPt, ElseMBB);
    MBB = ElseMBB;
    MBBI = MBB->end();
  };

  auto EmitCondJumpTarget = [&](unsigned CC, unsigned Target) {
    auto *ThenMBB = CreateMBB();
    TargetMBBs.push_back({ThenMBB, Target});
    EmitCondJump(CC, ThenMBB);
  };

  auto EmitTailCall = [&](unsigned Target) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * Target));
  };

  std::function<void(unsigned, unsigned)> EmitBranchFunnel =
      [&](unsigned FirstTarget, unsigned NumTargets) {
        if (NumTargets == 1) {
          EmitTailCall(FirstTarget);
          return;
        }

        if (NumTargets == 2) {
          CmpTarget(FirstTarget + 1);
          EmitCondJumpTarget(X86::COND_B, FirstTarget);
          EmitTailCall(FirstTarget + 1);
          return;
        }

        if (NumTargets < 6) {
          CmpTarget(FirstTarget + 1);
          EmitCondJumpTarget(X86::COND_B, FirstTarget);
          EmitCondJumpTarget(X86::COND_E, FirstTarget + 1);
          EmitBranchFunnel(FirstTarget + 2, NumTargets - 2);
          return;
        }

        auto *ThenMBB = CreateMBB();
        CmpTarget(FirstTarget + (NumTargets / 2));
        EmitCondJump(X86::COND_B, ThenMBB);
        EmitCondJumpTarget(X86::COND_E, FirstTarget + (NumTargets / 2));
        EmitBranchFunnel(FirstTarget + (NumTargets / 2) + 1,
                         NumTargets - (NumTargets / 2) - 1);

        MF->insert(InsPt, ThenMBB);
        MBB = ThenMBB;
        MBBI = MBB->end();
        EmitBranchFunnel(FirstTarget, NumTargets / 2);
      };

  EmitBranchFunnel(0, (JTInst->getNumOperands() - 2) / 2);
  for (auto P : TargetMBBs) {
    MF->insert(InsPt, P.first);
    BuildMI(P.first, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * P.second));
  }
  JTMBB->erase(JTInst);
}

void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to call instruction, followed by the special
  // "movq %rax, %rdi" marker.
  MachineInstr &MI = *MBBI;

  MachineInstr *OriginalCall;
  assert((MI.getOperand(1).isGlobal() || MI.getOperand(1).isReg()) &&
         "invalid operand for regular call");
  unsigned Opc = -1;
  if (MI.getOpcode() == X86::CALL64m_RVMARKER)
    Opc = X86::CALL64m;
  else if (MI.getOpcode() == X86::CALL64r_RVMARKER)
    Opc = X86::CALL64r;
  else if (MI.getOpcode() == X86::CALL64pcrel32_RVMARKER)
    Opc = X86::CALL64pcrel32;
  else
    llvm_unreachable("unexpected opcode");

  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  bool RAXImplicitDead = false;
  for (MachineOperand &Op : llvm::drop_begin(MI.operands())) {
    // RAX may be 'implicit dead', if there are no other users of the return
    // value. We introduce a new use, so change it to 'implicit def'.
    if (Op.isReg() && Op.isImplicit() && Op.isDead() &&
        TRI->regsOverlap(Op.getReg(), X86::RAX)) {
      Op.setIsDead(false);
      Op.setIsDef(true);
      RAXImplicitDead = true;
    }
    OriginalCall->addOperand(Op);
  }

  // Emit marker "movq %rax, %rdi". %rdi is not callee-saved, so it cannot be
  // live across the earlier call. The call to the ObjC runtime function returns
  // the first argument, so the value of %rax is unchanged after the ObjC
  // runtime call. On Windows targets, the runtime call follows the regular
  // x64 calling convention and expects the first argument in %rcx.
  auto TargetReg = STI->getTargetTriple().isOSWindows() ? X86::RCX : X86::RDI;
  auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::MOV64rr))
                     .addReg(TargetReg, RegState::Define)
                     .addReg(X86::RAX)
                     .getInstr();
  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, Marker);

  // Emit call to ObjC runtime.
  const uint32_t *RegMask =
      TRI->getCallPreservedMask(*MBB.getParent(), CallingConv::C);
  MachineInstr *RtCall =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64pcrel32))
          .addGlobalAddress(MI.getOperand(0).getGlobal(), 0, 0)
          .addRegMask(RegMask)
          .addReg(X86::RAX,
                  RegState::Implicit |
                      (RAXImplicitDead ? (RegState::Dead | RegState::Define)
                                       : RegState::Define))
          .getInstr();
  MI.eraseFromParent();

  auto &TM = MBB.getParent()->getTarget();
  // On Darwin platforms, wrap the expanded sequence in a bundle to prevent
  // later optimizations from breaking up the sequence.
  if (TM.getTargetTriple().isOSDarwin())
    finalizeBundle(MBB, OriginalCall->getIterator(),
                   std::next(RtCall->getIterator()));
}

/// If \p MBBI is a pseudo instruction, this method expands
/// it to the corresponding (sequence of) actual instruction(s).
/// \returns true if \p MBBI has been expanded.
bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  const DebugLoc &DL = MBBI->getDebugLoc();
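  // The macro below selects the EVEX-encoded variant of an opcode when the
  // subtarget has extended GPRs, and the legacy variant otherwise.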
#define GET_EGPR_IF_ENABLED(OPC) (STI->hasEGPR() ? OPC##_EVEX : OPC)
  switch (Opcode) {
  default:
    return false;
  case X86::TCRETURNdi:
  case X86::TCRETURNdicc:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNdi64cc:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64: {
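    // Sketch of this case: a TCRETURN* pseudo holds the jump target plus an
    // immediate stack adjustment, and lowers to an optional SP update
    // followed by the matching TAILJMP*; e.g. a TCRETURNdi64 with no
    // adjustment becomes a single tail "jmp callee".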
    bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? X86::AddrNumOperands
                                                         : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
      assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
    }

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += X86FL->mergeSPUpdates(MBB, MBBI, true);
      X86FL->emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue=*/true);
    }

    // Jump to label or value in register.
    bool IsWin64 = STI->isTargetWin64();
    if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc ||
        Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) {
      unsigned Op;
      switch (Opcode) {
      case X86::TCRETURNdi:
        Op = X86::TAILJMPd;
        break;
      case X86::TCRETURNdicc:
        Op = X86::TAILJMPd_CC;
        break;
      case X86::TCRETURNdi64cc:
        assert(!MBB.getParent()->hasWinCFI() &&
               "Conditional tail calls confuse "
               "the Win64 unwinder.");
        Op = X86::TAILJMPd64_CC;
        break;
      default:
        // Note: Win64 uses REX prefixes for indirect jumps out of functions,
        // but not for direct ones.
        Op = X86::TAILJMPd64;
        break;
      }
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      if (JumpTarget.isGlobal()) {
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      } else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
      if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
        MIB.addImm(MBBI->getOperand(2).getImm());
      }

    } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
      unsigned Op = (Opcode == X86::TCRETURNmi)
                        ? X86::TAILJMPm
                        : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      for (unsigned i = 0; i != X86::AddrNumOperands; ++i)
        MIB.add(MBBI->getOperand(i));
    } else if (Opcode == X86::TCRETURNri64) {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL,
              TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
          .add(JumpTarget);
    } else {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr))
          .add(JumpTarget);
    }

    MachineInstr &NewMI = *std::prev(MBBI);
    NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI);
    NewMI.setCFIType(*MBB.getParent(), MI.getCFIType());

    // Update the call site info.
    if (MBBI->isCandidateForCallSiteEntry())
      MBB.getParent()->moveCallSiteInfo(&*MBBI, &NewMI);

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);

    return true;
  }
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    const bool Uses64BitFramePtr =
        STI->isTarget64BitLP64() || STI->isTargetNaCl64();
    Register StackPtr = TRI->getStackRegister();
    BuildMI(MBB, MBBI, DL,
            TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), StackPtr)
        .addReg(DestAddr.getReg());
    // The EH_RETURN pseudo is really removed during MC lowering.
    return true;
  }
  case X86::IRET: {
    // Adjust stack to erase error code.
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, true);
    // Replace pseudo with machine iret.
    unsigned RetOp = STI->is64Bit() ? X86::IRET64 : X86::IRET32;
    // Use UIRET if UINTR is present (except when building the kernel).
    if (STI->is64Bit() && STI->hasUINTR() &&
        MBB.getParent()->getTarget().getCodeModel() != CodeModel::Kernel)
      RetOp = X86::UIRET;
    BuildMI(MBB, MBBI, DL, TII->get(RetOp));
    MBB.erase(MBBI);
    return true;
  }
  case X86::RET: {
    // Adjust stack to erase error code.
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    MachineInstrBuilder MIB;
    if (StackAdj == 0) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RET64 : X86::RET32));
    } else if (isUInt<16>(StackAdj)) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETI64 : X86::RETI32))
                .addImm(StackAdj);
    } else {
      assert(!STI->is64Bit() &&
             "shouldn't need to do this for x86_64 targets!");
      // A ret can only handle immediates as big as 2**16-1. If we need to pop
      // off bytes before the return address, we must do it manually.
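      // Illustrative sequence (registers as in the code below):
      //   popl %ecx            ## save the return address
      //   addl $StackAdj, %esp ## release the stack area
      //   pushl %ecx           ## put the return address back
      //   retl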
      BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r))
          .addReg(X86::ECX, RegState::Define);
      X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, /*InEpilogue=*/true);
      BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX);
      MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RET32));
    }
    for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
      MIB.add(MBBI->getOperand(I));
    MBB.erase(MBBI);
    return true;
  }
  case X86::LCMPXCHG16B_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudocmpxchg Addr, <4 opds for the address>, InArg, SaveRbx
    // =>
    // RBX = InArg
    // actualcmpxchg Addr
    // RBX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(6);
    Register SaveRbx = MBBI->getOperand(7).getReg();

    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    // NOTE: We don't copy the kill flag since the input might be the same reg
    // as one of the other operands of LCMPXCHG16B.
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, InArg.getReg(), false);
    // Create the actual instruction.
    MachineInstr *NewInstr = BuildMI(MBB, MBBI, DL, TII->get(X86::LCMPXCHG16B));
    // Copy the operands related to the address.
    for (unsigned Idx = 1; Idx < 6; ++Idx)
      NewInstr->addOperand(MBBI->getOperand(Idx));
    // Finally, restore the value of RBX.
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx,
                     /*SrcIsKill*/ true);

    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  // Loading/storing mask pairs requires two kmov operations. The second one
  // needs a 2-byte displacement relative to the specified address (the spill
  // size is 32 bits). Pairs of masks from 1 bit up to 16 bits all use the
  // same spill size: they are all stored with MASKPAIR16STORE and loaded
  // with MASKPAIR16LOAD.
  //
  // The displacement value might wrap around in theory, thus the asserts in
  // both cases.
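  //
  // For instance (illustrative only), a pair spilled at displacement 8 off
  // %rsp is reloaded as:
  //   kmovw 8(%rsp), %k0       ## low half at Disp
  //   kmovw 10(%rsp), %k1      ## high half at Disp + 2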
  case X86::MASKPAIR16LOAD: {
    int64_t Disp = MBBI->getOperand(1 + X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(0).getReg();
    bool DstIsDead = MBBI->getOperand(0).isDead();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWkm)))
            .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
    auto MIBHi =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWkm)))
            .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(1 + i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(1 + i));
    }

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::MASKPAIR16STORE: {
    int64_t Disp = MBBI->getOperand(X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(X86::AddrNumOperands).getReg();
    bool SrcIsKill = MBBI->getOperand(X86::AddrNumOperands).isKill();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWmk)));
    auto MIBHi =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWmk)));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(i));
    }
    MIBLo.addReg(Reg0, getKillRegState(SrcIsKill));
    MIBHi.addReg(Reg1, getKillRegState(SrcIsKill));

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::MWAITX_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudomwaitx InArg, SaveRbx
    // =>
    // [E|R]BX = InArg
    // actualmwaitx
    // [E|R]BX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(1);
    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    TII->copyPhysReg(MBB, MBBI, DL, X86::EBX, InArg.getReg(), InArg.isKill());
    // Create the actual instruction.
    BuildMI(MBB, MBBI, DL, TII->get(X86::MWAITXrrr));
    // Finally, restore the value of RBX.
    Register SaveRbx = MBBI->getOperand(2).getReg();
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx, /*SrcIsKill*/ true);
    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  case TargetOpcode::ICALL_BRANCH_FUNNEL:
    expandICallBranchFunnel(&MBB, MBBI);
    return true;
  case X86::PLDTILECFGV: {
    MI.setDesc(TII->get(GET_EGPR_IF_ENABLED(X86::LDTILECFG)));
    return true;
  }
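  // The AMX PT*V pseudos below carry explicit shape operands (row/col) that
  // the real tile instructions do not encode separately; each expansion
  // strips those operands and substitutes the real opcode.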
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V: {
    for (unsigned i = 2; i > 0; --i)
      MI.removeOperand(i);
    unsigned Opc = Opcode == X86::PTILELOADDV
                       ? GET_EGPR_IF_ENABLED(X86::TILELOADD)
                       : GET_EGPR_IF_ENABLED(X86::TILELOADDT1);
    MI.setDesc(TII->get(Opc));
    return true;
  }
  case X86::PTCMMIMFP16PSV:
  case X86::PTCMMRLFP16PSV:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV: {
    MI.untieRegOperand(4);
    for (unsigned i = 3; i > 0; --i)
      MI.removeOperand(i);
    unsigned Opc;
    switch (Opcode) {
    case X86::PTCMMIMFP16PSV: Opc = X86::TCMMIMFP16PS; break;
    case X86::PTCMMRLFP16PSV: Opc = X86::TCMMRLFP16PS; break;
    case X86::PTDPBSSDV: Opc = X86::TDPBSSD; break;
    case X86::PTDPBSUDV: Opc = X86::TDPBSUD; break;
    case X86::PTDPBUSDV: Opc = X86::TDPBUSD; break;
    case X86::PTDPBUUDV: Opc = X86::TDPBUUD; break;
    case X86::PTDPBF16PSV: Opc = X86::TDPBF16PS; break;
    case X86::PTDPFP16PSV: Opc = X86::TDPFP16PS; break;
    default: llvm_unreachable("Impossible Opcode!");
    }
    MI.setDesc(TII->get(Opc));
    MI.tieOperands(0, 1);
    return true;
  }
  case X86::PTILESTOREDV: {
    for (int i = 1; i >= 0; --i)
      MI.removeOperand(i);
    MI.setDesc(TII->get(GET_EGPR_IF_ENABLED(X86::TILESTORED)));
    return true;
  }
#undef GET_EGPR_IF_ENABLED
  case X86::PTILEZEROV: {
    for (int i = 2; i > 0; --i) // Remove row, col.
      MI.removeOperand(i);
    MI.setDesc(TII->get(X86::TILEZERO));
    return true;
  }
  case X86::CALL64pcrel32_RVMARKER:
  case X86::CALL64r_RVMARKER:
  case X86::CALL64m_RVMARKER:
    expandCALL_RVMARKER(MBB, MBBI);
    return true;
  case X86::ADD32mi_ND:
  case X86::ADD64mi32_ND:
  case X86::SUB32mi_ND:
  case X86::SUB64mi32_ND:
  case X86::AND32mi_ND:
  case X86::AND64mi32_ND:
  case X86::OR32mi_ND:
  case X86::OR64mi32_ND:
  case X86::XOR32mi_ND:
  case X86::XOR64mi32_ND:
  case X86::ADC32mi_ND:
  case X86::ADC64mi32_ND:
  case X86::SBB32mi_ND:
  case X86::SBB64mi32_ND: {
    // It's possible for an EVEX-encoded legacy instruction to reach the
    // 15-byte instruction length limit: 4 bytes of EVEX prefix + 1 byte of
    // opcode + 1 byte of ModRM + 1 byte of SIB + 4 bytes of displacement +
    // 4 bytes of immediate = 15 bytes in total, e.g.
    //
    //   subq $184, %fs:257(%rbx, %rcx), %rax
    //
    // In such a case, no additional (ADSIZE or segment override) prefix can
    // be used. To resolve the issue, we split the "long" instruction into two
    // instructions:
    //
    //   movq %fs:257(%rbx, %rcx),%rax
    //   subq $184, %rax
    //
    // Therefore we consider the OPmi_ND to be a pseudo instruction to some
    // extent.
    const MachineOperand &ImmOp =
        MI.getOperand(MI.getNumExplicitOperands() - 1);
    // If the immediate is an expr, conservatively estimate 4 bytes.
    if (ImmOp.isImm() && isInt<8>(ImmOp.getImm()))
      return false;
    int MemOpNo = X86::getFirstAddrOperandIdx(MI);
    const MachineOperand &DispOp = MI.getOperand(MemOpNo + X86::AddrDisp);
    Register Base = MI.getOperand(MemOpNo + X86::AddrBaseReg).getReg();
    // If the displacement is an expr, conservatively estimate 4 bytes.
    if (Base && DispOp.isImm() && isInt<8>(DispOp.getImm()))
      return false;
    // There can be at most one of the three: SIB, segment override register,
    // ADSIZE prefix.
    Register Index = MI.getOperand(MemOpNo + X86::AddrIndexReg).getReg();
    unsigned Count = !!MI.getOperand(MemOpNo + X86::AddrSegmentReg).getReg();
    if (X86II::needSIB(Base, Index, /*In64BitMode=*/true))
      ++Count;
    if (X86MCRegisterClasses[X86::GR32RegClassID].contains(Base) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(Index))
      ++Count;
    if (Count < 2)
      return false;
    unsigned Opc, LoadOpc;
    switch (Opcode) {
#define MI_TO_RI(OP)                                                           \
  case X86::OP##32mi_ND:                                                       \
    Opc = X86::OP##32ri;                                                       \
    LoadOpc = X86::MOV32rm;                                                    \
    break;                                                                     \
  case X86::OP##64mi32_ND:                                                     \
    Opc = X86::OP##64ri32;                                                     \
    LoadOpc = X86::MOV64rm;                                                    \
    break;

    default:
      llvm_unreachable("Unexpected Opcode");
      MI_TO_RI(ADD);
      MI_TO_RI(SUB);
      MI_TO_RI(AND);
      MI_TO_RI(OR);
      MI_TO_RI(XOR);
      MI_TO_RI(ADC);
      MI_TO_RI(SBB);
#undef MI_TO_RI
    }
    // Insert OPri.
    Register DestReg = MI.getOperand(0).getReg();
    BuildMI(MBB, std::next(MBBI), DL, TII->get(Opc), DestReg)
        .addReg(DestReg)
        .add(ImmOp);
    // Change OPmi_ND to MOVrm.
    for (unsigned I = MI.getNumImplicitOperands() + 1; I != 0; --I)
      MI.removeOperand(MI.getNumOperands() - 1);
    MI.setDesc(TII->get(LoadOpc));
    return true;
  }
  }
  llvm_unreachable("Previous switch has a fallthrough?");
}

// This function creates an additional block for storing varargs guarded
// registers. It adds a check for %al into the entry block to skip
// GuardedRegsBlk if the XMM registers need not be stored.
//
//   EntryBlk[VAStartPseudoInstr]        EntryBlk
//               |                          |     .
//               |                          |        .
//               |                          |  GuardedRegsBlk
//               |                  =>      |        .
//               |                          |     .
//               |                       TailBlk
//               |                          |
//               |                          |
//
void X86ExpandPseudo::expandVastartSaveXmmRegs(
    MachineBasicBlock *EntryBlk,
    MachineBasicBlock::iterator VAStartPseudoInstr) const {
  assert(VAStartPseudoInstr->getOpcode() == X86::VASTART_SAVE_XMM_REGS);
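
  // Operand layout as consumed below: operand 0 is the %al count register,
  // operands 1-5 form the save-area address (displacement at index 4),
  // operand 6 is the offset of the varargs register area, and the trailing
  // operands are the XMM argument registers to store.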

  MachineFunction *Func = EntryBlk->getParent();
  const TargetInstrInfo *TII = STI->getInstrInfo();
  const DebugLoc &DL = VAStartPseudoInstr->getDebugLoc();
  Register CountReg = VAStartPseudoInstr->getOperand(0).getReg();

  // Calculate liveins for newly created blocks.
  LivePhysRegs LiveRegs(*STI->getRegisterInfo());
  SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;

  LiveRegs.addLiveIns(*EntryBlk);
  for (MachineInstr &MI : EntryBlk->instrs()) {
    if (MI.getOpcode() == VAStartPseudoInstr->getOpcode())
      break;

    LiveRegs.stepForward(MI, Clobbers);
  }

  // Create the new basic blocks. One block contains all the XMM stores,
  // and another block is the final destination regardless of whether any
  // stores were performed.
  const BasicBlock *LLVMBlk = EntryBlk->getBasicBlock();
  MachineFunction::iterator EntryBlkIter = ++EntryBlk->getIterator();
  MachineBasicBlock *GuardedRegsBlk = Func->CreateMachineBasicBlock(LLVMBlk);
  MachineBasicBlock *TailBlk = Func->CreateMachineBasicBlock(LLVMBlk);
  Func->insert(EntryBlkIter, GuardedRegsBlk);
  Func->insert(EntryBlkIter, TailBlk);

  // Transfer the remainder of EntryBlk and its successor edges to TailBlk.
  TailBlk->splice(TailBlk->begin(), EntryBlk,
                  std::next(MachineBasicBlock::iterator(VAStartPseudoInstr)),
                  EntryBlk->end());
  TailBlk->transferSuccessorsAndUpdatePHIs(EntryBlk);

  uint64_t FrameOffset = VAStartPseudoInstr->getOperand(4).getImm();
  uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(6).getImm();

  // TODO: add support for YMM and ZMM here.
  unsigned MOVOpc = STI->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;

  // In the XMM save block, save all the XMM argument registers.
  for (int64_t OpndIdx = 7, RegIdx = 0;
       OpndIdx < VAStartPseudoInstr->getNumOperands() - 1;
       OpndIdx++, RegIdx++) {
    auto NewMI = BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc));
    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      if (i == X86::AddrDisp)
        NewMI.addImm(FrameOffset + VarArgsRegsOffset + RegIdx * 16);
      else
        NewMI.add(VAStartPseudoInstr->getOperand(i + 1));
    }
    NewMI.addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg());
    assert(VAStartPseudoInstr->getOperand(OpndIdx).getReg().isPhysical());
  }

  // The original block will now fall through to the GuardedRegsBlk.
  EntryBlk->addSuccessor(GuardedRegsBlk);
  // The GuardedRegsBlk will fall through to the TailBlk.
  GuardedRegsBlk->addSuccessor(TailBlk);

  if (!STI->isCallingConvWin64(Func->getFunction().getCallingConv())) {
    // If %al is 0, branch around the XMM save block.
    BuildMI(EntryBlk, DL, TII->get(X86::TEST8rr))
        .addReg(CountReg)
        .addReg(CountReg);
    BuildMI(EntryBlk, DL, TII->get(X86::JCC_1))
        .addMBB(TailBlk)
        .addImm(X86::COND_E);
    EntryBlk->addSuccessor(TailBlk);
  }

  // Add liveins to the created blocks.
  addLiveIns(*GuardedRegsBlk, LiveRegs);
  addLiveIns(*TailBlk, LiveRegs);

  // Delete the pseudo.
  VAStartPseudoInstr->eraseFromParent();
}

/// Expand all pseudo instructions contained in \p MBB.
/// \returns true if any expansion occurred for \p MBB.
bool X86ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // MBBI may be invalidated by the expansion.
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool X86ExpandPseudo::expandPseudosWhichAffectControlFlow(MachineFunction &MF) {
  // Currently the only pseudo which affects control flow is
  // X86::VASTART_SAVE_XMM_REGS, and it is always located in the entry block,
  // so we do not need to scan other blocks.
  for (MachineInstr &Instr : MF.front().instrs()) {
    if (Instr.getOpcode() == X86::VASTART_SAVE_XMM_REGS) {
      expandVastartSaveXmmRegs(&(MF.front()), Instr);
      return true;
    }
  }

  return false;
}

bool X86ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<X86Subtarget>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  X86FI = MF.getInfo<X86MachineFunctionInfo>();
  X86FL = STI->getFrameLowering();

  bool Modified = expandPseudosWhichAffectControlFlow(MF);

  for (MachineBasicBlock &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createX86ExpandPseudoPass() {
  return new X86ExpandPseudo();
}