GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
1
//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file contains a pass that expands pseudo instructions into target
10
// instructions to allow proper scheduling and other late optimizations. This
11
// pass should be run after register allocation but before the post-regalloc
12
// scheduling pass.
13
//
14
//===----------------------------------------------------------------------===//
15
16
#include "AArch64ExpandImm.h"
17
#include "AArch64InstrInfo.h"
18
#include "AArch64MachineFunctionInfo.h"
19
#include "AArch64Subtarget.h"
20
#include "MCTargetDesc/AArch64AddressingModes.h"
21
#include "Utils/AArch64BaseInfo.h"
22
#include "llvm/CodeGen/LivePhysRegs.h"
23
#include "llvm/CodeGen/MachineBasicBlock.h"
24
#include "llvm/CodeGen/MachineConstantPool.h"
25
#include "llvm/CodeGen/MachineFunction.h"
26
#include "llvm/CodeGen/MachineFunctionPass.h"
27
#include "llvm/CodeGen/MachineInstr.h"
28
#include "llvm/CodeGen/MachineInstrBuilder.h"
29
#include "llvm/CodeGen/MachineOperand.h"
30
#include "llvm/CodeGen/TargetSubtargetInfo.h"
31
#include "llvm/IR/DebugLoc.h"
32
#include "llvm/MC/MCInstrDesc.h"
33
#include "llvm/Pass.h"
34
#include "llvm/Support/CodeGen.h"
35
#include "llvm/Support/MathExtras.h"
36
#include "llvm/Target/TargetMachine.h"
37
#include "llvm/TargetParser/Triple.h"
38
#include <cassert>
39
#include <cstdint>
40
#include <iterator>
41
#include <utility>
42
43
using namespace llvm;
44
45
#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
46
47
namespace {
48
49
class AArch64ExpandPseudo : public MachineFunctionPass {
50
public:
51
const AArch64InstrInfo *TII;
52
53
static char ID;
54
55
AArch64ExpandPseudo() : MachineFunctionPass(ID) {
56
initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
57
}
58
59
bool runOnMachineFunction(MachineFunction &Fn) override;
60
61
StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
62
63
private:
64
bool expandMBB(MachineBasicBlock &MBB);
65
bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
66
MachineBasicBlock::iterator &NextMBBI);
67
bool expandMultiVecPseudo(MachineBasicBlock &MBB,
68
MachineBasicBlock::iterator MBBI,
69
TargetRegisterClass ContiguousClass,
70
TargetRegisterClass StridedClass,
71
unsigned ContiguousOpc, unsigned StridedOpc);
72
bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
73
unsigned BitSize);
74
75
bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
76
MachineBasicBlock::iterator MBBI);
77
bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
78
unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
79
unsigned ExtendImm, unsigned ZeroReg,
80
MachineBasicBlock::iterator &NextMBBI);
81
bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
82
MachineBasicBlock::iterator MBBI,
83
MachineBasicBlock::iterator &NextMBBI);
84
bool expandSetTagLoop(MachineBasicBlock &MBB,
85
MachineBasicBlock::iterator MBBI,
86
MachineBasicBlock::iterator &NextMBBI);
87
bool expandSVESpillFill(MachineBasicBlock &MBB,
88
MachineBasicBlock::iterator MBBI, unsigned Opc,
89
unsigned N);
90
bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
91
MachineBasicBlock::iterator MBBI);
92
bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
93
bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
94
MachineBasicBlock::iterator MBBI);
95
MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,
96
MachineBasicBlock::iterator MBBI);
97
MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
98
MachineBasicBlock::iterator MBBI);
99
};
100
101
} // end anonymous namespace
102
103
char AArch64ExpandPseudo::ID = 0;
104
105
INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
106
AARCH64_EXPAND_PSEUDO_NAME, false, false)
107
108
/// Transfer implicit operands on the pseudo instruction to the
109
/// instructions created from the expansion.
110
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
111
MachineInstrBuilder &DefMI) {
112
const MCInstrDesc &Desc = OldMI.getDesc();
113
for (const MachineOperand &MO :
114
llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
115
assert(MO.isReg() && MO.getReg());
116
if (MO.isUse())
117
UseMI.add(MO);
118
else
119
DefMI.add(MO);
120
}
121
}
122
123
/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
124
/// real move-immediate instructions to synthesize the immediate.
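///
/// As a rough illustration (not an exhaustive list of cases), a 64-bit
/// constant such as 0x0000123456789abc would typically be synthesized as
///   movz x0, #0x9abc
///   movk x0, #0x5678, lsl #16
///   movk x0, #0x1234, lsl #32
/// while a repeating bit pattern such as 0x00ff00ff00ff00ff can be
/// materialized with a single ORR using a logical-immediate encoding; the
/// exact sequence is chosen by AArch64_IMM::expandMOVImm below.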
125
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
126
MachineBasicBlock::iterator MBBI,
127
unsigned BitSize) {
128
MachineInstr &MI = *MBBI;
129
Register DstReg = MI.getOperand(0).getReg();
130
uint64_t RenamableState =
131
MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
132
uint64_t Imm = MI.getOperand(1).getImm();
133
134
if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
135
// Useless def, and we don't want to risk creating an invalid ORR (which
136
// would really write to sp).
137
MI.eraseFromParent();
138
return true;
139
}
140
141
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
142
AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
143
assert(Insn.size() != 0);
144
145
SmallVector<MachineInstrBuilder, 4> MIBS;
146
for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
147
bool LastItem = std::next(I) == E;
148
switch (I->Opcode)
149
{
150
default: llvm_unreachable("unhandled!"); break;
151
152
case AArch64::ORRWri:
153
case AArch64::ORRXri:
154
if (I->Op1 == 0) {
155
MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
156
.add(MI.getOperand(0))
157
.addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
158
.addImm(I->Op2));
159
} else {
160
Register DstReg = MI.getOperand(0).getReg();
161
bool DstIsDead = MI.getOperand(0).isDead();
162
MIBS.push_back(
163
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
164
.addReg(DstReg, RegState::Define |
165
getDeadRegState(DstIsDead && LastItem) |
166
RenamableState)
167
.addReg(DstReg)
168
.addImm(I->Op2));
169
}
170
break;
171
case AArch64::ORRWrs:
172
case AArch64::ORRXrs: {
173
Register DstReg = MI.getOperand(0).getReg();
174
bool DstIsDead = MI.getOperand(0).isDead();
175
MIBS.push_back(
176
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
177
.addReg(DstReg, RegState::Define |
178
getDeadRegState(DstIsDead && LastItem) |
179
RenamableState)
180
.addReg(DstReg)
181
.addReg(DstReg)
182
.addImm(I->Op2));
183
} break;
184
case AArch64::ANDXri:
185
case AArch64::EORXri:
186
if (I->Op1 == 0) {
187
MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
188
.add(MI.getOperand(0))
189
.addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
190
.addImm(I->Op2));
191
} else {
192
Register DstReg = MI.getOperand(0).getReg();
193
bool DstIsDead = MI.getOperand(0).isDead();
194
MIBS.push_back(
195
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
196
.addReg(DstReg, RegState::Define |
197
getDeadRegState(DstIsDead && LastItem) |
198
RenamableState)
199
.addReg(DstReg)
200
.addImm(I->Op2));
201
}
202
break;
203
case AArch64::MOVNWi:
204
case AArch64::MOVNXi:
205
case AArch64::MOVZWi:
206
case AArch64::MOVZXi: {
207
bool DstIsDead = MI.getOperand(0).isDead();
208
MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
209
.addReg(DstReg, RegState::Define |
210
getDeadRegState(DstIsDead && LastItem) |
211
RenamableState)
212
.addImm(I->Op1)
213
.addImm(I->Op2));
214
} break;
215
case AArch64::MOVKWi:
216
case AArch64::MOVKXi: {
217
Register DstReg = MI.getOperand(0).getReg();
218
bool DstIsDead = MI.getOperand(0).isDead();
219
MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
220
.addReg(DstReg,
221
RegState::Define |
222
getDeadRegState(DstIsDead && LastItem) |
223
RenamableState)
224
.addReg(DstReg)
225
.addImm(I->Op1)
226
.addImm(I->Op2));
227
} break;
228
}
229
}
230
transferImpOps(MI, MIBS.front(), MIBS.back());
231
MI.eraseFromParent();
232
return true;
233
}
234
235
bool AArch64ExpandPseudo::expandCMP_SWAP(
236
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
237
unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
238
MachineBasicBlock::iterator &NextMBBI) {
239
MachineInstr &MI = *MBBI;
240
MIMetadata MIMD(MI);
241
const MachineOperand &Dest = MI.getOperand(0);
242
Register StatusReg = MI.getOperand(1).getReg();
243
bool StatusDead = MI.getOperand(1).isDead();
244
// Duplicating undef operands into 2 instructions does not guarantee the same
245
// value on both; however, undef should be replaced by xzr anyway.
246
assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
247
Register AddrReg = MI.getOperand(2).getReg();
248
Register DesiredReg = MI.getOperand(3).getReg();
249
Register NewReg = MI.getOperand(4).getReg();
250
251
MachineFunction *MF = MBB.getParent();
252
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
253
auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
254
auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
255
256
MF->insert(++MBB.getIterator(), LoadCmpBB);
257
MF->insert(++LoadCmpBB->getIterator(), StoreBB);
258
MF->insert(++StoreBB->getIterator(), DoneBB);
259
260
// .Lloadcmp:
261
// mov wStatus, 0
262
// ldaxr xDest, [xAddr]
263
// cmp xDest, xDesired
264
// b.ne .Ldone
265
if (!StatusDead)
266
BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
267
.addImm(0).addImm(0);
268
BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
269
.addReg(AddrReg);
270
BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
271
.addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
272
.addReg(DesiredReg)
273
.addImm(ExtendImm);
274
BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
275
.addImm(AArch64CC::NE)
276
.addMBB(DoneBB)
277
.addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
278
LoadCmpBB->addSuccessor(DoneBB);
279
LoadCmpBB->addSuccessor(StoreBB);
280
281
// .Lstore:
282
// stlxr wStatus, xNew, [xAddr]
283
// cbnz wStatus, .Lloadcmp
284
BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
285
.addReg(NewReg)
286
.addReg(AddrReg);
287
BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
288
.addReg(StatusReg, getKillRegState(StatusDead))
289
.addMBB(LoadCmpBB);
290
StoreBB->addSuccessor(LoadCmpBB);
291
StoreBB->addSuccessor(DoneBB);
292
293
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
294
DoneBB->transferSuccessors(&MBB);
295
296
MBB.addSuccessor(LoadCmpBB);
297
298
NextMBBI = MBB.end();
299
MI.eraseFromParent();
300
301
// Recompute livein lists.
302
LivePhysRegs LiveRegs;
303
computeAndAddLiveIns(LiveRegs, *DoneBB);
304
computeAndAddLiveIns(LiveRegs, *StoreBB);
305
computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
306
// Do an extra pass around the loop to get loop carried registers right.
307
StoreBB->clearLiveIns();
308
computeAndAddLiveIns(LiveRegs, *StoreBB);
309
LoadCmpBB->clearLiveIns();
310
computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
311
312
return true;
313
}
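// Taken together, for (e.g.) CMP_SWAP_32 the blocks built above form the
// usual load-exclusive/store-exclusive retry loop, roughly:
//   .Lloadcmp: mov   wStatus, #0
//              ldaxr wDest, [xAddr]
//              cmp   wDest, wDesired
//              b.ne  .Ldone
//   .Lstore:   stlxr wStatus, wNew, [xAddr]
//              cbnz  wStatus, .Lloadcmp
//   .Ldone:
// with the actual load/store/compare opcodes supplied by the caller.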
314
315
bool AArch64ExpandPseudo::expandCMP_SWAP_128(
316
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
317
MachineBasicBlock::iterator &NextMBBI) {
318
MachineInstr &MI = *MBBI;
319
MIMetadata MIMD(MI);
320
MachineOperand &DestLo = MI.getOperand(0);
321
MachineOperand &DestHi = MI.getOperand(1);
322
Register StatusReg = MI.getOperand(2).getReg();
323
bool StatusDead = MI.getOperand(2).isDead();
324
// Duplicating undef operands into 2 instructions does not guarantee the same
325
// value on both; however, undef should be replaced by xzr anyway.
326
assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
327
Register AddrReg = MI.getOperand(3).getReg();
328
Register DesiredLoReg = MI.getOperand(4).getReg();
329
Register DesiredHiReg = MI.getOperand(5).getReg();
330
Register NewLoReg = MI.getOperand(6).getReg();
331
Register NewHiReg = MI.getOperand(7).getReg();
332
333
unsigned LdxpOp, StxpOp;
334
335
switch (MI.getOpcode()) {
336
case AArch64::CMP_SWAP_128_MONOTONIC:
337
LdxpOp = AArch64::LDXPX;
338
StxpOp = AArch64::STXPX;
339
break;
340
case AArch64::CMP_SWAP_128_RELEASE:
341
LdxpOp = AArch64::LDXPX;
342
StxpOp = AArch64::STLXPX;
343
break;
344
case AArch64::CMP_SWAP_128_ACQUIRE:
345
LdxpOp = AArch64::LDAXPX;
346
StxpOp = AArch64::STXPX;
347
break;
348
case AArch64::CMP_SWAP_128:
349
LdxpOp = AArch64::LDAXPX;
350
StxpOp = AArch64::STLXPX;
351
break;
352
default:
353
llvm_unreachable("Unexpected opcode");
354
}
355
356
MachineFunction *MF = MBB.getParent();
357
auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
358
auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
359
auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
360
auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
361
362
MF->insert(++MBB.getIterator(), LoadCmpBB);
363
MF->insert(++LoadCmpBB->getIterator(), StoreBB);
364
MF->insert(++StoreBB->getIterator(), FailBB);
365
MF->insert(++FailBB->getIterator(), DoneBB);
366
367
// .Lloadcmp:
368
// ldaxp xDestLo, xDestHi, [xAddr]
369
// cmp xDestLo, xDesiredLo
370
// sbcs xDestHi, xDesiredHi
371
// b.ne .Ldone
372
BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
373
.addReg(DestLo.getReg(), RegState::Define)
374
.addReg(DestHi.getReg(), RegState::Define)
375
.addReg(AddrReg);
376
BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
377
.addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
378
.addReg(DesiredLoReg)
379
.addImm(0);
380
BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
381
.addUse(AArch64::WZR)
382
.addUse(AArch64::WZR)
383
.addImm(AArch64CC::EQ);
384
BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
385
.addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
386
.addReg(DesiredHiReg)
387
.addImm(0);
388
BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
389
.addUse(StatusReg, RegState::Kill)
390
.addUse(StatusReg, RegState::Kill)
391
.addImm(AArch64CC::EQ);
392
BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
393
.addUse(StatusReg, getKillRegState(StatusDead))
394
.addMBB(FailBB);
395
LoadCmpBB->addSuccessor(FailBB);
396
LoadCmpBB->addSuccessor(StoreBB);
397
398
// .Lstore:
399
// stlxp wStatus, xNewLo, xNewHi, [xAddr]
400
// cbnz wStatus, .Lloadcmp
401
BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
402
.addReg(NewLoReg)
403
.addReg(NewHiReg)
404
.addReg(AddrReg);
405
BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
406
.addReg(StatusReg, getKillRegState(StatusDead))
407
.addMBB(LoadCmpBB);
408
BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
409
StoreBB->addSuccessor(LoadCmpBB);
410
StoreBB->addSuccessor(DoneBB);
411
412
// .Lfail:
413
// stlxp wStatus, xDestLo, xDestHi, [xAddr]
414
// cbnz wStatus, .Lloadcmp
415
BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
416
.addReg(DestLo.getReg())
417
.addReg(DestHi.getReg())
418
.addReg(AddrReg);
419
BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
420
.addReg(StatusReg, getKillRegState(StatusDead))
421
.addMBB(LoadCmpBB);
422
FailBB->addSuccessor(LoadCmpBB);
423
FailBB->addSuccessor(DoneBB);
424
425
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
426
DoneBB->transferSuccessors(&MBB);
427
428
MBB.addSuccessor(LoadCmpBB);
429
430
NextMBBI = MBB.end();
431
MI.eraseFromParent();
432
433
// Recompute liveness bottom up.
434
LivePhysRegs LiveRegs;
435
computeAndAddLiveIns(LiveRegs, *DoneBB);
436
computeAndAddLiveIns(LiveRegs, *FailBB);
437
computeAndAddLiveIns(LiveRegs, *StoreBB);
438
computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
439
440
// Do an extra pass in the loop to get the loop carried dependencies right.
441
FailBB->clearLiveIns();
442
computeAndAddLiveIns(LiveRegs, *FailBB);
443
StoreBB->clearLiveIns();
444
computeAndAddLiveIns(LiveRegs, *StoreBB);
445
LoadCmpBB->clearLiveIns();
446
computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
447
448
return true;
449
}
450
451
/// \brief Expand Pseudos to Instructions with destructive operands.
452
///
453
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
454
/// or for fixing relaxed register allocation conditions to comply with
455
/// the instructions register constraints. The latter case may be cheaper
456
/// than setting the register constraints in the register allocator,
457
/// since that will insert regular MOV instructions rather than MOVPRFX.
458
///
459
/// Example (after register allocation):
460
///
461
/// FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
462
///
463
/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
464
/// * We cannot map directly to FSUB_ZPmZ_B because the register
465
/// constraints of the instruction are not met.
466
/// * Also the _ZERO specifies the false lanes need to be zeroed.
467
///
468
/// We first try to see if the destructive operand == result operand,
469
/// if not, we try to swap the operands, e.g.
470
///
471
/// FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1
472
///
473
/// But because FSUB_ZPmZ is not commutative, this is semantically
474
/// different, so we need a reverse instruction:
475
///
476
/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
477
///
478
/// Then we implement the zeroing of the false lanes of Z0 by adding
479
/// a zeroing MOVPRFX instruction:
480
///
481
/// MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
482
/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
483
///
484
/// Note that this can only be done for _ZERO or _UNDEF variants where
485
/// we can guarantee the false lanes to be zeroed (by implementing this)
486
/// or that they are undef (don't care / not used), otherwise the
487
/// swapping of operands is illegal because the operation is not
488
/// (or cannot be emulated to be) fully commutative.
489
bool AArch64ExpandPseudo::expand_DestructiveOp(
490
MachineInstr &MI,
491
MachineBasicBlock &MBB,
492
MachineBasicBlock::iterator MBBI) {
493
unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
494
uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
495
uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
496
bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
497
Register DstReg = MI.getOperand(0).getReg();
498
bool DstIsDead = MI.getOperand(0).isDead();
499
bool UseRev = false;
500
unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
501
502
switch (DType) {
503
case AArch64::DestructiveBinaryComm:
504
case AArch64::DestructiveBinaryCommWithRev:
505
if (DstReg == MI.getOperand(3).getReg()) {
506
// FSUB Zd, Pg, Zs1, Zd ==> FSUBR Zd, Pg/m, Zd, Zs1
507
std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
508
UseRev = true;
509
break;
510
}
511
[[fallthrough]];
512
case AArch64::DestructiveBinary:
513
case AArch64::DestructiveBinaryImm:
514
std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
515
break;
516
case AArch64::DestructiveUnaryPassthru:
517
std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
518
break;
519
case AArch64::DestructiveTernaryCommWithRev:
520
std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
521
if (DstReg == MI.getOperand(3).getReg()) {
522
// FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
523
std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
524
UseRev = true;
525
} else if (DstReg == MI.getOperand(4).getReg()) {
526
// FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
527
std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
528
UseRev = true;
529
}
530
break;
531
default:
532
llvm_unreachable("Unsupported Destructive Operand type");
533
}
534
535
// MOVPRFX can only be used if the destination operand
536
// is the destructive operand, not as any other operand,
537
// so the Destructive Operand must be unique.
538
bool DOPRegIsUnique = false;
539
switch (DType) {
540
case AArch64::DestructiveBinary:
541
DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
542
break;
543
case AArch64::DestructiveBinaryComm:
544
case AArch64::DestructiveBinaryCommWithRev:
545
DOPRegIsUnique =
546
DstReg != MI.getOperand(DOPIdx).getReg() ||
547
MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
548
break;
549
case AArch64::DestructiveUnaryPassthru:
550
case AArch64::DestructiveBinaryImm:
551
DOPRegIsUnique = true;
552
break;
553
case AArch64::DestructiveTernaryCommWithRev:
554
DOPRegIsUnique =
555
DstReg != MI.getOperand(DOPIdx).getReg() ||
556
(MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
557
MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
558
break;
559
}
560
561
// Resolve the reverse opcode
562
if (UseRev) {
563
int NewOpcode;
564
// e.g. DIV -> DIVR
565
if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
566
Opcode = NewOpcode;
567
// e.g. DIVR -> DIV
568
else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
569
Opcode = NewOpcode;
570
}
571
572
// Get the right MOVPRFX
573
uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
574
unsigned MovPrfx, LSLZero, MovPrfxZero;
575
switch (ElementSize) {
576
case AArch64::ElementSizeNone:
577
case AArch64::ElementSizeB:
578
MovPrfx = AArch64::MOVPRFX_ZZ;
579
LSLZero = AArch64::LSL_ZPmI_B;
580
MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
581
break;
582
case AArch64::ElementSizeH:
583
MovPrfx = AArch64::MOVPRFX_ZZ;
584
LSLZero = AArch64::LSL_ZPmI_H;
585
MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
586
break;
587
case AArch64::ElementSizeS:
588
MovPrfx = AArch64::MOVPRFX_ZZ;
589
LSLZero = AArch64::LSL_ZPmI_S;
590
MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
591
break;
592
case AArch64::ElementSizeD:
593
MovPrfx = AArch64::MOVPRFX_ZZ;
594
LSLZero = AArch64::LSL_ZPmI_D;
595
MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
596
break;
597
default:
598
llvm_unreachable("Unsupported ElementSize");
599
}
600
601
//
602
// Create the destructive operation (if required)
603
//
604
MachineInstrBuilder PRFX, DOP;
605
if (FalseZero) {
606
// If we cannot prefix the requested instruction, we'll instead emit a
607
// prefixed_zeroing_mov for DestructiveBinary.
608
assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
609
DType == AArch64::DestructiveBinaryComm ||
610
DType == AArch64::DestructiveBinaryCommWithRev) &&
611
"The destructive operand should be unique");
612
assert(ElementSize != AArch64::ElementSizeNone &&
613
"This instruction is unpredicated");
614
615
// Merge source operand into destination register
616
PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
617
.addReg(DstReg, RegState::Define)
618
.addReg(MI.getOperand(PredIdx).getReg())
619
.addReg(MI.getOperand(DOPIdx).getReg());
620
621
// After the movprfx, the destructive operand is the same as Dst
622
DOPIdx = 0;
623
624
// Create the additional LSL to zero the lanes when the DstReg is not
625
// unique. Zeros the lanes in z0 that aren't active in p0 with sequence
626
// movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
627
if ((DType == AArch64::DestructiveBinary ||
628
DType == AArch64::DestructiveBinaryComm ||
629
DType == AArch64::DestructiveBinaryCommWithRev) &&
630
!DOPRegIsUnique) {
631
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
632
.addReg(DstReg, RegState::Define)
633
.add(MI.getOperand(PredIdx))
634
.addReg(DstReg)
635
.addImm(0);
636
}
637
} else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
638
assert(DOPRegIsUnique && "The destructive operand should be unique");
639
PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
640
.addReg(DstReg, RegState::Define)
641
.addReg(MI.getOperand(DOPIdx).getReg());
642
DOPIdx = 0;
643
}
644
645
//
646
// Create the destructive operation
647
//
648
DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
649
.addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
650
651
switch (DType) {
652
case AArch64::DestructiveUnaryPassthru:
653
DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
654
.add(MI.getOperand(PredIdx))
655
.add(MI.getOperand(SrcIdx));
656
break;
657
case AArch64::DestructiveBinary:
658
case AArch64::DestructiveBinaryImm:
659
case AArch64::DestructiveBinaryComm:
660
case AArch64::DestructiveBinaryCommWithRev:
661
DOP.add(MI.getOperand(PredIdx))
662
.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
663
.add(MI.getOperand(SrcIdx));
664
break;
665
case AArch64::DestructiveTernaryCommWithRev:
666
DOP.add(MI.getOperand(PredIdx))
667
.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
668
.add(MI.getOperand(SrcIdx))
669
.add(MI.getOperand(Src2Idx));
670
break;
671
}
672
673
if (PRFX) {
674
finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
675
transferImpOps(MI, PRFX, DOP);
676
} else
677
transferImpOps(MI, DOP, DOP);
678
679
MI.eraseFromParent();
680
return true;
681
}
682
683
bool AArch64ExpandPseudo::expandSetTagLoop(
684
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
685
MachineBasicBlock::iterator &NextMBBI) {
686
MachineInstr &MI = *MBBI;
687
DebugLoc DL = MI.getDebugLoc();
688
Register SizeReg = MI.getOperand(0).getReg();
689
Register AddressReg = MI.getOperand(1).getReg();
690
691
MachineFunction *MF = MBB.getParent();
692
693
bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
694
const unsigned OpCode1 =
695
ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
696
const unsigned OpCode2 =
697
ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
698
699
unsigned Size = MI.getOperand(2).getImm();
700
assert(Size > 0 && Size % 16 == 0);
701
if (Size % (16 * 2) != 0) {
702
BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
703
.addReg(AddressReg)
704
.addReg(AddressReg)
705
.addImm(1);
706
Size -= 16;
707
}
708
MachineBasicBlock::iterator I =
709
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
710
.addImm(Size);
711
expandMOVImm(MBB, I, 64);
712
713
auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
714
auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
715
716
MF->insert(++MBB.getIterator(), LoopBB);
717
MF->insert(++LoopBB->getIterator(), DoneBB);
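// The tag-setting loop built below is roughly (for the two-granule form):
//   .Lloop: st2g xAddr, [xAddr], #32   // stz2g instead when zeroing data
//           subs xSize, xSize, #32
//           b.ne .Lloop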
718
719
BuildMI(LoopBB, DL, TII->get(OpCode2))
720
.addDef(AddressReg)
721
.addReg(AddressReg)
722
.addReg(AddressReg)
723
.addImm(2)
724
.cloneMemRefs(MI)
725
.setMIFlags(MI.getFlags());
726
BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))
727
.addDef(SizeReg)
728
.addReg(SizeReg)
729
.addImm(16 * 2)
730
.addImm(0);
731
BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))
732
.addImm(AArch64CC::NE)
733
.addMBB(LoopBB)
734
.addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
735
736
LoopBB->addSuccessor(LoopBB);
737
LoopBB->addSuccessor(DoneBB);
738
739
DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
740
DoneBB->transferSuccessors(&MBB);
741
742
MBB.addSuccessor(LoopBB);
743
744
NextMBBI = MBB.end();
745
MI.eraseFromParent();
746
// Recompute liveness bottom up.
747
LivePhysRegs LiveRegs;
748
computeAndAddLiveIns(LiveRegs, *DoneBB);
749
computeAndAddLiveIns(LiveRegs, *LoopBB);
750
// Do an extra pass in the loop to get the loop carried dependencies right.
751
// FIXME: is this necessary?
752
LoopBB->clearLiveIns();
753
computeAndAddLiveIns(LiveRegs, *LoopBB);
754
DoneBB->clearLiveIns();
755
computeAndAddLiveIns(LiveRegs, *DoneBB);
756
757
return true;
758
}
759
760
bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
761
MachineBasicBlock::iterator MBBI,
762
unsigned Opc, unsigned N) {
763
assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
764
Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
765
"Unexpected opcode");
766
unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI)
767
? RegState::Define
768
: 0;
769
unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
770
? AArch64::zsub0
771
: AArch64::psub0;
772
const TargetRegisterInfo *TRI =
773
MBB.getParent()->getSubtarget().getRegisterInfo();
774
MachineInstr &MI = *MBBI;
775
for (unsigned Offset = 0; Offset < N; ++Offset) {
776
int ImmOffset = MI.getOperand(2).getImm() + Offset;
777
bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
778
assert(ImmOffset >= -256 && ImmOffset < 256 &&
779
"Immediate spill offset out of range");
780
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
781
.addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset),
782
RState)
783
.addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
784
.addImm(ImmOffset);
785
}
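// For example (illustrative only), spilling the tuple Z0_Z1 with STR_ZZXI
// to [X1, #4, mul vl] is split by the loop above into
//   str z0, [x1, #4, mul vl]
//   str z1, [x1, #5, mul vl]
// i.e. one STR_ZXI/LDR_ZXI (or STR_PXI/LDR_PXI) per sub-register, with the
// vector-length-scaled immediate bumped by one slot each time.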
786
MI.eraseFromParent();
787
return true;
788
}
789
790
// Create a call with the passed opcode and explicit operands, copying over all
791
// the implicit operands from *MBBI, starting at the regmask.
792
static MachineInstr *createCallWithOps(MachineBasicBlock &MBB,
793
MachineBasicBlock::iterator MBBI,
794
const AArch64InstrInfo *TII,
795
unsigned Opcode,
796
ArrayRef<MachineOperand> ExplicitOps,
797
unsigned RegMaskStartIdx) {
798
// Build the MI, with explicit operands first (including the call target).
799
MachineInstr *Call = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode))
800
.add(ExplicitOps)
801
.getInstr();
802
803
// Register arguments are added during ISel, but cannot be added as explicit
804
// operands of the branch, as it expects to be B <target>, which has only one
805
// operand. Instead they are implicit operands used by the branch.
806
while (!MBBI->getOperand(RegMaskStartIdx).isRegMask()) {
807
const MachineOperand &MOP = MBBI->getOperand(RegMaskStartIdx);
808
assert(MOP.isReg() && "can only add register operands");
809
Call->addOperand(MachineOperand::CreateReg(
810
MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
811
/*isDead=*/false, /*isUndef=*/MOP.isUndef()));
812
RegMaskStartIdx++;
813
}
814
for (const MachineOperand &MO :
815
llvm::drop_begin(MBBI->operands(), RegMaskStartIdx))
816
Call->addOperand(MO);
817
818
return Call;
819
}
820
821
// Create a call to CallTarget, copying over all the operands from *MBBI,
822
// starting at the regmask.
823
static MachineInstr *createCall(MachineBasicBlock &MBB,
824
MachineBasicBlock::iterator MBBI,
825
const AArch64InstrInfo *TII,
826
MachineOperand &CallTarget,
827
unsigned RegMaskStartIdx) {
828
unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
829
830
assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
831
"invalid operand for regular call");
832
return createCallWithOps(MBB, MBBI, TII, Opc, CallTarget, RegMaskStartIdx);
833
}
834
835
bool AArch64ExpandPseudo::expandCALL_RVMARKER(
836
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
837
// Expand CALL_RVMARKER pseudo to:
838
// - a branch to the call target, followed by
839
// - the special `mov x29, x29` marker, and
840
// - another branch, to the runtime function
841
// Mark the sequence as a bundle, to avoid passes moving other code in between.
842
MachineInstr &MI = *MBBI;
843
MachineOperand &RVTarget = MI.getOperand(0);
844
assert(RVTarget.isGlobal() && "invalid operand for attached call");
845
846
MachineInstr *OriginalCall = nullptr;
847
848
if (MI.getOpcode() == AArch64::BLRA_RVMARKER) {
849
// ptrauth call.
850
const MachineOperand &CallTarget = MI.getOperand(1);
851
const MachineOperand &Key = MI.getOperand(2);
852
const MachineOperand &IntDisc = MI.getOperand(3);
853
const MachineOperand &AddrDisc = MI.getOperand(4);
854
855
assert((Key.getImm() == AArch64PACKey::IA ||
856
Key.getImm() == AArch64PACKey::IB) &&
857
"Invalid auth call key");
858
859
MachineOperand Ops[] = {CallTarget, Key, IntDisc, AddrDisc};
860
861
OriginalCall = createCallWithOps(MBB, MBBI, TII, AArch64::BLRA, Ops,
862
/*RegMaskStartIdx=*/5);
863
} else {
864
assert(MI.getOpcode() == AArch64::BLR_RVMARKER && "unknown rvmarker MI");
865
OriginalCall = createCall(MBB, MBBI, TII, MI.getOperand(1),
866
// Regmask starts after the RV and call targets.
867
/*RegMaskStartIdx=*/2);
868
}
869
870
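// Emit the `mov x29, x29` marker (written here as orr x29, xzr, x29).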
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
871
.addReg(AArch64::FP, RegState::Define)
872
.addReg(AArch64::XZR)
873
.addReg(AArch64::FP)
874
.addImm(0);
875
876
auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
877
.add(RVTarget)
878
.getInstr();
879
880
if (MI.shouldUpdateCallSiteInfo())
881
MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);
882
883
MI.eraseFromParent();
884
finalizeBundle(MBB, OriginalCall->getIterator(),
885
std::next(RVCall->getIterator()));
886
return true;
887
}
888
889
bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
890
MachineBasicBlock::iterator MBBI) {
891
// Expand CALL_BTI pseudo to:
892
// - a branch to the call target
893
// - a BTI instruction
894
// Mark the sequence as a bundle, to avoid passes moving other code in
895
// between.
896
MachineInstr &MI = *MBBI;
897
MachineInstr *Call = createCall(MBB, MBBI, TII, MI.getOperand(0),
898
// Regmask starts after the call target.
899
/*RegMaskStartIdx=*/1);
900
901
Call->setCFIType(*MBB.getParent(), MI.getCFIType());
902
903
MachineInstr *BTI =
904
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
905
// BTI J so that setjmp can BR to this.
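// (HINT #36 is the encoding of `bti j`.)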
906
.addImm(36)
907
.getInstr();
908
909
if (MI.shouldUpdateCallSiteInfo())
910
MBB.getParent()->moveCallSiteInfo(&MI, Call);
911
912
MI.eraseFromParent();
913
finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
914
return true;
915
}
916
917
bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
918
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
919
Register CtxReg = MBBI->getOperand(0).getReg();
920
Register BaseReg = MBBI->getOperand(1).getReg();
921
int Offset = MBBI->getOperand(2).getImm();
922
DebugLoc DL(MBBI->getDebugLoc());
923
auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
924
925
if (STI.getTargetTriple().getArchName() != "arm64e") {
926
BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
927
.addUse(CtxReg)
928
.addUse(BaseReg)
929
.addImm(Offset / 8)
930
.setMIFlag(MachineInstr::FrameSetup);
931
MBBI->eraseFromParent();
932
return true;
933
}
934
935
// We need to sign the context in an address-discriminated way. 0xc31a is a
936
// fixed random value, chosen as part of the ABI.
937
// add x16, xBase, #Offset
938
// movk x16, #0xc31a, lsl #48
939
// mov x17, x22/xzr
940
// pacdb x17, x16
941
// str x17, [xBase, #Offset]
942
unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
943
BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
944
.addUse(BaseReg)
945
.addImm(abs(Offset))
946
.addImm(0)
947
.setMIFlag(MachineInstr::FrameSetup);
948
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
949
.addUse(AArch64::X16)
950
.addImm(0xc31a)
951
.addImm(48)
952
.setMIFlag(MachineInstr::FrameSetup);
953
// We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
954
// move it somewhere before signing.
955
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
956
.addUse(AArch64::XZR)
957
.addUse(CtxReg)
958
.addImm(0)
959
.setMIFlag(MachineInstr::FrameSetup);
960
BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
961
.addUse(AArch64::X17)
962
.addUse(AArch64::X16)
963
.setMIFlag(MachineInstr::FrameSetup);
964
BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
965
.addUse(AArch64::X17)
966
.addUse(BaseReg)
967
.addImm(Offset / 8)
968
.setMIFlag(MachineInstr::FrameSetup);
969
970
MBBI->eraseFromParent();
971
return true;
972
}
973
974
MachineBasicBlock *
975
AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,
976
MachineBasicBlock::iterator MBBI) {
977
MachineInstr &MI = *MBBI;
978
assert((std::next(MBBI) != MBB.end() ||
979
MI.getParent()->successors().begin() !=
980
MI.getParent()->successors().end()) &&
981
"Unexpected unreachable in block that restores ZA");
982
983
// Compare TPIDR2_EL0 value against 0.
984
DebugLoc DL = MI.getDebugLoc();
985
MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX))
986
.add(MI.getOperand(0));
987
988
// Split MBB and create two new blocks:
989
// - MBB now contains all instructions before RestoreZAPseudo.
990
// - SMBB contains the RestoreZAPseudo instruction only.
991
// - EndBB contains all instructions after RestoreZAPseudo.
992
MachineInstr &PrevMI = *std::prev(MBBI);
993
MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
994
MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
995
? *SMBB->successors().begin()
996
: SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
997
998
// Add the SMBB label to the CBZ instruction & create a branch to EndBB.
999
Cbz.addMBB(SMBB);
1000
BuildMI(&MBB, DL, TII->get(AArch64::B))
1001
.addMBB(EndBB);
1002
MBB.addSuccessor(EndBB);
1003
1004
// Replace the pseudo with a call (BL).
1005
MachineInstrBuilder MIB =
1006
BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
1007
MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
1008
for (unsigned I = 2; I < MI.getNumOperands(); ++I)
1009
MIB.add(MI.getOperand(I));
1010
BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1011
1012
MI.eraseFromParent();
1013
return EndBB;
1014
}
1015
1016
MachineBasicBlock *
1017
AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
1018
MachineBasicBlock::iterator MBBI) {
1019
MachineInstr &MI = *MBBI;
1020
// In the case of a smstart/smstop before an unreachable, just remove the pseudo.
1021
// Exception handling code generated by Clang may introduce unreachables and it
1022
// seems unnecessary to restore pstate.sm when that happens. Note that it is
1023
// not just an optimisation, the code below expects a successor instruction/block
1024
// in order to split the block at MBBI.
1025
if (std::next(MBBI) == MBB.end() &&
1026
MI.getParent()->successors().begin() ==
1027
MI.getParent()->successors().end()) {
1028
MI.eraseFromParent();
1029
return &MBB;
1030
}
1031
1032
// Expand the pseudo into smstart or smstop instruction. The pseudo has the
1033
// following operands:
1034
//
1035
// MSRpstatePseudo <za|sm|both>, <0|1>, condition[, pstate.sm], <regmask>
1036
//
1037
// The pseudo is expanded into a conditional smstart/smstop, with a
1038
// check if pstate.sm (register) equals the expected value, and if not,
1039
// invokes the smstart/smstop.
1040
//
1041
// As an example, the following block contains a normal call from a
1042
// streaming-compatible function:
1043
//
1044
// OrigBB:
1045
// MSRpstatePseudo 3, 0, IfCallerIsStreaming, %0, <regmask> <- Cond SMSTOP
1046
// bl @normal_callee
1047
// MSRpstatePseudo 3, 1, IfCallerIsStreaming, %0, <regmask> <- Cond SMSTART
1048
//
1049
// ...which will be transformed into:
1050
//
1051
// OrigBB:
1052
// TBNZx %0:gpr64, 0, SMBB
1053
// b EndBB
1054
//
1055
// SMBB:
1056
// MSRpstatesvcrImm1 3, 0, <regmask> <- SMSTOP
1057
//
1058
// EndBB:
1059
// bl @normal_callee
1060
// MSRcond_pstatesvcrImm1 3, 1, <regmask> <- SMSTART
1061
//
1062
DebugLoc DL = MI.getDebugLoc();
1063
1064
// Create the conditional branch based on the third operand of the
1065
// instruction, which tells us if we are wrapping a normal or streaming
1066
// function.
1067
// We test the live value of pstate.sm and toggle pstate.sm if this is not the
1068
// expected value for the callee (0 for a normal callee and 1 for a streaming
1069
// callee).
1070
unsigned Opc;
1071
switch (MI.getOperand(2).getImm()) {
1072
case AArch64SME::Always:
1073
llvm_unreachable("Should have matched to instruction directly");
1074
case AArch64SME::IfCallerIsStreaming:
1075
Opc = AArch64::TBNZW;
1076
break;
1077
case AArch64SME::IfCallerIsNonStreaming:
1078
Opc = AArch64::TBZW;
1079
break;
1080
}
1081
auto PStateSM = MI.getOperand(3).getReg();
1082
auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1083
unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
1084
MachineInstrBuilder Tbx =
1085
BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);
1086
1087
// Split MBB and create two new blocks:
1088
// - MBB now contains all instructions before MSRcond_pstatesvcrImm1.
1089
// - SMBB contains the MSRcond_pstatesvcrImm1 instruction only.
1090
// - EndBB contains all instructions after MSRcond_pstatesvcrImm1.
1091
MachineInstr &PrevMI = *std::prev(MBBI);
1092
MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
1093
MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
1094
? *SMBB->successors().begin()
1095
: SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
1096
1097
// Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
1098
Tbx.addMBB(SMBB);
1099
BuildMI(&MBB, DL, TII->get(AArch64::B))
1100
.addMBB(EndBB);
1101
MBB.addSuccessor(EndBB);
1102
1103
// Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
1104
MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(),
1105
TII->get(AArch64::MSRpstatesvcrImm1));
1106
// Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as
1107
// these contain the CopyFromReg for the first argument and the flag to
1108
// indicate whether the callee is streaming or normal).
1109
MIB.add(MI.getOperand(0));
1110
MIB.add(MI.getOperand(1));
1111
for (unsigned i = 4; i < MI.getNumOperands(); ++i)
1112
MIB.add(MI.getOperand(i));
1113
1114
BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1115
1116
MI.eraseFromParent();
1117
return EndBB;
1118
}
1119
1120
bool AArch64ExpandPseudo::expandMultiVecPseudo(
1121
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1122
TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
1123
unsigned ContiguousOp, unsigned StridedOpc) {
1124
MachineInstr &MI = *MBBI;
1125
Register Tuple = MI.getOperand(0).getReg();
1126
1127
auto ContiguousRange = ContiguousClass.getRegisters();
1128
auto StridedRange = StridedClass.getRegisters();
1129
unsigned Opc;
1130
if (llvm::is_contained(ContiguousRange, Tuple.asMCReg())) {
1131
Opc = ContiguousOp;
1132
} else if (llvm::is_contained(StridedRange, Tuple.asMCReg())) {
1133
Opc = StridedOpc;
1134
} else
1135
llvm_unreachable("Cannot expand Multi-Vector pseudo");
1136
1137
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
1138
.add(MI.getOperand(0))
1139
.add(MI.getOperand(1))
1140
.add(MI.getOperand(2))
1141
.add(MI.getOperand(3));
1142
transferImpOps(MI, MIB, MIB);
1143
MI.eraseFromParent();
1144
return true;
1145
}
1146
1147
/// If MBBI references a pseudo instruction that should be expanded here,
1148
/// do the expansion and return true. Otherwise return false.
1149
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
1150
MachineBasicBlock::iterator MBBI,
1151
MachineBasicBlock::iterator &NextMBBI) {
1152
MachineInstr &MI = *MBBI;
1153
unsigned Opcode = MI.getOpcode();
1154
1155
// Check if we can expand the destructive op
1156
int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
1157
if (OrigInstr != -1) {
1158
auto &Orig = TII->get(OrigInstr);
1159
if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
1160
AArch64::NotDestructive) {
1161
return expand_DestructiveOp(MI, MBB, MBBI);
1162
}
1163
}
1164
1165
switch (Opcode) {
1166
default:
1167
break;
1168
1169
case AArch64::BSPv8i8:
1170
case AArch64::BSPv16i8: {
1171
Register DstReg = MI.getOperand(0).getReg();
1172
if (DstReg == MI.getOperand(3).getReg()) {
1173
// Expand to BIT
1174
BuildMI(MBB, MBBI, MI.getDebugLoc(),
1175
TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
1176
: AArch64::BITv16i8))
1177
.add(MI.getOperand(0))
1178
.add(MI.getOperand(3))
1179
.add(MI.getOperand(2))
1180
.add(MI.getOperand(1));
1181
} else if (DstReg == MI.getOperand(2).getReg()) {
1182
// Expand to BIF
1183
BuildMI(MBB, MBBI, MI.getDebugLoc(),
1184
TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
1185
: AArch64::BIFv16i8))
1186
.add(MI.getOperand(0))
1187
.add(MI.getOperand(2))
1188
.add(MI.getOperand(3))
1189
.add(MI.getOperand(1));
1190
} else {
1191
// Expand to BSL, use additional move if required
1192
if (DstReg == MI.getOperand(1).getReg()) {
1193
BuildMI(MBB, MBBI, MI.getDebugLoc(),
1194
TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1195
: AArch64::BSLv16i8))
1196
.add(MI.getOperand(0))
1197
.add(MI.getOperand(1))
1198
.add(MI.getOperand(2))
1199
.add(MI.getOperand(3));
1200
} else {
1201
BuildMI(MBB, MBBI, MI.getDebugLoc(),
1202
TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
1203
: AArch64::ORRv16i8))
1204
.addReg(DstReg,
1205
RegState::Define |
1206
getRenamableRegState(MI.getOperand(0).isRenamable()))
1207
.add(MI.getOperand(1))
1208
.add(MI.getOperand(1));
1209
BuildMI(MBB, MBBI, MI.getDebugLoc(),
1210
TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1211
: AArch64::BSLv16i8))
1212
.add(MI.getOperand(0))
1213
.addReg(DstReg,
1214
RegState::Kill |
1215
getRenamableRegState(MI.getOperand(0).isRenamable()))
1216
.add(MI.getOperand(2))
1217
.add(MI.getOperand(3));
1218
}
1219
}
1220
MI.eraseFromParent();
1221
return true;
1222
}
1223
1224
case AArch64::ADDWrr:
1225
case AArch64::SUBWrr:
1226
case AArch64::ADDXrr:
1227
case AArch64::SUBXrr:
1228
case AArch64::ADDSWrr:
1229
case AArch64::SUBSWrr:
1230
case AArch64::ADDSXrr:
1231
case AArch64::SUBSXrr:
1232
case AArch64::ANDWrr:
1233
case AArch64::ANDXrr:
1234
case AArch64::BICWrr:
1235
case AArch64::BICXrr:
1236
case AArch64::ANDSWrr:
1237
case AArch64::ANDSXrr:
1238
case AArch64::BICSWrr:
1239
case AArch64::BICSXrr:
1240
case AArch64::EONWrr:
1241
case AArch64::EONXrr:
1242
case AArch64::EORWrr:
1243
case AArch64::EORXrr:
1244
case AArch64::ORNWrr:
1245
case AArch64::ORNXrr:
1246
case AArch64::ORRWrr:
1247
case AArch64::ORRXrr: {
1248
unsigned Opcode;
1249
switch (MI.getOpcode()) {
1250
default:
1251
return false;
1252
case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
1253
case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
1254
case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
1255
case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
1256
case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
1257
case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
1258
case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
1259
case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
1260
case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
1261
case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
1262
case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
1263
case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
1264
case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
1265
case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
1266
case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
1267
case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
1268
case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
1269
case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
1270
case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
1271
case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
1272
case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
1273
case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
1274
case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
1275
case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
1276
}
1277
MachineFunction &MF = *MBB.getParent();
1278
// Try to create new inst without implicit operands added.
1279
MachineInstr *NewMI = MF.CreateMachineInstr(
1280
TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
1281
MBB.insert(MBBI, NewMI);
1282
MachineInstrBuilder MIB1(MF, NewMI);
1283
MIB1->setPCSections(MF, MI.getPCSections());
1284
MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
1285
.add(MI.getOperand(1))
1286
.add(MI.getOperand(2))
1287
.addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1288
transferImpOps(MI, MIB1, MIB1);
1289
if (auto DebugNumber = MI.peekDebugInstrNum())
1290
NewMI->setDebugInstrNum(DebugNumber);
1291
MI.eraseFromParent();
1292
return true;
1293
}
1294
1295
case AArch64::LOADgot: {
1296
MachineFunction *MF = MBB.getParent();
1297
Register DstReg = MI.getOperand(0).getReg();
1298
const MachineOperand &MO1 = MI.getOperand(1);
1299
unsigned Flags = MO1.getTargetFlags();
1300
1301
if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
1302
// Tiny code model: expand to LDR.
1303
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1304
TII->get(AArch64::LDRXl), DstReg);
1305
1306
if (MO1.isGlobal()) {
1307
MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
1308
} else if (MO1.isSymbol()) {
1309
MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
1310
} else {
1311
assert(MO1.isCPI() &&
1312
"Only expect globals, externalsymbols, or constant pools");
1313
MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
1314
}
1315
} else {
1316
// Small code model: expand into ADRP + LDR.
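// (On ELF this typically ends up as: adrp xD, :got:sym followed by
//  ldr xD, [xD, :got_lo12:sym].)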
1317
MachineFunction &MF = *MI.getParent()->getParent();
1318
DebugLoc DL = MI.getDebugLoc();
1319
MachineInstrBuilder MIB1 =
1320
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
1321
1322
MachineInstrBuilder MIB2;
1323
if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
1324
auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1325
unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
1326
unsigned DstFlags = MI.getOperand(0).getTargetFlags();
1327
MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
1328
.addDef(Reg32)
1329
.addReg(DstReg, RegState::Kill)
1330
.addReg(DstReg, DstFlags | RegState::Implicit);
1331
} else {
1332
Register DstReg = MI.getOperand(0).getReg();
1333
MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
1334
.add(MI.getOperand(0))
1335
.addUse(DstReg, RegState::Kill);
1336
}
1337
1338
if (MO1.isGlobal()) {
1339
MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
1340
MIB2.addGlobalAddress(MO1.getGlobal(), 0,
1341
Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1342
} else if (MO1.isSymbol()) {
1343
MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
1344
MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
1345
AArch64II::MO_PAGEOFF |
1346
AArch64II::MO_NC);
1347
} else {
1348
assert(MO1.isCPI() &&
1349
"Only expect globals, externalsymbols, or constant pools");
1350
MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1351
Flags | AArch64II::MO_PAGE);
1352
MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1353
Flags | AArch64II::MO_PAGEOFF |
1354
AArch64II::MO_NC);
1355
}
1356
1357
transferImpOps(MI, MIB1, MIB2);
1358
}
1359
MI.eraseFromParent();
1360
return true;
1361
}
1362
case AArch64::MOVaddrBA: {
1363
MachineFunction &MF = *MI.getParent()->getParent();
1364
if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
1365
// blockaddress expressions have to come from a constant pool because the
1366
// largest addend (and hence offset within a function) allowed for ADRP is
1367
// only 8MB.
1368
const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
1369
assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");
1370
1371
MachineConstantPool *MCP = MF.getConstantPool();
1372
unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));
1373
1374
Register DstReg = MI.getOperand(0).getReg();
1375
auto MIB1 =
1376
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1377
.addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
1378
auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1379
TII->get(AArch64::LDRXui), DstReg)
1380
.addUse(DstReg)
1381
.addConstantPoolIndex(
1382
CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1383
transferImpOps(MI, MIB1, MIB2);
1384
MI.eraseFromParent();
1385
return true;
1386
}
1387
}
1388
[[fallthrough]];
1389
case AArch64::MOVaddr:
1390
case AArch64::MOVaddrJT:
1391
case AArch64::MOVaddrCP:
1392
case AArch64::MOVaddrTLS:
1393
case AArch64::MOVaddrEXT: {
1394
// Expand into ADRP + ADD.
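// (For the small code model this typically becomes
//    adrp xD, sym
//    add  xD, xD, :lo12:sym
//  with the low-12-bit offset applied by the ADD.)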
1395
Register DstReg = MI.getOperand(0).getReg();
1396
assert(DstReg != AArch64::XZR);
1397
MachineInstrBuilder MIB1 =
1398
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1399
.add(MI.getOperand(1));
1400
1401
if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
1402
// MO_TAGGED on the page indicates a tagged address. Set the tag now.
1403
// We do so by creating a MOVK that sets bits 48-63 of the register to
1404
// (global address + 0x100000000 - PC) >> 48. This assumes that we're in
1405
// the small code model so we can assume a binary size of <= 4GB, which
1406
// makes the untagged PC relative offset positive. The binary must also be
1407
// loaded into address range [0, 2^48). Both of these properties need to
1408
// be ensured at runtime when using tagged addresses.
1409
auto Tag = MI.getOperand(1);
1410
Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
1411
Tag.setOffset(0x100000000);
1412
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
1413
.addReg(DstReg)
1414
.add(Tag)
1415
.addImm(48);
1416
}
1417
1418
MachineInstrBuilder MIB2 =
1419
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1420
.add(MI.getOperand(0))
1421
.addReg(DstReg)
1422
.add(MI.getOperand(2))
1423
.addImm(0);
1424
1425
transferImpOps(MI, MIB1, MIB2);
1426
MI.eraseFromParent();
1427
return true;
1428
}
1429
case AArch64::ADDlowTLS:
1430
// Produce a plain ADD
1431
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1432
.add(MI.getOperand(0))
1433
.add(MI.getOperand(1))
1434
.add(MI.getOperand(2))
1435
.addImm(0);
1436
MI.eraseFromParent();
1437
return true;
1438
1439
case AArch64::MOVbaseTLS: {
1440
Register DstReg = MI.getOperand(0).getReg();
1441
auto SysReg = AArch64SysReg::TPIDR_EL0;
1442
MachineFunction *MF = MBB.getParent();
1443
if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
1444
SysReg = AArch64SysReg::TPIDR_EL3;
1445
else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
1446
SysReg = AArch64SysReg::TPIDR_EL2;
1447
else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
1448
SysReg = AArch64SysReg::TPIDR_EL1;
1449
else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
1450
SysReg = AArch64SysReg::TPIDRRO_EL0;
1451
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
1452
.addImm(SysReg);
1453
MI.eraseFromParent();
1454
return true;
1455
}
1456
1457
case AArch64::MOVi32imm:
1458
return expandMOVImm(MBB, MBBI, 32);
1459
case AArch64::MOVi64imm:
1460
return expandMOVImm(MBB, MBBI, 64);
1461
case AArch64::RET_ReallyLR: {
1462
// Hiding the LR use with RET_ReallyLR may lead to extra kills in the
1463
// function and missing live-ins. We are fine in practice because callee
1464
// saved register handling ensures the register value is restored before
1465
// RET, but we need the undef flag here to appease the MachineVerifier
1466
// liveness checks.
1467
MachineInstrBuilder MIB =
1468
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
1469
.addReg(AArch64::LR, RegState::Undef);
1470
transferImpOps(MI, MIB, MIB);
1471
MI.eraseFromParent();
1472
return true;
1473
}
1474
case AArch64::CMP_SWAP_8:
1475
return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
1476
AArch64::SUBSWrx,
1477
AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
1478
AArch64::WZR, NextMBBI);
1479
case AArch64::CMP_SWAP_16:
1480
return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
1481
AArch64::SUBSWrx,
1482
AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
1483
AArch64::WZR, NextMBBI);
1484
case AArch64::CMP_SWAP_32:
1485
return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
1486
AArch64::SUBSWrs,
1487
AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1488
AArch64::WZR, NextMBBI);
1489
case AArch64::CMP_SWAP_64:
1490
return expandCMP_SWAP(MBB, MBBI,
1491
AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
1492
AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1493
AArch64::XZR, NextMBBI);
1494
case AArch64::CMP_SWAP_128:
1495
case AArch64::CMP_SWAP_128_RELEASE:
1496
case AArch64::CMP_SWAP_128_ACQUIRE:
1497
case AArch64::CMP_SWAP_128_MONOTONIC:
1498
return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
1499
1500
case AArch64::AESMCrrTied:
1501
case AArch64::AESIMCrrTied: {
1502
MachineInstrBuilder MIB =
1503
BuildMI(MBB, MBBI, MI.getDebugLoc(),
1504
TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
1505
AArch64::AESIMCrr))
1506
.add(MI.getOperand(0))
1507
.add(MI.getOperand(1));
1508
transferImpOps(MI, MIB, MIB);
1509
MI.eraseFromParent();
1510
return true;
1511
}
1512
case AArch64::IRGstack: {
1513
MachineFunction &MF = *MBB.getParent();
1514
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1515
const AArch64FrameLowering *TFI =
1516
MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
1517
1518
// IRG does not allow an immediate offset. getTaggedBasePointerOffset should
1519
// almost always point to SP-after-prologue; if not, emit a longer
1520
// instruction sequence.
1521
int BaseOffset = -AFI->getTaggedBasePointerOffset();
1522
Register FrameReg;
1523
StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
1524
MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
1525
/*PreferFP=*/false,
1526
/*ForSimm=*/true);
1527
Register SrcReg = FrameReg;
1528
if (FrameRegOffset) {
1529
// Use output register as temporary.
1530
SrcReg = MI.getOperand(0).getReg();
1531
emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
1532
FrameRegOffset, TII);
1533
}
1534
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
1535
.add(MI.getOperand(0))
1536
.addUse(SrcReg)
1537
.add(MI.getOperand(2));
1538
MI.eraseFromParent();
1539
return true;
1540
}
1541
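  // TAGPstack becomes a single ADDG or SUBG: the sign of the immediate offset
  // selects the opcode and its magnitude is encoded as the immediate.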
  case AArch64::TAGPstack: {
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
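  // Writeback set-tag loops are expanded here; the non-writeback variants must
  // already have been rewritten by the prologue/epilogue inserter, so reaching
  // one of them is a fatal error.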
  case AArch64::STGloop_wback:
  case AArch64::STZGloop_wback:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  case AArch64::STGloop:
  case AArch64::STZGloop:
    report_fatal_error(
        "Non-writeback variants of STGloop / STZGloop should not "
        "survive past PrologEpilogInserter.");
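  // Spills and fills of SVE register tuples are split by expandSVESpillFill
  // into one STR_ZXI/LDR_ZXI (or STR_PXI/LDR_PXI) per register; the trailing
  // argument gives the number of registers in the tuple.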
  case AArch64::STR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
  case AArch64::STR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
  case AArch64::STR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
  case AArch64::STR_PPXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
  case AArch64::LDR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
  case AArch64::LDR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
  case AArch64::LDR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
  case AArch64::LDR_PPXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
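  // These call pseudos need an extra instruction placed immediately after the
  // call (an attached-call marker for the RVMARKER forms, a BTI for BLR_BTI),
  // so they are expanded by dedicated helpers.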
  case AArch64::BLR_RVMARKER:
  case AArch64::BLRA_RVMARKER:
    return expandCALL_RVMARKER(MBB, MBBI);
  case AArch64::BLR_BTI:
    return expandCALL_BTI(MBB, MBBI);
  case AArch64::StoreSwiftAsyncContext:
    return expandStoreSwiftAsyncContext(MBB, MBBI);
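  // The ZA-restore and streaming-mode toggle expansions may split the current
  // block into new control flow; when that happens NextMBBI no longer points
  // into this block, so scanning restarts from MBB.end().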
  case AArch64::RestoreZAPseudo: {
    auto *NewMBB = expandRestoreZA(MBB, MBBI);
    if (NewMBB != &MBB)
      NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
    return true;
  }
  case AArch64::MSRpstatePseudo: {
    auto *NewMBB = expandCondSMToggle(MBB, MBBI);
    if (NewMBB != &MBB)
      NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
    return true;
  }
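  // Coalescer barriers only exist to influence register coalescing; they have
  // no machine-code equivalent and are simply deleted here.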
  case AArch64::COALESCER_BARRIER_FPR16:
  case AArch64::COALESCER_BARRIER_FPR32:
  case AArch64::COALESCER_BARRIER_FPR64:
  case AArch64::COALESCER_BARRIER_FPR128:
    MI.eraseFromParent();
    return true;
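  // SME2 multi-vector load pseudos: expandMultiVecPseudo selects either the
  // contiguous or the strided real opcode, depending on which register class
  // the destination tuple was allocated to.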
  case AArch64::LD1B_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
  case AArch64::LD1H_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
  case AArch64::LD1W_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
  case AArch64::LD1D_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
  case AArch64::LDNT1B_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
  case AArch64::LDNT1H_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
  case AArch64::LDNT1W_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
  case AArch64::LDNT1D_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
  case AArch64::LD1B_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
                                AArch64::LD1B_2Z_STRIDED);
  case AArch64::LD1H_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
                                AArch64::LD1H_2Z_STRIDED);
  case AArch64::LD1W_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
                                AArch64::LD1W_2Z_STRIDED);
  case AArch64::LD1D_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
                                AArch64::LD1D_2Z_STRIDED);
  case AArch64::LDNT1B_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
  case AArch64::LDNT1H_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
  case AArch64::LDNT1W_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
  case AArch64::LDNT1D_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
  case AArch64::LD1B_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
  case AArch64::LD1H_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
  case AArch64::LD1W_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
  case AArch64::LD1D_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
  case AArch64::LDNT1B_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
  case AArch64::LDNT1H_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
  case AArch64::LDNT1W_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
  case AArch64::LDNT1D_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
  case AArch64::LD1B_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
                                AArch64::LD1B_4Z_STRIDED);
  case AArch64::LD1H_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
                                AArch64::LD1H_4Z_STRIDED);
  case AArch64::LD1W_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
                                AArch64::LD1W_4Z_STRIDED);
  case AArch64::LD1D_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
                                AArch64::LD1D_4Z_STRIDED);
  case AArch64::LDNT1B_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
  case AArch64::LDNT1H_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
  case AArch64::LDNT1W_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
  case AArch64::LDNT1D_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
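  // Walk the block instruction by instruction. The successor iterator is
  // captured before each expansion because expandMI may erase the current
  // instruction (and may update NMBBI itself when it creates new blocks).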
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}