GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp
1
//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines the AArch64-specific support for the FastISel class. Some
10
// of the target-specific code is generated by tablegen in the file
11
// AArch64GenFastISel.inc, which is #included here.
12
//
13
//===----------------------------------------------------------------------===//
14
15
#include "AArch64.h"
16
#include "AArch64CallingConvention.h"
17
#include "AArch64MachineFunctionInfo.h"
18
#include "AArch64RegisterInfo.h"
19
#include "AArch64Subtarget.h"
20
#include "MCTargetDesc/AArch64AddressingModes.h"
21
#include "Utils/AArch64BaseInfo.h"
22
#include "llvm/ADT/APFloat.h"
23
#include "llvm/ADT/APInt.h"
24
#include "llvm/ADT/DenseMap.h"
25
#include "llvm/ADT/SmallVector.h"
26
#include "llvm/Analysis/BranchProbabilityInfo.h"
27
#include "llvm/CodeGen/CallingConvLower.h"
28
#include "llvm/CodeGen/FastISel.h"
29
#include "llvm/CodeGen/FunctionLoweringInfo.h"
30
#include "llvm/CodeGen/ISDOpcodes.h"
31
#include "llvm/CodeGen/MachineBasicBlock.h"
32
#include "llvm/CodeGen/MachineConstantPool.h"
33
#include "llvm/CodeGen/MachineFrameInfo.h"
34
#include "llvm/CodeGen/MachineInstr.h"
35
#include "llvm/CodeGen/MachineInstrBuilder.h"
36
#include "llvm/CodeGen/MachineMemOperand.h"
37
#include "llvm/CodeGen/MachineRegisterInfo.h"
38
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
39
#include "llvm/CodeGen/ValueTypes.h"
40
#include "llvm/CodeGenTypes/MachineValueType.h"
41
#include "llvm/IR/Argument.h"
42
#include "llvm/IR/Attributes.h"
43
#include "llvm/IR/BasicBlock.h"
44
#include "llvm/IR/CallingConv.h"
45
#include "llvm/IR/Constant.h"
46
#include "llvm/IR/Constants.h"
47
#include "llvm/IR/DataLayout.h"
48
#include "llvm/IR/DerivedTypes.h"
49
#include "llvm/IR/Function.h"
50
#include "llvm/IR/GetElementPtrTypeIterator.h"
51
#include "llvm/IR/GlobalValue.h"
52
#include "llvm/IR/InstrTypes.h"
53
#include "llvm/IR/Instruction.h"
54
#include "llvm/IR/Instructions.h"
55
#include "llvm/IR/IntrinsicInst.h"
56
#include "llvm/IR/Intrinsics.h"
57
#include "llvm/IR/IntrinsicsAArch64.h"
58
#include "llvm/IR/Module.h"
59
#include "llvm/IR/Operator.h"
60
#include "llvm/IR/Type.h"
61
#include "llvm/IR/User.h"
62
#include "llvm/IR/Value.h"
63
#include "llvm/MC/MCInstrDesc.h"
64
#include "llvm/MC/MCRegisterInfo.h"
65
#include "llvm/MC/MCSymbol.h"
66
#include "llvm/Support/AtomicOrdering.h"
67
#include "llvm/Support/Casting.h"
68
#include "llvm/Support/CodeGen.h"
69
#include "llvm/Support/Compiler.h"
70
#include "llvm/Support/ErrorHandling.h"
71
#include "llvm/Support/MathExtras.h"
72
#include <algorithm>
73
#include <cassert>
74
#include <cstdint>
75
#include <iterator>
76
#include <utility>
77
78
using namespace llvm;
79
80
namespace {
81
82
class AArch64FastISel final : public FastISel {
83
class Address {
84
public:
85
using BaseKind = enum {
86
RegBase,
87
FrameIndexBase
88
};
89
90
private:
91
BaseKind Kind = RegBase;
92
AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
93
union {
94
unsigned Reg;
95
int FI;
96
} Base;
97
unsigned OffsetReg = 0;
98
unsigned Shift = 0;
99
int64_t Offset = 0;
100
const GlobalValue *GV = nullptr;
101
102
public:
103
Address() { Base.Reg = 0; }
104
105
void setKind(BaseKind K) { Kind = K; }
106
BaseKind getKind() const { return Kind; }
107
void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
108
AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
109
bool isRegBase() const { return Kind == RegBase; }
110
bool isFIBase() const { return Kind == FrameIndexBase; }
111
112
void setReg(unsigned Reg) {
113
assert(isRegBase() && "Invalid base register access!");
114
Base.Reg = Reg;
115
}
116
117
unsigned getReg() const {
118
assert(isRegBase() && "Invalid base register access!");
119
return Base.Reg;
120
}
121
122
void setOffsetReg(unsigned Reg) {
123
OffsetReg = Reg;
124
}
125
126
unsigned getOffsetReg() const {
127
return OffsetReg;
128
}
129
130
void setFI(unsigned FI) {
131
assert(isFIBase() && "Invalid base frame index access!");
132
Base.FI = FI;
133
}
134
135
unsigned getFI() const {
136
assert(isFIBase() && "Invalid base frame index access!");
137
return Base.FI;
138
}
139
140
void setOffset(int64_t O) { Offset = O; }
141
int64_t getOffset() { return Offset; }
142
void setShift(unsigned S) { Shift = S; }
143
unsigned getShift() { return Shift; }
144
145
void setGlobalValue(const GlobalValue *G) { GV = G; }
146
const GlobalValue *getGlobalValue() { return GV; }
147
};
148
149
/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
150
/// make the right decision when generating code for different targets.
151
const AArch64Subtarget *Subtarget;
152
LLVMContext *Context;
153
154
bool fastLowerArguments() override;
155
bool fastLowerCall(CallLoweringInfo &CLI) override;
156
bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
157
158
private:
159
// Selection routines.
160
bool selectAddSub(const Instruction *I);
161
bool selectLogicalOp(const Instruction *I);
162
bool selectLoad(const Instruction *I);
163
bool selectStore(const Instruction *I);
164
bool selectBranch(const Instruction *I);
165
bool selectIndirectBr(const Instruction *I);
166
bool selectCmp(const Instruction *I);
167
bool selectSelect(const Instruction *I);
168
bool selectFPExt(const Instruction *I);
169
bool selectFPTrunc(const Instruction *I);
170
bool selectFPToInt(const Instruction *I, bool Signed);
171
bool selectIntToFP(const Instruction *I, bool Signed);
172
bool selectRem(const Instruction *I, unsigned ISDOpcode);
173
bool selectRet(const Instruction *I);
174
bool selectTrunc(const Instruction *I);
175
bool selectIntExt(const Instruction *I);
176
bool selectMul(const Instruction *I);
177
bool selectShift(const Instruction *I);
178
bool selectBitCast(const Instruction *I);
179
bool selectFRem(const Instruction *I);
180
bool selectSDiv(const Instruction *I);
181
bool selectGetElementPtr(const Instruction *I);
182
bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
183
184
// Utility helper routines.
185
bool isTypeLegal(Type *Ty, MVT &VT);
186
bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
187
bool isValueAvailable(const Value *V) const;
188
bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
189
bool computeCallAddress(const Value *V, Address &Addr);
190
bool simplifyAddress(Address &Addr, MVT VT);
191
void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
192
MachineMemOperand::Flags Flags,
193
unsigned ScaleFactor, MachineMemOperand *MMO);
194
bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
195
bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
196
MaybeAlign Alignment);
197
bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
198
const Value *Cond);
199
bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
200
bool optimizeSelect(const SelectInst *SI);
201
unsigned getRegForGEPIndex(const Value *Idx);
202
203
// Emit helper routines.
204
unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
205
const Value *RHS, bool SetFlags = false,
206
bool WantResult = true, bool IsZExt = false);
207
unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
208
unsigned RHSReg, bool SetFlags = false,
209
bool WantResult = true);
210
unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
211
uint64_t Imm, bool SetFlags = false,
212
bool WantResult = true);
213
unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
214
unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
215
uint64_t ShiftImm, bool SetFlags = false,
216
bool WantResult = true);
217
unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
218
unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
219
uint64_t ShiftImm, bool SetFlags = false,
220
bool WantResult = true);
221
222
// Emit functions.
223
bool emitCompareAndBranch(const BranchInst *BI);
224
bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
225
bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
226
bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
227
bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
228
unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
229
MachineMemOperand *MMO = nullptr);
230
bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
231
MachineMemOperand *MMO = nullptr);
232
bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
233
MachineMemOperand *MMO = nullptr);
234
unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
235
unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
236
unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
237
bool SetFlags = false, bool WantResult = true,
238
bool IsZExt = false);
239
unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
240
unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
241
bool SetFlags = false, bool WantResult = true,
242
bool IsZExt = false);
243
unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
244
bool WantResult = true);
245
unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
246
AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
247
bool WantResult = true);
248
unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
249
const Value *RHS);
250
unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251
uint64_t Imm);
252
unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
253
unsigned RHSReg, uint64_t ShiftImm);
254
unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
255
unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256
unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
257
unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
258
unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
259
unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
260
bool IsZExt = true);
261
unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
262
unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
263
bool IsZExt = true);
264
unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
265
unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
266
bool IsZExt = false);
267
268
unsigned materializeInt(const ConstantInt *CI, MVT VT);
269
unsigned materializeFP(const ConstantFP *CFP, MVT VT);
270
unsigned materializeGV(const GlobalValue *GV);
271
272
// Call handling routines.
273
private:
274
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
275
bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
276
unsigned &NumBytes);
277
bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
278
279
public:
280
// Backend specific FastISel code.
281
unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
282
unsigned fastMaterializeConstant(const Constant *C) override;
283
unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
284
285
explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
286
const TargetLibraryInfo *LibInfo)
287
: FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
288
Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
289
Context = &FuncInfo.Fn->getContext();
290
}
291
292
bool fastSelectInstruction(const Instruction *I) override;
293
294
#include "AArch64GenFastISel.inc"
295
};
296
297
} // end anonymous namespace
298
299
/// Check if the sign-/zero-extend will be a noop.
300
static bool isIntExtFree(const Instruction *I) {
301
assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
302
"Unexpected integer extend instruction.");
303
assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
304
"Unexpected value type.");
305
bool IsZExt = isa<ZExtInst>(I);
306
307
if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
308
if (LI->hasOneUse())
309
return true;
310
311
if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
312
if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
313
return true;
314
315
return false;
316
}
317
318
/// Determine the implicit scale factor that is applied by a memory
319
/// operation for a given value type.
320
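/// For example, a 32-bit access has a scale of 4: "ldr w0, [x1, #8]" is
/// encoded with an unsigned immediate of 2 (the byte offset divided by the
/// scale).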
static unsigned getImplicitScaleFactor(MVT VT) {
321
switch (VT.SimpleTy) {
322
default:
323
return 0; // invalid
324
case MVT::i1: // fall-through
325
case MVT::i8:
326
return 1;
327
case MVT::i16:
328
return 2;
329
case MVT::i32: // fall-through
330
case MVT::f32:
331
return 4;
332
case MVT::i64: // fall-through
333
case MVT::f64:
334
return 8;
335
}
336
}
337
338
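// Pick the calling-convention assignment function for the given calling
// convention: GHC, Windows CFGuard checks, Darwin and Win64 each get their
// own table; everything else falls back to the default AAPCS rules.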
CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
339
if (CC == CallingConv::GHC)
340
return CC_AArch64_GHC;
341
if (CC == CallingConv::CFGuard_Check)
342
return CC_AArch64_Win64_CFGuard_Check;
343
if (Subtarget->isTargetDarwin())
344
return CC_AArch64_DarwinPCS;
345
if (Subtarget->isTargetWindows())
346
return CC_AArch64_Win64PCS;
347
return CC_AArch64_AAPCS;
348
}
349
350
unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
351
assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
352
"Alloca should always return a pointer.");
353
354
// Don't handle dynamic allocas.
355
if (!FuncInfo.StaticAllocaMap.count(AI))
356
return 0;
357
358
DenseMap<const AllocaInst *, int>::iterator SI =
359
FuncInfo.StaticAllocaMap.find(AI);
360
361
if (SI != FuncInfo.StaticAllocaMap.end()) {
362
Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
363
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
364
ResultReg)
365
.addFrameIndex(SI->second)
366
.addImm(0)
367
.addImm(0);
368
return ResultReg;
369
}
370
371
return 0;
372
}
373
374
unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
375
if (VT > MVT::i64)
376
return 0;
377
378
if (!CI->isZero())
379
return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
380
381
// Create a copy from the zero register to materialize a "0" value.
382
const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
383
: &AArch64::GPR32RegClass;
384
unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
385
Register ResultReg = createResultReg(RC);
386
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
387
ResultReg).addReg(ZeroReg, getKillRegState(true));
388
return ResultReg;
389
}
390
391
unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
392
// Positive zero (+0.0) has to be materialized with a fmov from the zero
393
// register, because the immediate version of fmov cannot encode zero.
394
if (CFP->isNullValue())
395
return fastMaterializeFloatZero(CFP);
396
397
if (VT != MVT::f32 && VT != MVT::f64)
398
return 0;
399
400
const APFloat Val = CFP->getValueAPF();
401
bool Is64Bit = (VT == MVT::f64);
402
// This checks to see if we can use FMOV instructions to materialize
403
// a constant, otherwise we have to materialize via the constant pool.
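// FMOV (immediate) can only encode a small set of values, roughly those of
// the form +/- n/16 * 2^e with n in [16, 31] and e in [-3, 4], e.g. 1.0,
// 0.5 or -2.0; getFP32Imm/getFP64Imm return -1 for anything else (e.g. 0.1).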
404
int Imm =
405
Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
406
if (Imm != -1) {
407
unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
408
return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
409
}
410
411
// For the large code model materialize the FP constant in code.
412
if (TM.getCodeModel() == CodeModel::Large) {
413
unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
414
const TargetRegisterClass *RC = Is64Bit ?
415
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
416
417
Register TmpReg = createResultReg(RC);
418
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
419
.addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
420
421
Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
422
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
423
TII.get(TargetOpcode::COPY), ResultReg)
424
.addReg(TmpReg, getKillRegState(true));
425
426
return ResultReg;
427
}
428
429
// Materialize via constant pool. MachineConstantPool wants an explicit
430
// alignment.
431
Align Alignment = DL.getPrefTypeAlign(CFP->getType());
432
433
unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
434
Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
435
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
436
ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
437
438
unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
439
Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
440
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
441
.addReg(ADRPReg)
442
.addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
443
return ResultReg;
444
}
445
446
unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
447
// We can't handle thread-local variables quickly yet.
448
if (GV->isThreadLocal())
449
return 0;
450
451
// MachO still uses GOT for large code-model accesses, but ELF requires
452
// movz/movk sequences, which FastISel doesn't handle yet.
453
if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
454
return 0;
455
456
unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
457
458
EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
459
if (!DestEVT.isSimple())
460
return 0;
461
462
Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
463
unsigned ResultReg;
464
465
if (OpFlags & AArch64II::MO_GOT) {
466
// ADRP + LDRX
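// On ELF targets this ends up as, for example:
//   adrp x0, :got:sym
//   ldr  x0, [x0, :got_lo12:sym]
// i.e. the address is loaded from the GOT slot rather than computed inline.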
467
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
468
ADRPReg)
469
.addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
470
471
unsigned LdrOpc;
472
if (Subtarget->isTargetILP32()) {
473
ResultReg = createResultReg(&AArch64::GPR32RegClass);
474
LdrOpc = AArch64::LDRWui;
475
} else {
476
ResultReg = createResultReg(&AArch64::GPR64RegClass);
477
LdrOpc = AArch64::LDRXui;
478
}
479
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
480
ResultReg)
481
.addReg(ADRPReg)
482
.addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
483
AArch64II::MO_NC | OpFlags);
484
if (!Subtarget->isTargetILP32())
485
return ResultReg;
486
487
// LDRWui produces a 32-bit register, but pointers in-register are 64 bits
488
// so we must extend the result on ILP32.
489
Register Result64 = createResultReg(&AArch64::GPR64RegClass);
490
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
491
TII.get(TargetOpcode::SUBREG_TO_REG))
492
.addDef(Result64)
493
.addImm(0)
494
.addReg(ResultReg, RegState::Kill)
495
.addImm(AArch64::sub_32);
496
return Result64;
497
} else {
498
// ADRP + ADDX
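// For a directly reachable symbol this is the usual pair, for example:
//   adrp x0, sym
//   add  x0, x0, :lo12:sym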
499
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
500
ADRPReg)
501
.addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
502
503
if (OpFlags & AArch64II::MO_TAGGED) {
504
// MO_TAGGED on the page indicates a tagged address. Set the tag now.
505
// We do so by creating a MOVK that sets bits 48-63 of the register to
506
// (global address + 0x100000000 - PC) >> 48. This assumes that we're in
507
// the small code model so we can assume a binary size of <= 4GB, which
508
// makes the untagged PC relative offset positive. The binary must also be
509
// loaded into address range [0, 2^48). Both of these properties need to
510
// be ensured at runtime when using tagged addresses.
511
//
512
// TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
513
// also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
514
// are not exactly 1:1 with FastISel so we cannot easily abstract this
515
// out. At some point, it would be nice to find a way to not have this
516
// duplicate code.
517
unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
518
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
519
DstReg)
520
.addReg(ADRPReg)
521
.addGlobalAddress(GV, /*Offset=*/0x100000000,
522
AArch64II::MO_PREL | AArch64II::MO_G3)
523
.addImm(48);
524
ADRPReg = DstReg;
525
}
526
527
ResultReg = createResultReg(&AArch64::GPR64spRegClass);
528
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
529
ResultReg)
530
.addReg(ADRPReg)
531
.addGlobalAddress(GV, 0,
532
AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
533
.addImm(0);
534
}
535
return ResultReg;
536
}
537
538
unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
539
EVT CEVT = TLI.getValueType(DL, C->getType(), true);
540
541
// Only handle simple types.
542
if (!CEVT.isSimple())
543
return 0;
544
MVT VT = CEVT.getSimpleVT();
545
// arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
546
// 'null' pointers need to have a somewhat special treatment.
547
if (isa<ConstantPointerNull>(C)) {
548
assert(VT == MVT::i64 && "Expected 64-bit pointers");
549
return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
550
}
551
552
if (const auto *CI = dyn_cast<ConstantInt>(C))
553
return materializeInt(CI, VT);
554
else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
555
return materializeFP(CFP, VT);
556
else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
557
return materializeGV(GV);
558
559
return 0;
560
}
561
562
unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
563
assert(CFP->isNullValue() &&
564
"Floating-point constant is not a positive zero.");
565
MVT VT;
566
if (!isTypeLegal(CFP->getType(), VT))
567
return 0;
568
569
if (VT != MVT::f32 && VT != MVT::f64)
570
return 0;
571
572
bool Is64Bit = (VT == MVT::f64);
573
unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
574
unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
575
return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
576
}
577
578
/// Check if the multiply is by a power-of-2 constant.
579
static bool isMulPowOf2(const Value *I) {
580
if (const auto *MI = dyn_cast<MulOperator>(I)) {
581
if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
582
if (C->getValue().isPowerOf2())
583
return true;
584
if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
585
if (C->getValue().isPowerOf2())
586
return true;
587
}
588
return false;
589
}
590
591
// Computes the address to get to an object.
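// The resulting Address describes one of the forms FastISel can fold into a
// load/store: a frame index, [base, #imm], or a register offset
// [base, Xm/Wm, {S,U}XTW|LSL #shift] where the shift matches the access size.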
592
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
593
{
594
const User *U = nullptr;
595
unsigned Opcode = Instruction::UserOp1;
596
if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
597
// Don't walk into other basic blocks unless the object is an alloca from
598
// another block, otherwise it may not have a virtual register assigned.
599
if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
600
FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
601
Opcode = I->getOpcode();
602
U = I;
603
}
604
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
605
Opcode = C->getOpcode();
606
U = C;
607
}
608
609
if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
610
if (Ty->getAddressSpace() > 255)
611
// Fast instruction selection doesn't support the special
612
// address spaces.
613
return false;
614
615
switch (Opcode) {
616
default:
617
break;
618
case Instruction::BitCast:
619
// Look through bitcasts.
620
return computeAddress(U->getOperand(0), Addr, Ty);
621
622
case Instruction::IntToPtr:
623
// Look past no-op inttoptrs.
624
if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
625
TLI.getPointerTy(DL))
626
return computeAddress(U->getOperand(0), Addr, Ty);
627
break;
628
629
case Instruction::PtrToInt:
630
// Look past no-op ptrtoints.
631
if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
632
return computeAddress(U->getOperand(0), Addr, Ty);
633
break;
634
635
case Instruction::GetElementPtr: {
636
Address SavedAddr = Addr;
637
uint64_t TmpOffset = Addr.getOffset();
638
639
// Iterate through the GEP folding the constants into offsets where
640
// we can.
641
for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
642
GTI != E; ++GTI) {
643
const Value *Op = GTI.getOperand();
644
if (StructType *STy = GTI.getStructTypeOrNull()) {
645
const StructLayout *SL = DL.getStructLayout(STy);
646
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
647
TmpOffset += SL->getElementOffset(Idx);
648
} else {
649
uint64_t S = GTI.getSequentialElementStride(DL);
650
while (true) {
651
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
652
// Constant-offset addressing.
653
TmpOffset += CI->getSExtValue() * S;
654
break;
655
}
656
if (canFoldAddIntoGEP(U, Op)) {
657
// A compatible add with a constant operand. Fold the constant.
658
ConstantInt *CI =
659
cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
660
TmpOffset += CI->getSExtValue() * S;
661
// Iterate on the other operand.
662
Op = cast<AddOperator>(Op)->getOperand(0);
663
continue;
664
}
665
// Unsupported
666
goto unsupported_gep;
667
}
668
}
669
}
670
671
// Try to grab the base operand now.
672
Addr.setOffset(TmpOffset);
673
if (computeAddress(U->getOperand(0), Addr, Ty))
674
return true;
675
676
// We failed, restore everything and try the other options.
677
Addr = SavedAddr;
678
679
unsupported_gep:
680
break;
681
}
682
case Instruction::Alloca: {
683
const AllocaInst *AI = cast<AllocaInst>(Obj);
684
DenseMap<const AllocaInst *, int>::iterator SI =
685
FuncInfo.StaticAllocaMap.find(AI);
686
if (SI != FuncInfo.StaticAllocaMap.end()) {
687
Addr.setKind(Address::FrameIndexBase);
688
Addr.setFI(SI->second);
689
return true;
690
}
691
break;
692
}
693
case Instruction::Add: {
694
// Adds of constants are common and easy enough.
695
const Value *LHS = U->getOperand(0);
696
const Value *RHS = U->getOperand(1);
697
698
if (isa<ConstantInt>(LHS))
699
std::swap(LHS, RHS);
700
701
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
702
Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
703
return computeAddress(LHS, Addr, Ty);
704
}
705
706
Address Backup = Addr;
707
if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
708
return true;
709
Addr = Backup;
710
711
break;
712
}
713
case Instruction::Sub: {
714
// Subs of constants are common and easy enough.
715
const Value *LHS = U->getOperand(0);
716
const Value *RHS = U->getOperand(1);
717
718
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
719
Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
720
return computeAddress(LHS, Addr, Ty);
721
}
722
break;
723
}
724
case Instruction::Shl: {
725
if (Addr.getOffsetReg())
726
break;
727
728
const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
729
if (!CI)
730
break;
731
732
unsigned Val = CI->getZExtValue();
733
if (Val < 1 || Val > 3)
734
break;
735
736
uint64_t NumBytes = 0;
737
if (Ty && Ty->isSized()) {
738
uint64_t NumBits = DL.getTypeSizeInBits(Ty);
739
NumBytes = NumBits / 8;
740
if (!isPowerOf2_64(NumBits))
741
NumBytes = 0;
742
}
743
744
if (NumBytes != (1ULL << Val))
745
break;
746
747
Addr.setShift(Val);
748
Addr.setExtendType(AArch64_AM::LSL);
749
750
const Value *Src = U->getOperand(0);
751
if (const auto *I = dyn_cast<Instruction>(Src)) {
752
if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
753
// Fold the zext or sext when it won't become a noop.
754
if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
755
if (!isIntExtFree(ZE) &&
756
ZE->getOperand(0)->getType()->isIntegerTy(32)) {
757
Addr.setExtendType(AArch64_AM::UXTW);
758
Src = ZE->getOperand(0);
759
}
760
} else if (const auto *SE = dyn_cast<SExtInst>(I)) {
761
if (!isIntExtFree(SE) &&
762
SE->getOperand(0)->getType()->isIntegerTy(32)) {
763
Addr.setExtendType(AArch64_AM::SXTW);
764
Src = SE->getOperand(0);
765
}
766
}
767
}
768
}
769
770
if (const auto *AI = dyn_cast<BinaryOperator>(Src))
771
if (AI->getOpcode() == Instruction::And) {
772
const Value *LHS = AI->getOperand(0);
773
const Value *RHS = AI->getOperand(1);
774
775
if (const auto *C = dyn_cast<ConstantInt>(LHS))
776
if (C->getValue() == 0xffffffff)
777
std::swap(LHS, RHS);
778
779
if (const auto *C = dyn_cast<ConstantInt>(RHS))
780
if (C->getValue() == 0xffffffff) {
781
Addr.setExtendType(AArch64_AM::UXTW);
782
Register Reg = getRegForValue(LHS);
783
if (!Reg)
784
return false;
785
Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
786
Addr.setOffsetReg(Reg);
787
return true;
788
}
789
}
790
791
Register Reg = getRegForValue(Src);
792
if (!Reg)
793
return false;
794
Addr.setOffsetReg(Reg);
795
return true;
796
}
797
case Instruction::Mul: {
798
if (Addr.getOffsetReg())
799
break;
800
801
if (!isMulPowOf2(U))
802
break;
803
804
const Value *LHS = U->getOperand(0);
805
const Value *RHS = U->getOperand(1);
806
807
// Canonicalize power-of-2 value to the RHS.
808
if (const auto *C = dyn_cast<ConstantInt>(LHS))
809
if (C->getValue().isPowerOf2())
810
std::swap(LHS, RHS);
811
812
assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
813
const auto *C = cast<ConstantInt>(RHS);
814
unsigned Val = C->getValue().logBase2();
815
if (Val < 1 || Val > 3)
816
break;
817
818
uint64_t NumBytes = 0;
819
if (Ty && Ty->isSized()) {
820
uint64_t NumBits = DL.getTypeSizeInBits(Ty);
821
NumBytes = NumBits / 8;
822
if (!isPowerOf2_64(NumBits))
823
NumBytes = 0;
824
}
825
826
if (NumBytes != (1ULL << Val))
827
break;
828
829
Addr.setShift(Val);
830
Addr.setExtendType(AArch64_AM::LSL);
831
832
const Value *Src = LHS;
833
if (const auto *I = dyn_cast<Instruction>(Src)) {
834
if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
835
// Fold the zext or sext when it won't become a noop.
836
if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
837
if (!isIntExtFree(ZE) &&
838
ZE->getOperand(0)->getType()->isIntegerTy(32)) {
839
Addr.setExtendType(AArch64_AM::UXTW);
840
Src = ZE->getOperand(0);
841
}
842
} else if (const auto *SE = dyn_cast<SExtInst>(I)) {
843
if (!isIntExtFree(SE) &&
844
SE->getOperand(0)->getType()->isIntegerTy(32)) {
845
Addr.setExtendType(AArch64_AM::SXTW);
846
Src = SE->getOperand(0);
847
}
848
}
849
}
850
}
851
852
Register Reg = getRegForValue(Src);
853
if (!Reg)
854
return false;
855
Addr.setOffsetReg(Reg);
856
return true;
857
}
858
case Instruction::And: {
859
if (Addr.getOffsetReg())
860
break;
861
862
if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
863
break;
864
865
const Value *LHS = U->getOperand(0);
866
const Value *RHS = U->getOperand(1);
867
868
if (const auto *C = dyn_cast<ConstantInt>(LHS))
869
if (C->getValue() == 0xffffffff)
870
std::swap(LHS, RHS);
871
872
if (const auto *C = dyn_cast<ConstantInt>(RHS))
873
if (C->getValue() == 0xffffffff) {
874
Addr.setShift(0);
875
Addr.setExtendType(AArch64_AM::LSL);
876
Addr.setExtendType(AArch64_AM::UXTW);
877
878
Register Reg = getRegForValue(LHS);
879
if (!Reg)
880
return false;
881
Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
882
Addr.setOffsetReg(Reg);
883
return true;
884
}
885
break;
886
}
887
case Instruction::SExt:
888
case Instruction::ZExt: {
889
if (!Addr.getReg() || Addr.getOffsetReg())
890
break;
891
892
const Value *Src = nullptr;
893
// Fold the zext or sext when it won't become a noop.
894
if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
895
if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
896
Addr.setExtendType(AArch64_AM::UXTW);
897
Src = ZE->getOperand(0);
898
}
899
} else if (const auto *SE = dyn_cast<SExtInst>(U)) {
900
if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
901
Addr.setExtendType(AArch64_AM::SXTW);
902
Src = SE->getOperand(0);
903
}
904
}
905
906
if (!Src)
907
break;
908
909
Addr.setShift(0);
910
Register Reg = getRegForValue(Src);
911
if (!Reg)
912
return false;
913
Addr.setOffsetReg(Reg);
914
return true;
915
}
916
} // end switch
917
918
if (Addr.isRegBase() && !Addr.getReg()) {
919
Register Reg = getRegForValue(Obj);
920
if (!Reg)
921
return false;
922
Addr.setReg(Reg);
923
return true;
924
}
925
926
if (!Addr.getOffsetReg()) {
927
Register Reg = getRegForValue(Obj);
928
if (!Reg)
929
return false;
930
Addr.setOffsetReg(Reg);
931
return true;
932
}
933
934
return false;
935
}
936
937
bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
938
const User *U = nullptr;
939
unsigned Opcode = Instruction::UserOp1;
940
bool InMBB = true;
941
942
if (const auto *I = dyn_cast<Instruction>(V)) {
943
Opcode = I->getOpcode();
944
U = I;
945
InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
946
} else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
947
Opcode = C->getOpcode();
948
U = C;
949
}
950
951
switch (Opcode) {
952
default: break;
953
case Instruction::BitCast:
954
// Look past bitcasts if its operand is in the same BB.
955
if (InMBB)
956
return computeCallAddress(U->getOperand(0), Addr);
957
break;
958
case Instruction::IntToPtr:
959
// Look past no-op inttoptrs if its operand is in the same BB.
960
if (InMBB &&
961
TLI.getValueType(DL, U->getOperand(0)->getType()) ==
962
TLI.getPointerTy(DL))
963
return computeCallAddress(U->getOperand(0), Addr);
964
break;
965
case Instruction::PtrToInt:
966
// Look past no-op ptrtoints if its operand is in the same BB.
967
if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
968
return computeCallAddress(U->getOperand(0), Addr);
969
break;
970
}
971
972
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
973
Addr.setGlobalValue(GV);
974
return true;
975
}
976
977
// If all else fails, try to materialize the value in a register.
978
if (!Addr.getGlobalValue()) {
979
Addr.setReg(getRegForValue(V));
980
return Addr.getReg() != 0;
981
}
982
983
return false;
984
}
985
986
bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
987
EVT evt = TLI.getValueType(DL, Ty, true);
988
989
if (Subtarget->isTargetILP32() && Ty->isPointerTy())
990
return false;
991
992
// Only handle simple types.
993
if (evt == MVT::Other || !evt.isSimple())
994
return false;
995
VT = evt.getSimpleVT();
996
997
// This is a legal type, but it's not something we handle in fast-isel.
998
if (VT == MVT::f128)
999
return false;
1000
1001
// Handle all other legal types, i.e. a register that will directly hold this
1002
// value.
1003
return TLI.isTypeLegal(VT);
1004
}
1005
1006
/// Determine if the value type is supported by FastISel.
1007
///
1008
/// FastISel for AArch64 can handle more value types than are legal. This adds
1009
/// simple value types such as i1, i8, and i16.
1010
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1011
if (Ty->isVectorTy() && !IsVectorAllowed)
1012
return false;
1013
1014
if (isTypeLegal(Ty, VT))
1015
return true;
1016
1017
// If this is a type that can be sign- or zero-extended to a basic operation
1018
// go ahead and accept it now.
1019
if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1020
return true;
1021
1022
return false;
1023
}
1024
1025
bool AArch64FastISel::isValueAvailable(const Value *V) const {
1026
if (!isa<Instruction>(V))
1027
return true;
1028
1029
const auto *I = cast<Instruction>(V);
1030
return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1031
}
1032
1033
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1034
if (Subtarget->isTargetILP32())
1035
return false;
1036
1037
unsigned ScaleFactor = getImplicitScaleFactor(VT);
1038
if (!ScaleFactor)
1039
return false;
1040
1041
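// Immediate offsets have two encodings: a signed 9-bit unscaled form
// (LDUR/STUR) and an unsigned 12-bit form scaled by the access size
// (LDR/STR). Offsets that fit neither must be folded into the base register.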
bool ImmediateOffsetNeedsLowering = false;
1042
bool RegisterOffsetNeedsLowering = false;
1043
int64_t Offset = Addr.getOffset();
1044
if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1045
ImmediateOffsetNeedsLowering = true;
1046
else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1047
!isUInt<12>(Offset / ScaleFactor))
1048
ImmediateOffsetNeedsLowering = true;
1049
1050
// Cannot encode an offset register and an immediate offset in the same
1051
// instruction. Fold the immediate offset into the load/store instruction and
1052
// emit an additional add to take care of the offset register.
1053
if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1054
RegisterOffsetNeedsLowering = true;
1055
1056
// Cannot encode zero register as base.
1057
if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1058
RegisterOffsetNeedsLowering = true;
1059
1060
// If this is a stack pointer and the offset needs to be simplified then put
1061
// the alloca address into a register, set the base type back to register and
1062
// continue. This should almost never happen.
1063
if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1064
{
1065
Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1066
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1067
ResultReg)
1068
.addFrameIndex(Addr.getFI())
1069
.addImm(0)
1070
.addImm(0);
1071
Addr.setKind(Address::RegBase);
1072
Addr.setReg(ResultReg);
1073
}
1074
1075
if (RegisterOffsetNeedsLowering) {
1076
unsigned ResultReg = 0;
1077
if (Addr.getReg()) {
1078
if (Addr.getExtendType() == AArch64_AM::SXTW ||
1079
Addr.getExtendType() == AArch64_AM::UXTW )
1080
ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1081
Addr.getOffsetReg(), Addr.getExtendType(),
1082
Addr.getShift());
1083
else
1084
ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1085
Addr.getOffsetReg(), AArch64_AM::LSL,
1086
Addr.getShift());
1087
} else {
1088
if (Addr.getExtendType() == AArch64_AM::UXTW)
1089
ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1090
Addr.getShift(), /*IsZExt=*/true);
1091
else if (Addr.getExtendType() == AArch64_AM::SXTW)
1092
ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1093
Addr.getShift(), /*IsZExt=*/false);
1094
else
1095
ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1096
Addr.getShift());
1097
}
1098
if (!ResultReg)
1099
return false;
1100
1101
Addr.setReg(ResultReg);
1102
Addr.setOffsetReg(0);
1103
Addr.setShift(0);
1104
Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1105
}
1106
1107
// Since the offset is too large for the load/store instruction get the
1108
// reg+offset into a register.
1109
if (ImmediateOffsetNeedsLowering) {
1110
unsigned ResultReg;
1111
if (Addr.getReg())
1112
// Try to fold the immediate into the add instruction.
1113
ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1114
else
1115
ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1116
1117
if (!ResultReg)
1118
return false;
1119
Addr.setReg(ResultReg);
1120
Addr.setOffset(0);
1121
}
1122
return true;
1123
}
1124
1125
void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1126
const MachineInstrBuilder &MIB,
1127
MachineMemOperand::Flags Flags,
1128
unsigned ScaleFactor,
1129
MachineMemOperand *MMO) {
1130
int64_t Offset = Addr.getOffset() / ScaleFactor;
1131
// Frame base works a bit differently. Handle it separately.
1132
if (Addr.isFIBase()) {
1133
int FI = Addr.getFI();
1134
// FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1135
// and alignment should be based on the VT.
1136
MMO = FuncInfo.MF->getMachineMemOperand(
1137
MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1138
MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1139
// Now add the rest of the operands.
1140
MIB.addFrameIndex(FI).addImm(Offset);
1141
} else {
1142
assert(Addr.isRegBase() && "Unexpected address kind.");
1143
const MCInstrDesc &II = MIB->getDesc();
1144
unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1145
Addr.setReg(
1146
constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1147
Addr.setOffsetReg(
1148
constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1149
if (Addr.getOffsetReg()) {
1150
assert(Addr.getOffset() == 0 && "Unexpected offset");
1151
bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1152
Addr.getExtendType() == AArch64_AM::SXTX;
1153
MIB.addReg(Addr.getReg());
1154
MIB.addReg(Addr.getOffsetReg());
1155
MIB.addImm(IsSigned);
1156
MIB.addImm(Addr.getShift() != 0);
1157
} else
1158
MIB.addReg(Addr.getReg()).addImm(Offset);
1159
}
1160
1161
if (MMO)
1162
MIB.addMemOperand(MMO);
1163
}
1164
1165
unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1166
const Value *RHS, bool SetFlags,
1167
bool WantResult, bool IsZExt) {
1168
AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1169
bool NeedExtend = false;
1170
switch (RetVT.SimpleTy) {
1171
default:
1172
return 0;
1173
case MVT::i1:
1174
NeedExtend = true;
1175
break;
1176
case MVT::i8:
1177
NeedExtend = true;
1178
ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1179
break;
1180
case MVT::i16:
1181
NeedExtend = true;
1182
ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1183
break;
1184
case MVT::i32: // fall-through
1185
case MVT::i64:
1186
break;
1187
}
1188
MVT SrcVT = RetVT;
1189
RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1190
1191
// Canonicalize immediates to the RHS first.
1192
if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1193
std::swap(LHS, RHS);
1194
1195
// Canonicalize mul by power of 2 to the RHS.
1196
if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1197
if (isMulPowOf2(LHS))
1198
std::swap(LHS, RHS);
1199
1200
// Canonicalize shift immediate to the RHS.
1201
if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1202
if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1203
if (isa<ConstantInt>(SI->getOperand(1)))
1204
if (SI->getOpcode() == Instruction::Shl ||
1205
SI->getOpcode() == Instruction::LShr ||
1206
SI->getOpcode() == Instruction::AShr )
1207
std::swap(LHS, RHS);
1208
1209
Register LHSReg = getRegForValue(LHS);
1210
if (!LHSReg)
1211
return 0;
1212
1213
if (NeedExtend)
1214
LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1215
1216
unsigned ResultReg = 0;
1217
if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1218
uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1219
if (C->isNegative())
1220
ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1221
WantResult);
1222
else
1223
ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1224
WantResult);
1225
} else if (const auto *C = dyn_cast<Constant>(RHS))
1226
if (C->isNullValue())
1227
ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1228
1229
if (ResultReg)
1230
return ResultReg;
1231
1232
// Only extend the RHS within the instruction if there is a valid extend type.
1233
if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1234
isValueAvailable(RHS)) {
1235
Register RHSReg = getRegForValue(RHS);
1236
if (!RHSReg)
1237
return 0;
1238
return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1239
SetFlags, WantResult);
1240
}
1241
1242
// Check if the mul can be folded into the instruction.
1243
if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1244
if (isMulPowOf2(RHS)) {
1245
const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1246
const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1247
1248
if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1249
if (C->getValue().isPowerOf2())
1250
std::swap(MulLHS, MulRHS);
1251
1252
assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1253
uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1254
Register RHSReg = getRegForValue(MulLHS);
1255
if (!RHSReg)
1256
return 0;
1257
ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1258
ShiftVal, SetFlags, WantResult);
1259
if (ResultReg)
1260
return ResultReg;
1261
}
1262
}
1263
1264
// Check if the shift can be folded into the instruction.
1265
if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1266
if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1267
if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1268
AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1269
switch (SI->getOpcode()) {
1270
default: break;
1271
case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1272
case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1273
case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1274
}
1275
uint64_t ShiftVal = C->getZExtValue();
1276
if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1277
Register RHSReg = getRegForValue(SI->getOperand(0));
1278
if (!RHSReg)
1279
return 0;
1280
ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1281
ShiftVal, SetFlags, WantResult);
1282
if (ResultReg)
1283
return ResultReg;
1284
}
1285
}
1286
}
1287
}
1288
1289
Register RHSReg = getRegForValue(RHS);
1290
if (!RHSReg)
1291
return 0;
1292
1293
if (NeedExtend)
1294
RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1295
1296
return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1297
}
1298
1299
unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1300
unsigned RHSReg, bool SetFlags,
1301
bool WantResult) {
1302
assert(LHSReg && RHSReg && "Invalid register number.");
1303
1304
if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1305
RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1306
return 0;
1307
1308
if (RetVT != MVT::i32 && RetVT != MVT::i64)
1309
return 0;
1310
1311
static const unsigned OpcTable[2][2][2] = {
1312
{ { AArch64::SUBWrr, AArch64::SUBXrr },
1313
{ AArch64::ADDWrr, AArch64::ADDXrr } },
1314
{ { AArch64::SUBSWrr, AArch64::SUBSXrr },
1315
{ AArch64::ADDSWrr, AArch64::ADDSXrr } }
1316
};
1317
bool Is64Bit = RetVT == MVT::i64;
1318
unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1319
const TargetRegisterClass *RC =
1320
Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1321
unsigned ResultReg;
1322
if (WantResult)
1323
ResultReg = createResultReg(RC);
1324
else
1325
ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1326
1327
const MCInstrDesc &II = TII.get(Opc);
1328
LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1329
RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1330
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1331
.addReg(LHSReg)
1332
.addReg(RHSReg);
1333
return ResultReg;
1334
}
1335
1336
unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1337
uint64_t Imm, bool SetFlags,
1338
bool WantResult) {
1339
assert(LHSReg && "Invalid register number.");
1340
1341
if (RetVT != MVT::i32 && RetVT != MVT::i64)
1342
return 0;
1343
1344
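// ADD/SUB (immediate) takes a 12-bit unsigned value, optionally shifted left
// by 12, e.g. "add x0, x1, #0xabc" or "add x0, x1, #0xabc, lsl #12"; any
// other immediate is rejected here and handled through a register instead.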
unsigned ShiftImm;
1345
if (isUInt<12>(Imm))
1346
ShiftImm = 0;
1347
else if ((Imm & 0xfff000) == Imm) {
1348
ShiftImm = 12;
1349
Imm >>= 12;
1350
} else
1351
return 0;
1352
1353
static const unsigned OpcTable[2][2][2] = {
1354
{ { AArch64::SUBWri, AArch64::SUBXri },
1355
{ AArch64::ADDWri, AArch64::ADDXri } },
1356
{ { AArch64::SUBSWri, AArch64::SUBSXri },
1357
{ AArch64::ADDSWri, AArch64::ADDSXri } }
1358
};
1359
bool Is64Bit = RetVT == MVT::i64;
1360
unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1361
const TargetRegisterClass *RC;
1362
if (SetFlags)
1363
RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1364
else
1365
RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1366
unsigned ResultReg;
1367
if (WantResult)
1368
ResultReg = createResultReg(RC);
1369
else
1370
ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1371
1372
const MCInstrDesc &II = TII.get(Opc);
1373
LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1374
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1375
.addReg(LHSReg)
1376
.addImm(Imm)
1377
.addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1378
return ResultReg;
1379
}
1380
1381
unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1382
unsigned RHSReg,
1383
AArch64_AM::ShiftExtendType ShiftType,
1384
uint64_t ShiftImm, bool SetFlags,
1385
bool WantResult) {
1386
assert(LHSReg && RHSReg && "Invalid register number.");
1387
assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1388
RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1389
1390
if (RetVT != MVT::i32 && RetVT != MVT::i64)
1391
return 0;
1392
1393
// Don't deal with undefined shifts.
1394
if (ShiftImm >= RetVT.getSizeInBits())
1395
return 0;
1396
1397
static const unsigned OpcTable[2][2][2] = {
1398
{ { AArch64::SUBWrs, AArch64::SUBXrs },
1399
{ AArch64::ADDWrs, AArch64::ADDXrs } },
1400
{ { AArch64::SUBSWrs, AArch64::SUBSXrs },
1401
{ AArch64::ADDSWrs, AArch64::ADDSXrs } }
1402
};
1403
bool Is64Bit = RetVT == MVT::i64;
1404
unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1405
const TargetRegisterClass *RC =
1406
Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1407
unsigned ResultReg;
1408
if (WantResult)
1409
ResultReg = createResultReg(RC);
1410
else
1411
ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1412
1413
const MCInstrDesc &II = TII.get(Opc);
1414
LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1415
RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1416
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1417
.addReg(LHSReg)
1418
.addReg(RHSReg)
1419
.addImm(getShifterImm(ShiftType, ShiftImm));
1420
return ResultReg;
1421
}
1422
1423
unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1424
unsigned RHSReg,
1425
AArch64_AM::ShiftExtendType ExtType,
1426
uint64_t ShiftImm, bool SetFlags,
1427
bool WantResult) {
1428
assert(LHSReg && RHSReg && "Invalid register number.");
1429
assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1430
RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1431
1432
if (RetVT != MVT::i32 && RetVT != MVT::i64)
1433
return 0;
1434
1435
if (ShiftImm >= 4)
1436
return 0;
1437
1438
static const unsigned OpcTable[2][2][2] = {
1439
{ { AArch64::SUBWrx, AArch64::SUBXrx },
1440
{ AArch64::ADDWrx, AArch64::ADDXrx } },
1441
{ { AArch64::SUBSWrx, AArch64::SUBSXrx },
1442
{ AArch64::ADDSWrx, AArch64::ADDSXrx } }
1443
};
1444
bool Is64Bit = RetVT == MVT::i64;
1445
unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1446
const TargetRegisterClass *RC = nullptr;
1447
if (SetFlags)
1448
RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1449
else
1450
RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1451
unsigned ResultReg;
1452
if (WantResult)
1453
ResultReg = createResultReg(RC);
1454
else
1455
ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1456
1457
const MCInstrDesc &II = TII.get(Opc);
1458
LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1459
RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1460
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1461
.addReg(LHSReg)
1462
.addReg(RHSReg)
1463
.addImm(getArithExtendImm(ExtType, ShiftImm));
1464
return ResultReg;
1465
}
1466
1467
bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1468
Type *Ty = LHS->getType();
1469
EVT EVT = TLI.getValueType(DL, Ty, true);
1470
if (!EVT.isSimple())
1471
return false;
1472
MVT VT = EVT.getSimpleVT();
1473
1474
switch (VT.SimpleTy) {
1475
default:
1476
return false;
1477
case MVT::i1:
1478
case MVT::i8:
1479
case MVT::i16:
1480
case MVT::i32:
1481
case MVT::i64:
1482
return emitICmp(VT, LHS, RHS, IsZExt);
1483
case MVT::f32:
1484
case MVT::f64:
1485
return emitFCmp(VT, LHS, RHS);
1486
}
1487
}
1488
1489
bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1490
bool IsZExt) {
1491
return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1492
IsZExt) != 0;
1493
}
1494
1495
bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1496
return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1497
/*SetFlags=*/true, /*WantResult=*/false) != 0;
1498
}
1499
1500
bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1501
if (RetVT != MVT::f32 && RetVT != MVT::f64)
1502
return false;
1503
1504
// Check to see if the 2nd operand is a constant that we can encode directly
1505
// in the compare.
1506
bool UseImm = false;
1507
if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1508
if (CFP->isZero() && !CFP->isNegative())
1509
UseImm = true;
1510
1511
Register LHSReg = getRegForValue(LHS);
1512
if (!LHSReg)
1513
return false;
1514
1515
if (UseImm) {
1516
unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1517
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1518
.addReg(LHSReg);
1519
return true;
1520
}
1521
1522
Register RHSReg = getRegForValue(RHS);
1523
if (!RHSReg)
1524
return false;
1525
1526
unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1527
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1528
.addReg(LHSReg)
1529
.addReg(RHSReg);
1530
return true;
1531
}
1532
1533
unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1534
bool SetFlags, bool WantResult, bool IsZExt) {
1535
return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1536
IsZExt);
1537
}
1538
1539
/// This method is a wrapper to simplify add emission.
1540
///
1541
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
1542
/// that fails, then try to materialize the immediate into a register and use
1543
/// emitAddSub_rr instead.
1544
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1545
unsigned ResultReg;
1546
if (Imm < 0)
1547
ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1548
else
1549
ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1550
1551
if (ResultReg)
1552
return ResultReg;
1553
1554
unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1555
if (!CReg)
1556
return 0;
1557
1558
ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1559
return ResultReg;
1560
}
1561
1562
unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1563
bool SetFlags, bool WantResult, bool IsZExt) {
1564
return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1565
IsZExt);
1566
}
1567
1568
unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1569
unsigned RHSReg, bool WantResult) {
1570
return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1571
/*SetFlags=*/true, WantResult);
1572
}
1573
1574
unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1575
unsigned RHSReg,
1576
AArch64_AM::ShiftExtendType ShiftType,
1577
uint64_t ShiftImm, bool WantResult) {
1578
return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1579
ShiftImm, /*SetFlags=*/true, WantResult);
1580
}
1581
unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      Register RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        Register RHSReg = getRegForValue(SI->getOperand(0));
        if (!RHSReg)
          return 0;
        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
        if (ResultReg)
          return ResultReg;
      }
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;

  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}

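/// Emit a logical operation with an immediate operand. The immediate must be
/// encodable as an AArch64 logical immediate; i8/i16 results of ORR/EOR are
/// masked back down to their original width.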
unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, uint64_t Imm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWri, AArch64::ANDXri },
    { AArch64::ORRWri, AArch64::ORRXri },
    { AArch64::EORWri, AArch64::EORXri }
  };
  const TargetRegisterClass *RC;
  unsigned Opc;
  unsigned RegSize;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32: {
    unsigned Idx = ISDOpc - ISD::AND;
    Opc = OpcTable[Idx][0];
    RC = &AArch64::GPR32spRegClass;
    RegSize = 32;
    break;
  }
  case MVT::i64:
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64spRegClass;
    RegSize = 64;
    break;
  }

  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    return 0;

  Register ResultReg =
      fastEmitInst_ri(Opc, RC, LHSReg,
                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}

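/// Emit a logical operation where the RHS register is shifted left by a
/// constant amount (shifted-register form). Shift amounts that would be
/// undefined for the return type are rejected.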
unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, unsigned RHSReg,
                                           uint64_t ShiftImm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWrs, AArch64::ANDXrs },
    { AArch64::ORRWrs, AArch64::ORRXrs },
    { AArch64::EORWrs, AArch64::EORXrs }
  };

  // Don't deal with undefined shifts.
  if (ShiftImm >= RetVT.getSizeInBits())
    return 0;

  const TargetRegisterClass *RC;
  unsigned Opc;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    Opc = OpcTable[ISDOpc - ISD::AND][0];
    RC = &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64RegClass;
    break;
  }
  Register ResultReg =
      fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
                       AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}

unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
                                     uint64_t Imm) {
  return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
}

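/// Emit a load of type VT, zero- or sign-extended to RetVT, from the given
/// address. Picks between unscaled, scaled-immediate, and register-offset
/// addressing forms and handles the i1 and 32-to-64-bit zero-extension
/// special cases.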
unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
                                   bool WantZExt, MachineMemOperand *MMO) {
  if (!TLI.allowsMisalignedMemoryAccesses(VT))
    return 0;

  // Simplify this down to something we can handle.
  if (!simplifyAddress(Addr, VT))
    return 0;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    llvm_unreachable("Unexpected value type.");

  // Negative offsets require unscaled, 9-bit, signed immediate offsets.
  // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
  bool UseScaled = true;
  if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
    UseScaled = false;
    ScaleFactor = 1;
  }

  static const unsigned GPOpcTable[2][8][4] = {
    // Sign-extend.
    { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
        AArch64::LDURXi },
      { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
        AArch64::LDURXi },
      { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
        AArch64::LDRXui },
      { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
        AArch64::LDRXui },
      { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
        AArch64::LDRXroX },
      { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
        AArch64::LDRXroW },
      { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
        AArch64::LDRXroW }
    },
    // Zero-extend.
    { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
        AArch64::LDURXi },
      { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
        AArch64::LDURXi },
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
        AArch64::LDRXui },
      { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
        AArch64::LDRXui },
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
        AArch64::LDRXroX },
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
        AArch64::LDRXroW },
      { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
        AArch64::LDRXroW }
    }
  };

  static const unsigned FPOpcTable[4][2] = {
    { AArch64::LDURSi,  AArch64::LDURDi },
    { AArch64::LDRSui,  AArch64::LDRDui },
    { AArch64::LDRSroX, AArch64::LDRDroX },
    { AArch64::LDRSroW, AArch64::LDRDroW }
  };

  unsigned Opc;
  const TargetRegisterClass *RC;
  bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
                      Addr.getOffsetReg();
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
      Addr.getExtendType() == AArch64_AM::SXTW)
    Idx++;

  bool IsRet64Bit = RetVT == MVT::i64;
  switch (VT.SimpleTy) {
  default:
    llvm_unreachable("Unexpected value type.");
  case MVT::i1: // Intentional fall-through.
  case MVT::i8:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i16:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i32:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
    RC = &AArch64::GPR64RegClass;
    break;
  case MVT::f32:
    Opc = FPOpcTable[Idx][0];
    RC = &AArch64::FPR32RegClass;
    break;
  case MVT::f64:
    Opc = FPOpcTable[Idx][1];
    RC = &AArch64::FPR64RegClass;
    break;
  }

  // Create the base instruction, then add the operands.
  Register ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(Opc), ResultReg);
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);

  // Loading an i1 requires special handling.
  if (VT == MVT::i1) {
    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");
    ResultReg = ANDReg;
  }

  // For zero-extending loads to 64bit we emit a 32bit load and then convert
  // the 32bit reg to a 64bit reg.
  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
    Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(ResultReg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    ResultReg = Reg64;
  }
  return ResultReg;
}

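/// Select an IR add or subtract. Vector operations are forwarded to the
/// generic operator selection.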
bool AArch64FastISel::selectAddSub(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectOperator(I, I->getOpcode());

  unsigned ResultReg;
  switch (I->getOpcode()) {
  default:
    llvm_unreachable("Unexpected instruction.");
  case Instruction::Add:
    ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
    break;
  case Instruction::Sub:
    ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
    break;
  }
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

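/// Select an IR and/or/xor. Vector operations are forwarded to the generic
/// operator selection.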
bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectOperator(I, I->getOpcode());

  unsigned ResultReg;
  switch (I->getOpcode()) {
  default:
    llvm_unreachable("Unexpected instruction.");
  case Instruction::And:
    ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
    break;
  case Instruction::Or:
    ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
    break;
  case Instruction::Xor:
    ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
    break;
  }
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

1944
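/// Select an IR load, folding a single sign- or zero-extending use into the
/// load instruction when possible.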
bool AArch64FastISel::selectLoad(const Instruction *I) {
1945
MVT VT;
1946
// Verify we have a legal type before going any further. Currently, we handle
1947
// simple types that will directly fit in a register (i32/f32/i64/f64) or
1948
// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1949
if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1950
cast<LoadInst>(I)->isAtomic())
1951
return false;
1952
1953
const Value *SV = I->getOperand(0);
1954
if (TLI.supportSwiftError()) {
1955
// Swifterror values can come from either a function parameter with
1956
// swifterror attribute or an alloca with swifterror attribute.
1957
if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1958
if (Arg->hasSwiftErrorAttr())
1959
return false;
1960
}
1961
1962
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1963
if (Alloca->isSwiftError())
1964
return false;
1965
}
1966
}
1967
1968
// See if we can handle this address.
1969
Address Addr;
1970
if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1971
return false;
1972
1973
// Fold the following sign-/zero-extend into the load instruction.
1974
bool WantZExt = true;
1975
MVT RetVT = VT;
1976
const Value *IntExtVal = nullptr;
1977
if (I->hasOneUse()) {
1978
if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1979
if (isTypeSupported(ZE->getType(), RetVT))
1980
IntExtVal = ZE;
1981
else
1982
RetVT = VT;
1983
} else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1984
if (isTypeSupported(SE->getType(), RetVT))
1985
IntExtVal = SE;
1986
else
1987
RetVT = VT;
1988
WantZExt = false;
1989
}
1990
}
1991
1992
unsigned ResultReg =
1993
emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1994
if (!ResultReg)
1995
return false;
1996
1997
// There are a few different cases we have to handle, because the load or the
1998
// sign-/zero-extend might not be selected by FastISel if we fall-back to
1999
// SelectionDAG. There is also an ordering issue when both instructions are in
2000
// different basic blocks.
2001
// 1.) The load instruction is selected by FastISel, but the integer extend
2002
// not. This usually happens when the integer extend is in a different
2003
// basic block and SelectionDAG took over for that basic block.
2004
// 2.) The load instruction is selected before the integer extend. This only
2005
// happens when the integer extend is in a different basic block.
2006
// 3.) The load instruction is selected by SelectionDAG and the integer extend
2007
// by FastISel. This happens if there are instructions between the load
2008
// and the integer extend that couldn't be selected by FastISel.
2009
if (IntExtVal) {
2010
// The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2011
// could select it. Emit a copy to subreg if necessary. FastISel will remove
2012
// it when it selects the integer extend.
2013
Register Reg = lookUpRegForValue(IntExtVal);
2014
auto *MI = MRI.getUniqueVRegDef(Reg);
2015
if (!MI) {
2016
if (RetVT == MVT::i64 && VT <= MVT::i32) {
2017
if (WantZExt) {
2018
// Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2019
MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2020
ResultReg = std::prev(I)->getOperand(0).getReg();
2021
removeDeadCode(I, std::next(I));
2022
} else
2023
ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2024
AArch64::sub_32);
2025
}
2026
updateValueMap(I, ResultReg);
2027
return true;
2028
}
2029
2030
// The integer extend has already been emitted - delete all the instructions
2031
// that have been emitted by the integer extend lowering code and use the
2032
// result from the load instruction directly.
2033
while (MI) {
2034
Reg = 0;
2035
for (auto &Opnd : MI->uses()) {
2036
if (Opnd.isReg()) {
2037
Reg = Opnd.getReg();
2038
break;
2039
}
2040
}
2041
MachineBasicBlock::iterator I(MI);
2042
removeDeadCode(I, std::next(I));
2043
MI = nullptr;
2044
if (Reg)
2045
MI = MRI.getUniqueVRegDef(Reg);
2046
}
2047
updateValueMap(IntExtVal, ResultReg);
2048
return true;
2049
}
2050
2051
updateValueMap(I, ResultReg);
2052
return true;
2053
}
2054
2055
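/// Emit a store-release (STLR*) of SrcReg to the address in AddrReg; used for
/// atomic stores with release or stronger ordering.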
bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2056
unsigned AddrReg,
2057
MachineMemOperand *MMO) {
2058
unsigned Opc;
2059
switch (VT.SimpleTy) {
2060
default: return false;
2061
case MVT::i8: Opc = AArch64::STLRB; break;
2062
case MVT::i16: Opc = AArch64::STLRH; break;
2063
case MVT::i32: Opc = AArch64::STLRW; break;
2064
case MVT::i64: Opc = AArch64::STLRX; break;
2065
}
2066
2067
const MCInstrDesc &II = TII.get(Opc);
2068
SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2069
AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2070
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2071
.addReg(SrcReg)
2072
.addReg(AddrReg)
2073
.addMemOperand(MMO);
2074
return true;
2075
}
2076
2077
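/// Emit a store of SrcReg of type VT to the given address, choosing between
/// unscaled, scaled-immediate, and register-offset addressing forms.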
bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2078
MachineMemOperand *MMO) {
2079
if (!TLI.allowsMisalignedMemoryAccesses(VT))
2080
return false;
2081
2082
// Simplify this down to something we can handle.
2083
if (!simplifyAddress(Addr, VT))
2084
return false;
2085
2086
unsigned ScaleFactor = getImplicitScaleFactor(VT);
2087
if (!ScaleFactor)
2088
llvm_unreachable("Unexpected value type.");
2089
2090
// Negative offsets require unscaled, 9-bit, signed immediate offsets.
2091
// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2092
bool UseScaled = true;
2093
if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2094
UseScaled = false;
2095
ScaleFactor = 1;
2096
}
2097
2098
static const unsigned OpcTable[4][6] = {
2099
{ AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2100
AArch64::STURSi, AArch64::STURDi },
2101
{ AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2102
AArch64::STRSui, AArch64::STRDui },
2103
{ AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2104
AArch64::STRSroX, AArch64::STRDroX },
2105
{ AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2106
AArch64::STRSroW, AArch64::STRDroW }
2107
};
2108
2109
unsigned Opc;
2110
bool VTIsi1 = false;
2111
bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2112
Addr.getOffsetReg();
2113
unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2114
if (Addr.getExtendType() == AArch64_AM::UXTW ||
2115
Addr.getExtendType() == AArch64_AM::SXTW)
2116
Idx++;
2117
2118
switch (VT.SimpleTy) {
2119
default: llvm_unreachable("Unexpected value type.");
2120
case MVT::i1: VTIsi1 = true; [[fallthrough]];
2121
case MVT::i8: Opc = OpcTable[Idx][0]; break;
2122
case MVT::i16: Opc = OpcTable[Idx][1]; break;
2123
case MVT::i32: Opc = OpcTable[Idx][2]; break;
2124
case MVT::i64: Opc = OpcTable[Idx][3]; break;
2125
case MVT::f32: Opc = OpcTable[Idx][4]; break;
2126
case MVT::f64: Opc = OpcTable[Idx][5]; break;
2127
}
2128
2129
// Storing an i1 requires special handling.
2130
if (VTIsi1 && SrcReg != AArch64::WZR) {
2131
unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2132
assert(ANDReg && "Unexpected AND instruction emission failure.");
2133
SrcReg = ANDReg;
2134
}
2135
// Create the base instruction, then add the operands.
2136
const MCInstrDesc &II = TII.get(Opc);
2137
SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2138
MachineInstrBuilder MIB =
2139
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2140
addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2141
2142
return true;
2143
}
2144
2145
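/// Select an IR store. Zero constants are stored directly from WZR/XZR, and
/// atomic release/seq_cst stores are lowered to STLR.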
bool AArch64FastISel::selectStore(const Instruction *I) {
2146
MVT VT;
2147
const Value *Op0 = I->getOperand(0);
2148
// Verify we have a legal type before going any further. Currently, we handle
2149
// simple types that will directly fit in a register (i32/f32/i64/f64) or
2150
// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2151
if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2152
return false;
2153
2154
const Value *PtrV = I->getOperand(1);
2155
if (TLI.supportSwiftError()) {
2156
// Swifterror values can come from either a function parameter with
2157
// swifterror attribute or an alloca with swifterror attribute.
2158
if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2159
if (Arg->hasSwiftErrorAttr())
2160
return false;
2161
}
2162
2163
if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2164
if (Alloca->isSwiftError())
2165
return false;
2166
}
2167
}
2168
2169
// Get the value to be stored into a register. Use the zero register directly
2170
// when possible to avoid an unnecessary copy and a wasted register.
2171
unsigned SrcReg = 0;
2172
if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2173
if (CI->isZero())
2174
SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2175
} else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2176
if (CF->isZero() && !CF->isNegative()) {
2177
VT = MVT::getIntegerVT(VT.getSizeInBits());
2178
SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2179
}
2180
}
2181
2182
if (!SrcReg)
2183
SrcReg = getRegForValue(Op0);
2184
2185
if (!SrcReg)
2186
return false;
2187
2188
auto *SI = cast<StoreInst>(I);
2189
2190
// Try to emit a STLR for seq_cst/release.
2191
if (SI->isAtomic()) {
2192
AtomicOrdering Ord = SI->getOrdering();
2193
// The non-atomic instructions are sufficient for relaxed stores.
2194
if (isReleaseOrStronger(Ord)) {
2195
// The STLR addressing mode only supports a base reg; pass that directly.
2196
Register AddrReg = getRegForValue(PtrV);
2197
return emitStoreRelease(VT, SrcReg, AddrReg,
2198
createMachineMemOperandFor(I));
2199
}
2200
}
2201
2202
// See if we can handle this address.
2203
Address Addr;
2204
if (!computeAddress(PtrV, Addr, Op0->getType()))
2205
return false;
2206
2207
if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2208
return false;
2209
return true;
2210
}
2211
2212
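/// Map an IR compare predicate to the corresponding AArch64 condition code.
/// AL is returned for predicates (FCMP_ONE/FCMP_UEQ) that cannot be expressed
/// with a single condition code.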
static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2213
switch (Pred) {
2214
case CmpInst::FCMP_ONE:
2215
case CmpInst::FCMP_UEQ:
2216
default:
2217
// AL is our "false" for now. The other two need more compares.
2218
return AArch64CC::AL;
2219
case CmpInst::ICMP_EQ:
2220
case CmpInst::FCMP_OEQ:
2221
return AArch64CC::EQ;
2222
case CmpInst::ICMP_SGT:
2223
case CmpInst::FCMP_OGT:
2224
return AArch64CC::GT;
2225
case CmpInst::ICMP_SGE:
2226
case CmpInst::FCMP_OGE:
2227
return AArch64CC::GE;
2228
case CmpInst::ICMP_UGT:
2229
case CmpInst::FCMP_UGT:
2230
return AArch64CC::HI;
2231
case CmpInst::FCMP_OLT:
2232
return AArch64CC::MI;
2233
case CmpInst::ICMP_ULE:
2234
case CmpInst::FCMP_OLE:
2235
return AArch64CC::LS;
2236
case CmpInst::FCMP_ORD:
2237
return AArch64CC::VC;
2238
case CmpInst::FCMP_UNO:
2239
return AArch64CC::VS;
2240
case CmpInst::FCMP_UGE:
2241
return AArch64CC::PL;
2242
case CmpInst::ICMP_SLT:
2243
case CmpInst::FCMP_ULT:
2244
return AArch64CC::LT;
2245
case CmpInst::ICMP_SLE:
2246
case CmpInst::FCMP_ULE:
2247
return AArch64CC::LE;
2248
case CmpInst::FCMP_UNE:
2249
case CmpInst::ICMP_NE:
2250
return AArch64CC::NE;
2251
case CmpInst::ICMP_UGE:
2252
return AArch64CC::HS;
2253
case CmpInst::ICMP_ULT:
2254
return AArch64CC::LO;
2255
}
2256
}
2257
2258
/// Try to emit a combined compare-and-branch instruction.
2259
bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2260
// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2261
// will not be produced, as they are conditional branch instructions that do
2262
// not set flags.
2263
if (FuncInfo.MF->getFunction().hasFnAttribute(
2264
Attribute::SpeculativeLoadHardening))
2265
return false;
2266
2267
assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2268
const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2269
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2270
2271
const Value *LHS = CI->getOperand(0);
2272
const Value *RHS = CI->getOperand(1);
2273
2274
MVT VT;
2275
if (!isTypeSupported(LHS->getType(), VT))
2276
return false;
2277
2278
unsigned BW = VT.getSizeInBits();
2279
if (BW > 64)
2280
return false;
2281
2282
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2283
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2284
2285
// Try to take advantage of fallthrough opportunities.
2286
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2287
std::swap(TBB, FBB);
2288
Predicate = CmpInst::getInversePredicate(Predicate);
2289
}
2290
2291
int TestBit = -1;
2292
bool IsCmpNE;
2293
switch (Predicate) {
2294
default:
2295
return false;
2296
case CmpInst::ICMP_EQ:
2297
case CmpInst::ICMP_NE:
2298
if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2299
std::swap(LHS, RHS);
2300
2301
if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2302
return false;
2303
2304
if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2305
if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2306
const Value *AndLHS = AI->getOperand(0);
2307
const Value *AndRHS = AI->getOperand(1);
2308
2309
if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2310
if (C->getValue().isPowerOf2())
2311
std::swap(AndLHS, AndRHS);
2312
2313
if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2314
if (C->getValue().isPowerOf2()) {
2315
TestBit = C->getValue().logBase2();
2316
LHS = AndLHS;
2317
}
2318
}
2319
2320
if (VT == MVT::i1)
2321
TestBit = 0;
2322
2323
IsCmpNE = Predicate == CmpInst::ICMP_NE;
2324
break;
2325
case CmpInst::ICMP_SLT:
2326
case CmpInst::ICMP_SGE:
2327
if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2328
return false;
2329
2330
TestBit = BW - 1;
2331
IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2332
break;
2333
case CmpInst::ICMP_SGT:
2334
case CmpInst::ICMP_SLE:
2335
if (!isa<ConstantInt>(RHS))
2336
return false;
2337
2338
if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2339
return false;
2340
2341
TestBit = BW - 1;
2342
IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2343
break;
2344
} // end switch
2345
2346
static const unsigned OpcTable[2][2][2] = {
2347
{ {AArch64::CBZW, AArch64::CBZX },
2348
{AArch64::CBNZW, AArch64::CBNZX} },
2349
{ {AArch64::TBZW, AArch64::TBZX },
2350
{AArch64::TBNZW, AArch64::TBNZX} }
2351
};
2352
2353
bool IsBitTest = TestBit != -1;
2354
bool Is64Bit = BW == 64;
2355
if (TestBit < 32 && TestBit >= 0)
2356
Is64Bit = false;
2357
2358
unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2359
const MCInstrDesc &II = TII.get(Opc);
2360
2361
Register SrcReg = getRegForValue(LHS);
2362
if (!SrcReg)
2363
return false;
2364
2365
if (BW == 64 && !Is64Bit)
2366
SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2367
2368
if ((BW < 32) && !IsBitTest)
2369
SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2370
2371
// Emit the combined compare and branch instruction.
2372
SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2373
MachineInstrBuilder MIB =
2374
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2375
.addReg(SrcReg);
2376
if (IsBitTest)
2377
MIB.addImm(TestBit);
2378
MIB.addMBB(TBB);
2379
2380
finishCondBranch(BI->getParent(), TBB, FBB);
2381
return true;
2382
}
2383
2384
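/// Select an IR branch. Conditional branches try a fused compare-and-branch
/// first, then a compare followed by B.cc, and finally fall back to testing
/// the lowest bit of the i1 condition with TB(N)Z.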
bool AArch64FastISel::selectBranch(const Instruction *I) {
2385
const BranchInst *BI = cast<BranchInst>(I);
2386
if (BI->isUnconditional()) {
2387
MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2388
fastEmitBranch(MSucc, BI->getDebugLoc());
2389
return true;
2390
}
2391
2392
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2393
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2394
2395
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2396
if (CI->hasOneUse() && isValueAvailable(CI)) {
2397
// Try to optimize or fold the cmp.
2398
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2399
switch (Predicate) {
2400
default:
2401
break;
2402
case CmpInst::FCMP_FALSE:
2403
fastEmitBranch(FBB, MIMD.getDL());
2404
return true;
2405
case CmpInst::FCMP_TRUE:
2406
fastEmitBranch(TBB, MIMD.getDL());
2407
return true;
2408
}
2409
2410
// Try to emit a combined compare-and-branch first.
2411
if (emitCompareAndBranch(BI))
2412
return true;
2413
2414
// Try to take advantage of fallthrough opportunities.
2415
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2416
std::swap(TBB, FBB);
2417
Predicate = CmpInst::getInversePredicate(Predicate);
2418
}
2419
2420
// Emit the cmp.
2421
if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2422
return false;
2423
2424
// FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2425
// instruction.
2426
AArch64CC::CondCode CC = getCompareCC(Predicate);
2427
AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2428
switch (Predicate) {
2429
default:
2430
break;
2431
case CmpInst::FCMP_UEQ:
2432
ExtraCC = AArch64CC::EQ;
2433
CC = AArch64CC::VS;
2434
break;
2435
case CmpInst::FCMP_ONE:
2436
ExtraCC = AArch64CC::MI;
2437
CC = AArch64CC::GT;
2438
break;
2439
}
2440
assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2441
2442
// Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2443
if (ExtraCC != AArch64CC::AL) {
2444
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2445
.addImm(ExtraCC)
2446
.addMBB(TBB);
2447
}
2448
2449
// Emit the branch.
2450
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2451
.addImm(CC)
2452
.addMBB(TBB);
2453
2454
finishCondBranch(BI->getParent(), TBB, FBB);
2455
return true;
2456
}
2457
} else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2458
uint64_t Imm = CI->getZExtValue();
2459
MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2460
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2461
.addMBB(Target);
2462
2463
// Obtain the branch probability and add the target to the successor list.
2464
if (FuncInfo.BPI) {
2465
auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2466
BI->getParent(), Target->getBasicBlock());
2467
FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2468
} else
2469
FuncInfo.MBB->addSuccessorWithoutProb(Target);
2470
return true;
2471
} else {
2472
AArch64CC::CondCode CC = AArch64CC::NE;
2473
if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2474
// Fake request the condition, otherwise the intrinsic might be completely
2475
// optimized away.
2476
Register CondReg = getRegForValue(BI->getCondition());
2477
if (!CondReg)
2478
return false;
2479
2480
// Emit the branch.
2481
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2482
.addImm(CC)
2483
.addMBB(TBB);
2484
2485
finishCondBranch(BI->getParent(), TBB, FBB);
2486
return true;
2487
}
2488
}
2489
2490
Register CondReg = getRegForValue(BI->getCondition());
2491
if (CondReg == 0)
2492
return false;
2493
2494
// i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2495
unsigned Opcode = AArch64::TBNZW;
2496
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2497
std::swap(TBB, FBB);
2498
Opcode = AArch64::TBZW;
2499
}
2500
2501
const MCInstrDesc &II = TII.get(Opcode);
2502
Register ConstrainedCondReg
2503
= constrainOperandRegClass(II, CondReg, II.getNumDefs());
2504
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2505
.addReg(ConstrainedCondReg)
2506
.addImm(0)
2507
.addMBB(TBB);
2508
2509
finishCondBranch(BI->getParent(), TBB, FBB);
2510
return true;
2511
}
2512
2513
bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2514
const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2515
Register AddrReg = getRegForValue(BI->getOperand(0));
2516
if (AddrReg == 0)
2517
return false;
2518
2519
// Authenticated indirectbr is not implemented yet.
2520
if (FuncInfo.MF->getFunction().hasFnAttribute("ptrauth-indirect-gotos"))
2521
return false;
2522
2523
// Emit the indirect branch.
2524
const MCInstrDesc &II = TII.get(AArch64::BR);
2525
AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2526
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2527
2528
// Make sure the CFG is up-to-date.
2529
for (const auto *Succ : BI->successors())
2530
FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2531
2532
return true;
2533
}
2534
2535
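/// Select an IR compare by materializing its i1 result with CSINC; FCMP_UEQ
/// and FCMP_ONE require a pair of CSINC instructions.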
bool AArch64FastISel::selectCmp(const Instruction *I) {
2536
const CmpInst *CI = cast<CmpInst>(I);
2537
2538
// Vectors of i1 are weird: bail out.
2539
if (CI->getType()->isVectorTy())
2540
return false;
2541
2542
// Try to optimize or fold the cmp.
2543
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2544
unsigned ResultReg = 0;
2545
switch (Predicate) {
2546
default:
2547
break;
2548
case CmpInst::FCMP_FALSE:
2549
ResultReg = createResultReg(&AArch64::GPR32RegClass);
2550
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2551
TII.get(TargetOpcode::COPY), ResultReg)
2552
.addReg(AArch64::WZR, getKillRegState(true));
2553
break;
2554
case CmpInst::FCMP_TRUE:
2555
ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2556
break;
2557
}
2558
2559
if (ResultReg) {
2560
updateValueMap(I, ResultReg);
2561
return true;
2562
}
2563
2564
// Emit the cmp.
2565
if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2566
return false;
2567
2568
ResultReg = createResultReg(&AArch64::GPR32RegClass);
2569
2570
// FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2571
// condition codes are inverted, because they are used by CSINC.
2572
static unsigned CondCodeTable[2][2] = {
2573
{ AArch64CC::NE, AArch64CC::VC },
2574
{ AArch64CC::PL, AArch64CC::LE }
2575
};
2576
unsigned *CondCodes = nullptr;
2577
switch (Predicate) {
2578
default:
2579
break;
2580
case CmpInst::FCMP_UEQ:
2581
CondCodes = &CondCodeTable[0][0];
2582
break;
2583
case CmpInst::FCMP_ONE:
2584
CondCodes = &CondCodeTable[1][0];
2585
break;
2586
}
2587
2588
if (CondCodes) {
2589
Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2590
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2591
TmpReg1)
2592
.addReg(AArch64::WZR, getKillRegState(true))
2593
.addReg(AArch64::WZR, getKillRegState(true))
2594
.addImm(CondCodes[0]);
2595
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2596
ResultReg)
2597
.addReg(TmpReg1, getKillRegState(true))
2598
.addReg(AArch64::WZR, getKillRegState(true))
2599
.addImm(CondCodes[1]);
2600
2601
updateValueMap(I, ResultReg);
2602
return true;
2603
}
2604
2605
// Now set a register based on the comparison.
2606
AArch64CC::CondCode CC = getCompareCC(Predicate);
2607
assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2608
AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2609
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2610
ResultReg)
2611
.addReg(AArch64::WZR, getKillRegState(true))
2612
.addReg(AArch64::WZR, getKillRegState(true))
2613
.addImm(invertedCC);
2614
2615
updateValueMap(I, ResultReg);
2616
return true;
2617
}
2618
2619
/// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2620
/// value.
2621
bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2622
if (!SI->getType()->isIntegerTy(1))
2623
return false;
2624
2625
const Value *Src1Val, *Src2Val;
2626
unsigned Opc = 0;
2627
bool NeedExtraOp = false;
2628
if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2629
if (CI->isOne()) {
2630
Src1Val = SI->getCondition();
2631
Src2Val = SI->getFalseValue();
2632
Opc = AArch64::ORRWrr;
2633
} else {
2634
assert(CI->isZero());
2635
Src1Val = SI->getFalseValue();
2636
Src2Val = SI->getCondition();
2637
Opc = AArch64::BICWrr;
2638
}
2639
} else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2640
if (CI->isOne()) {
2641
Src1Val = SI->getCondition();
2642
Src2Val = SI->getTrueValue();
2643
Opc = AArch64::ORRWrr;
2644
NeedExtraOp = true;
2645
} else {
2646
assert(CI->isZero());
2647
Src1Val = SI->getCondition();
2648
Src2Val = SI->getTrueValue();
2649
Opc = AArch64::ANDWrr;
2650
}
2651
}
2652
2653
if (!Opc)
2654
return false;
2655
2656
Register Src1Reg = getRegForValue(Src1Val);
2657
if (!Src1Reg)
2658
return false;
2659
2660
Register Src2Reg = getRegForValue(Src2Val);
2661
if (!Src2Reg)
2662
return false;
2663
2664
if (NeedExtraOp)
2665
Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2666
2667
Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2668
Src2Reg);
2669
updateValueMap(SI, ResultReg);
2670
return true;
2671
}
2672
2673
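/// Lower a select to CSEL/FCSEL, reusing the flags from a preceding compare
/// or overflow intrinsic when they are still available.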
bool AArch64FastISel::selectSelect(const Instruction *I) {
2674
assert(isa<SelectInst>(I) && "Expected a select instruction.");
2675
MVT VT;
2676
if (!isTypeSupported(I->getType(), VT))
2677
return false;
2678
2679
unsigned Opc;
2680
const TargetRegisterClass *RC;
2681
switch (VT.SimpleTy) {
2682
default:
2683
return false;
2684
case MVT::i1:
2685
case MVT::i8:
2686
case MVT::i16:
2687
case MVT::i32:
2688
Opc = AArch64::CSELWr;
2689
RC = &AArch64::GPR32RegClass;
2690
break;
2691
case MVT::i64:
2692
Opc = AArch64::CSELXr;
2693
RC = &AArch64::GPR64RegClass;
2694
break;
2695
case MVT::f32:
2696
Opc = AArch64::FCSELSrrr;
2697
RC = &AArch64::FPR32RegClass;
2698
break;
2699
case MVT::f64:
2700
Opc = AArch64::FCSELDrrr;
2701
RC = &AArch64::FPR64RegClass;
2702
break;
2703
}
2704
2705
const SelectInst *SI = cast<SelectInst>(I);
2706
const Value *Cond = SI->getCondition();
2707
AArch64CC::CondCode CC = AArch64CC::NE;
2708
AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2709
2710
if (optimizeSelect(SI))
2711
return true;
2712
2713
// Try to pickup the flags, so we don't have to emit another compare.
2714
if (foldXALUIntrinsic(CC, I, Cond)) {
2715
// Fake request the condition to force emission of the XALU intrinsic.
2716
Register CondReg = getRegForValue(Cond);
2717
if (!CondReg)
2718
return false;
2719
} else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2720
isValueAvailable(Cond)) {
2721
const auto *Cmp = cast<CmpInst>(Cond);
2722
// Try to optimize or fold the cmp.
2723
CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2724
const Value *FoldSelect = nullptr;
2725
switch (Predicate) {
2726
default:
2727
break;
2728
case CmpInst::FCMP_FALSE:
2729
FoldSelect = SI->getFalseValue();
2730
break;
2731
case CmpInst::FCMP_TRUE:
2732
FoldSelect = SI->getTrueValue();
2733
break;
2734
}
2735
2736
if (FoldSelect) {
2737
Register SrcReg = getRegForValue(FoldSelect);
2738
if (!SrcReg)
2739
return false;
2740
2741
updateValueMap(I, SrcReg);
2742
return true;
2743
}
2744
2745
// Emit the cmp.
2746
if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2747
return false;
2748
2749
// FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2750
CC = getCompareCC(Predicate);
2751
switch (Predicate) {
2752
default:
2753
break;
2754
case CmpInst::FCMP_UEQ:
2755
ExtraCC = AArch64CC::EQ;
2756
CC = AArch64CC::VS;
2757
break;
2758
case CmpInst::FCMP_ONE:
2759
ExtraCC = AArch64CC::MI;
2760
CC = AArch64CC::GT;
2761
break;
2762
}
2763
assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2764
} else {
2765
Register CondReg = getRegForValue(Cond);
2766
if (!CondReg)
2767
return false;
2768
2769
const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2770
CondReg = constrainOperandRegClass(II, CondReg, 1);
2771
2772
// Emit a TST instruction (ANDS wzr, reg, #imm).
2773
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2774
AArch64::WZR)
2775
.addReg(CondReg)
2776
.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2777
}
2778
2779
Register Src1Reg = getRegForValue(SI->getTrueValue());
2780
Register Src2Reg = getRegForValue(SI->getFalseValue());
2781
2782
if (!Src1Reg || !Src2Reg)
2783
return false;
2784
2785
if (ExtraCC != AArch64CC::AL)
2786
Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2787
2788
Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2789
updateValueMap(I, ResultReg);
2790
return true;
2791
}
2792
2793
bool AArch64FastISel::selectFPExt(const Instruction *I) {
2794
Value *V = I->getOperand(0);
2795
if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2796
return false;
2797
2798
Register Op = getRegForValue(V);
2799
if (Op == 0)
2800
return false;
2801
2802
Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2803
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2804
ResultReg).addReg(Op);
2805
updateValueMap(I, ResultReg);
2806
return true;
2807
}
2808
2809
bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2810
Value *V = I->getOperand(0);
2811
if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2812
return false;
2813
2814
Register Op = getRegForValue(V);
2815
if (Op == 0)
2816
return false;
2817
2818
Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2819
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2820
ResultReg).addReg(Op);
2821
updateValueMap(I, ResultReg);
2822
return true;
2823
}
2824
2825
// FPToUI and FPToSI
2826
bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2827
MVT DestVT;
2828
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2829
return false;
2830
2831
Register SrcReg = getRegForValue(I->getOperand(0));
2832
if (SrcReg == 0)
2833
return false;
2834
2835
EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2836
if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
2837
return false;
2838
2839
unsigned Opc;
2840
if (SrcVT == MVT::f64) {
2841
if (Signed)
2842
Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2843
else
2844
Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2845
} else {
2846
if (Signed)
2847
Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2848
else
2849
Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2850
}
2851
Register ResultReg = createResultReg(
2852
DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2853
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2854
.addReg(SrcReg);
2855
updateValueMap(I, ResultReg);
2856
return true;
2857
}
2858
2859
bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2860
MVT DestVT;
2861
if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2862
return false;
2863
// Let regular ISEL handle FP16
2864
if (DestVT == MVT::f16 || DestVT == MVT::bf16)
2865
return false;
2866
2867
assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2868
"Unexpected value type.");
2869
2870
Register SrcReg = getRegForValue(I->getOperand(0));
2871
if (!SrcReg)
2872
return false;
2873
2874
EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2875
2876
// Handle sign-extension.
2877
if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2878
SrcReg =
2879
emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2880
if (!SrcReg)
2881
return false;
2882
}
2883
2884
unsigned Opc;
2885
if (SrcVT == MVT::i64) {
2886
if (Signed)
2887
Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2888
else
2889
Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2890
} else {
2891
if (Signed)
2892
Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2893
else
2894
Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2895
}
2896
2897
Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2898
updateValueMap(I, ResultReg);
2899
return true;
2900
}
2901
2902
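/// Fast-path lowering of formal arguments for simple C/Swift signatures that
/// fit entirely in up to eight GPRs and eight FP/SIMD registers.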
bool AArch64FastISel::fastLowerArguments() {
2903
if (!FuncInfo.CanLowerReturn)
2904
return false;
2905
2906
const Function *F = FuncInfo.Fn;
2907
if (F->isVarArg())
2908
return false;
2909
2910
CallingConv::ID CC = F->getCallingConv();
2911
if (CC != CallingConv::C && CC != CallingConv::Swift)
2912
return false;
2913
2914
if (Subtarget->hasCustomCallingConv())
2915
return false;
2916
2917
// Only handle simple cases of up to 8 GPR and FPR each.
2918
unsigned GPRCnt = 0;
2919
unsigned FPRCnt = 0;
2920
for (auto const &Arg : F->args()) {
2921
if (Arg.hasAttribute(Attribute::ByVal) ||
2922
Arg.hasAttribute(Attribute::InReg) ||
2923
Arg.hasAttribute(Attribute::StructRet) ||
2924
Arg.hasAttribute(Attribute::SwiftSelf) ||
2925
Arg.hasAttribute(Attribute::SwiftAsync) ||
2926
Arg.hasAttribute(Attribute::SwiftError) ||
2927
Arg.hasAttribute(Attribute::Nest))
2928
return false;
2929
2930
Type *ArgTy = Arg.getType();
2931
if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2932
return false;
2933
2934
EVT ArgVT = TLI.getValueType(DL, ArgTy);
2935
if (!ArgVT.isSimple())
2936
return false;
2937
2938
MVT VT = ArgVT.getSimpleVT().SimpleTy;
2939
if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2940
return false;
2941
2942
if (VT.isVector() &&
2943
(!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2944
return false;
2945
2946
if (VT >= MVT::i1 && VT <= MVT::i64)
2947
++GPRCnt;
2948
else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2949
VT.is128BitVector())
2950
++FPRCnt;
2951
else
2952
return false;
2953
2954
if (GPRCnt > 8 || FPRCnt > 8)
2955
return false;
2956
}
2957
2958
static const MCPhysReg Registers[6][8] = {
2959
{ AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2960
AArch64::W5, AArch64::W6, AArch64::W7 },
2961
{ AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2962
AArch64::X5, AArch64::X6, AArch64::X7 },
2963
{ AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2964
AArch64::H5, AArch64::H6, AArch64::H7 },
2965
{ AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2966
AArch64::S5, AArch64::S6, AArch64::S7 },
2967
{ AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2968
AArch64::D5, AArch64::D6, AArch64::D7 },
2969
{ AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2970
AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2971
};
2972
2973
unsigned GPRIdx = 0;
2974
unsigned FPRIdx = 0;
2975
for (auto const &Arg : F->args()) {
2976
MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2977
unsigned SrcReg;
2978
const TargetRegisterClass *RC;
2979
if (VT >= MVT::i1 && VT <= MVT::i32) {
2980
SrcReg = Registers[0][GPRIdx++];
2981
RC = &AArch64::GPR32RegClass;
2982
VT = MVT::i32;
2983
} else if (VT == MVT::i64) {
2984
SrcReg = Registers[1][GPRIdx++];
2985
RC = &AArch64::GPR64RegClass;
2986
} else if (VT == MVT::f16 || VT == MVT::bf16) {
2987
SrcReg = Registers[2][FPRIdx++];
2988
RC = &AArch64::FPR16RegClass;
2989
} else if (VT == MVT::f32) {
2990
SrcReg = Registers[3][FPRIdx++];
2991
RC = &AArch64::FPR32RegClass;
2992
} else if ((VT == MVT::f64) || VT.is64BitVector()) {
2993
SrcReg = Registers[4][FPRIdx++];
2994
RC = &AArch64::FPR64RegClass;
2995
} else if (VT.is128BitVector()) {
2996
SrcReg = Registers[5][FPRIdx++];
2997
RC = &AArch64::FPR128RegClass;
2998
} else
2999
llvm_unreachable("Unexpected value type.");
3000
3001
Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3002
// FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3003
// Without this, EmitLiveInCopies may eliminate the livein if its only
3004
// use is a bitcast (which isn't turned into an instruction).
3005
Register ResultReg = createResultReg(RC);
3006
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3007
TII.get(TargetOpcode::COPY), ResultReg)
3008
.addReg(DstReg, getKillRegState(true));
3009
updateValueMap(&Arg, ResultReg);
3010
}
3011
return true;
3012
}
3013
3014
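/// Analyze the outgoing call arguments, emit CALLSEQ_START, and copy register
/// arguments into their assigned physical registers or store them to the
/// stack.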
bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3015
SmallVectorImpl<MVT> &OutVTs,
3016
unsigned &NumBytes) {
3017
CallingConv::ID CC = CLI.CallConv;
3018
SmallVector<CCValAssign, 16> ArgLocs;
3019
CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3020
CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3021
3022
// Get a count of how many bytes are to be pushed on the stack.
3023
NumBytes = CCInfo.getStackSize();
3024
3025
// Issue CALLSEQ_START
3026
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3027
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3028
.addImm(NumBytes).addImm(0);
3029
3030
// Process the args.
3031
for (CCValAssign &VA : ArgLocs) {
3032
const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3033
MVT ArgVT = OutVTs[VA.getValNo()];
3034
3035
Register ArgReg = getRegForValue(ArgVal);
3036
if (!ArgReg)
3037
return false;
3038
3039
// Handle arg promotion: SExt, ZExt, AExt.
3040
switch (VA.getLocInfo()) {
3041
case CCValAssign::Full:
3042
break;
3043
case CCValAssign::SExt: {
3044
MVT DestVT = VA.getLocVT();
3045
MVT SrcVT = ArgVT;
3046
ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3047
if (!ArgReg)
3048
return false;
3049
break;
3050
}
3051
case CCValAssign::AExt:
3052
// Intentional fall-through.
3053
case CCValAssign::ZExt: {
3054
MVT DestVT = VA.getLocVT();
3055
MVT SrcVT = ArgVT;
3056
ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3057
if (!ArgReg)
3058
return false;
3059
break;
3060
}
3061
default:
3062
llvm_unreachable("Unknown arg promotion!");
3063
}
3064
3065
// Now copy/store arg to correct locations.
3066
if (VA.isRegLoc() && !VA.needsCustom()) {
3067
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3068
TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3069
CLI.OutRegs.push_back(VA.getLocReg());
3070
} else if (VA.needsCustom()) {
3071
// FIXME: Handle custom args.
3072
return false;
3073
} else {
3074
assert(VA.isMemLoc() && "Assuming store on stack.");
3075
3076
// Don't emit stores for undef values.
3077
if (isa<UndefValue>(ArgVal))
3078
continue;
3079
3080
// Need to store on the stack.
3081
unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3082
3083
unsigned BEAlign = 0;
3084
if (ArgSize < 8 && !Subtarget->isLittleEndian())
3085
BEAlign = 8 - ArgSize;
3086
3087
Address Addr;
3088
Addr.setKind(Address::RegBase);
3089
Addr.setReg(AArch64::SP);
3090
Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3091
3092
Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3093
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3094
MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3095
MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3096
3097
if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3098
return false;
3099
}
3100
}
3101
return true;
3102
}
3103
3104
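/// Emit CALLSEQ_END and copy the call results out of their assigned physical
/// registers.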
bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3105
CallingConv::ID CC = CLI.CallConv;
3106
3107
// Issue CALLSEQ_END
3108
unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3109
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3110
.addImm(NumBytes).addImm(0);
3111
3112
// Now the return values.
3113
SmallVector<CCValAssign, 16> RVLocs;
3114
CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3115
CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
3116
3117
Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3118
for (unsigned i = 0; i != RVLocs.size(); ++i) {
3119
CCValAssign &VA = RVLocs[i];
3120
MVT CopyVT = VA.getValVT();
3121
unsigned CopyReg = ResultReg + i;
3122
3123
// TODO: Handle big-endian results
3124
if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3125
return false;
3126
3127
// Copy result out of their specified physreg.
3128
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
3129
CopyReg)
3130
.addReg(VA.getLocReg());
3131
CLI.InRegs.push_back(VA.getLocReg());
3132
}
3133
3134
CLI.ResultReg = ResultReg;
3135
CLI.NumResultRegs = RVLocs.size();
3136
3137
return true;
3138
}
3139
3140
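/// Fast-path call lowering. Bails out to SelectionDAG for tail calls, varargs,
/// and other cases it cannot handle, then emits the call sequence and the BL
/// or BLR instruction.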
bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3141
CallingConv::ID CC = CLI.CallConv;
3142
bool IsTailCall = CLI.IsTailCall;
3143
bool IsVarArg = CLI.IsVarArg;
3144
const Value *Callee = CLI.Callee;
3145
MCSymbol *Symbol = CLI.Symbol;
3146
3147
if (!Callee && !Symbol)
3148
return false;
3149
3150
// Allow SelectionDAG isel to handle calls to functions like setjmp that need
3151
// a bti instruction following the call.
3152
if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3153
!Subtarget->noBTIAtReturnTwice() &&
3154
MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3155
return false;
3156
3157
// Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3158
if (CLI.CB && CLI.CB->isIndirectCall() &&
3159
CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3160
return false;
3161
3162
// Allow SelectionDAG isel to handle tail calls.
3163
if (IsTailCall)
3164
return false;
3165
3166
// FIXME: we could and should support this, but for now correctness at -O0 is
3167
// more important.
3168
if (Subtarget->isTargetILP32())
3169
return false;
3170
3171
CodeModel::Model CM = TM.getCodeModel();
3172
// Only support the small-addressing and large code models.
3173
if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3174
return false;
3175
3176
// FIXME: Add large code model support for ELF.
3177
if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3178
return false;
3179
3180
// ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
3181
// attribute. Check "RtLibUseGOT" instead.
3182
if (MF->getFunction().getParent()->getRtLibUseGOT())
3183
return false;
3184
3185
// Let SDISel handle vararg functions.
3186
if (IsVarArg)
3187
return false;
3188
3189
if (Subtarget->isWindowsArm64EC())
3190
return false;
3191
3192
for (auto Flag : CLI.OutFlags)
3193
if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3194
Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3195
return false;
3196
3197
// Set up the argument vectors.
3198
SmallVector<MVT, 16> OutVTs;
3199
OutVTs.reserve(CLI.OutVals.size());
3200
3201
for (auto *Val : CLI.OutVals) {
3202
MVT VT;
3203
if (!isTypeLegal(Val->getType(), VT) &&
3204
!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3205
return false;
3206
3207
// We don't handle vector parameters yet.
3208
if (VT.isVector() || VT.getSizeInBits() > 64)
3209
return false;
3210
3211
OutVTs.push_back(VT);
3212
}
3213
3214
Address Addr;
3215
if (Callee && !computeCallAddress(Callee, Addr))
3216
return false;
3217
3218
// The weak function target may be zero; in that case we must use indirect
3219
// addressing via a stub on windows as it may be out of range for a
3220
// PC-relative jump.
3221
if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3222
Addr.getGlobalValue()->hasExternalWeakLinkage())
3223
return false;
3224
3225
// Handle the arguments now that we've gotten them.
3226
unsigned NumBytes;
3227
if (!processCallArgs(CLI, OutVTs, NumBytes))
3228
return false;
3229
3230
const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3231
if (RegInfo->isAnyArgRegReserved(*MF))
3232
RegInfo->emitReservedArgRegCallError(*MF);
3233
3234
// Issue the call.
3235
MachineInstrBuilder MIB;
3236
if (Subtarget->useSmallAddressing()) {
3237
const MCInstrDesc &II =
3238
TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3239
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3240
if (Symbol)
3241
MIB.addSym(Symbol, 0);
3242
else if (Addr.getGlobalValue())
3243
MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3244
else if (Addr.getReg()) {
3245
Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3246
MIB.addReg(Reg);
3247
} else
3248
return false;
3249
} else {
3250
unsigned CallReg = 0;
3251
if (Symbol) {
3252
Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3253
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3254
ADRPReg)
3255
.addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3256
3257
CallReg = createResultReg(&AArch64::GPR64RegClass);
3258
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3259
TII.get(AArch64::LDRXui), CallReg)
3260
.addReg(ADRPReg)
3261
.addSym(Symbol,
3262
AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3263
} else if (Addr.getGlobalValue())
3264
CallReg = materializeGV(Addr.getGlobalValue());
3265
else if (Addr.getReg())
3266
CallReg = Addr.getReg();
3267
3268
if (!CallReg)
3269
return false;
3270
3271
const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3272
CallReg = constrainOperandRegClass(II, CallReg, 0);
3273
MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3274
}
3275
3276
// Add implicit physical register uses to the call.
3277
for (auto Reg : CLI.OutRegs)
3278
MIB.addReg(Reg, RegState::Implicit);
3279
3280
// Add a register mask with the call-preserved registers.
3281
// Proper defs for return values will be added by setPhysRegsDeadExcept().
3282
MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3283
3284
CLI.Call = MIB;
3285
3286
// Finish off the call including any return values.
3287
return finishCall(CLI, NumBytes);
3288
}
3289
3290
bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3291
if (Alignment)
3292
return Len / Alignment->value() <= 4;
3293
else
3294
return Len < 32;
3295
}
3296
3297
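/// Inline a small memcpy as a sequence of loads and stores, choosing the
/// widest legal type that the remaining length and alignment allow.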
bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3298
uint64_t Len, MaybeAlign Alignment) {
3299
// Make sure we don't bloat code by inlining very large memcpy's.
3300
if (!isMemCpySmall(Len, Alignment))
3301
return false;
3302
3303
int64_t UnscaledOffset = 0;
3304
Address OrigDest = Dest;
3305
Address OrigSrc = Src;
3306
3307
while (Len) {
3308
MVT VT;
3309
if (!Alignment || *Alignment >= 8) {
3310
if (Len >= 8)
3311
VT = MVT::i64;
3312
else if (Len >= 4)
3313
VT = MVT::i32;
3314
else if (Len >= 2)
3315
VT = MVT::i16;
3316
else {
3317
VT = MVT::i8;
3318
}
3319
} else {
3320
assert(Alignment && "Alignment is set in this branch");
3321
// Bound based on alignment.
3322
if (Len >= 4 && *Alignment == 4)
3323
VT = MVT::i32;
3324
else if (Len >= 2 && *Alignment == 2)
3325
VT = MVT::i16;
3326
else {
3327
VT = MVT::i8;
3328
}
3329
}
3330
3331
unsigned ResultReg = emitLoad(VT, VT, Src);
3332
if (!ResultReg)
3333
return false;
3334
3335
if (!emitStore(VT, ResultReg, Dest))
3336
return false;
3337
3338
int64_t Size = VT.getSizeInBits() / 8;
3339
Len -= Size;
3340
UnscaledOffset += Size;
3341
3342
// We need to recompute the unscaled offset for each iteration.
3343
Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3344
Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3345
}
3346
3347
return true;
3348
}
3349
3350
/// Check if it is possible to fold the condition from the XALU intrinsic
3351
/// into the user. The condition code will only be updated on success.
3352
bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3353
const Instruction *I,
3354
const Value *Cond) {
3355
if (!isa<ExtractValueInst>(Cond))
3356
return false;
3357
3358
const auto *EV = cast<ExtractValueInst>(Cond);
3359
if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3360
return false;
3361
3362
const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3363
MVT RetVT;
3364
const Function *Callee = II->getCalledFunction();
3365
Type *RetTy =
3366
cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3367
if (!isTypeLegal(RetTy, RetVT))
3368
return false;
3369
3370
if (RetVT != MVT::i32 && RetVT != MVT::i64)
3371
return false;
3372
3373
const Value *LHS = II->getArgOperand(0);
3374
const Value *RHS = II->getArgOperand(1);
3375
3376
// Canonicalize immediate to the RHS.
3377
if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3378
std::swap(LHS, RHS);
3379
3380
// Simplify multiplies.
3381
Intrinsic::ID IID = II->getIntrinsicID();
3382
switch (IID) {
3383
default:
3384
break;
3385
case Intrinsic::smul_with_overflow:
3386
if (const auto *C = dyn_cast<ConstantInt>(RHS))
3387
if (C->getValue() == 2)
3388
IID = Intrinsic::sadd_with_overflow;
3389
break;
3390
case Intrinsic::umul_with_overflow:
3391
if (const auto *C = dyn_cast<ConstantInt>(RHS))
3392
if (C->getValue() == 2)
3393
IID = Intrinsic::uadd_with_overflow;
3394
break;
3395
}
3396
3397
AArch64CC::CondCode TmpCC;
3398
switch (IID) {
3399
default:
3400
return false;
3401
case Intrinsic::sadd_with_overflow:
3402
case Intrinsic::ssub_with_overflow:
3403
TmpCC = AArch64CC::VS;
3404
break;
3405
case Intrinsic::uadd_with_overflow:
3406
TmpCC = AArch64CC::HS;
3407
break;
3408
case Intrinsic::usub_with_overflow:
3409
TmpCC = AArch64CC::LO;
3410
break;
3411
case Intrinsic::smul_with_overflow:
3412
case Intrinsic::umul_with_overflow:
3413
TmpCC = AArch64CC::NE;
3414
break;
3415
}
3416
3417
// Check if both instructions are in the same basic block.
3418
if (!isValueAvailable(II))
3419
return false;
3420
3421
// Make sure nothing is in the way between the intrinsic and the instruction
// that will use its condition.
3422
BasicBlock::const_iterator Start(I);
3423
BasicBlock::const_iterator End(II);
3424
for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3425
// We only expect extractvalue instructions between the intrinsic and the
3426
// instruction to be selected.
3427
if (!isa<ExtractValueInst>(Itr))
3428
return false;
3429
3430
// Check that the extractvalue operand comes from the intrinsic.
3431
const auto *EVI = cast<ExtractValueInst>(Itr);
3432
if (EVI->getAggregateOperand() != II)
3433
return false;
3434
}
3435
3436
CC = TmpCC;
3437
return true;
3438
}
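// Sketch of the IR shape this fold accepts (names are illustrative):
//   %s   = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %v   = extractvalue { i32, i1 } %s, 0   ; only extractvalues may appear
//   %ovf = extractvalue { i32, i1 } %s, 1   ; between the intrinsic and I
//   br i1 %ovf, label %overflow, label %cont
// Since the ADDS emitted for the intrinsic sets NZCV and nothing in between
// clobbers it, the branch or select can test the returned condition code
// (VS in this example) directly.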
3439
3440
bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3441
// FIXME: Handle more intrinsics.
3442
switch (II->getIntrinsicID()) {
3443
default: return false;
3444
case Intrinsic::frameaddress: {
3445
MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3446
MFI.setFrameAddressIsTaken(true);
3447
3448
const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3449
Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3450
Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3451
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3452
TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3453
// Recursively load frame address
3454
// ldr x0, [fp]
3455
// ldr x0, [x0]
3456
// ldr x0, [x0]
3457
// ...
3458
unsigned DestReg;
3459
unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3460
while (Depth--) {
3461
DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3462
SrcReg, 0);
3463
assert(DestReg && "Unexpected LDR instruction emission failure.");
3464
SrcReg = DestReg;
3465
}
3466
3467
updateValueMap(II, SrcReg);
3468
return true;
3469
}
3470
case Intrinsic::sponentry: {
3471
MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3472
3473
// SP = FP + Fixed Object + 16
3474
int FI = MFI.CreateFixedObject(4, 0, false);
3475
Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3476
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3477
TII.get(AArch64::ADDXri), ResultReg)
3478
.addFrameIndex(FI)
3479
.addImm(0)
3480
.addImm(0);
3481
3482
updateValueMap(II, ResultReg);
3483
return true;
3484
}
3485
case Intrinsic::memcpy:
3486
case Intrinsic::memmove: {
3487
const auto *MTI = cast<MemTransferInst>(II);
3488
// Don't handle volatile.
3489
if (MTI->isVolatile())
3490
return false;
3491
3492
// Disable inlining for memmove before calls to computeAddress. Otherwise,
// we would emit dead code because we don't currently handle memmoves.
3494
bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3495
if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3496
// Small memcpy's are common enough that we want to do them without a call
3497
// if possible.
3498
uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3499
MaybeAlign Alignment;
3500
if (MTI->getDestAlign() || MTI->getSourceAlign())
3501
Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3502
MTI->getSourceAlign().valueOrOne());
3503
if (isMemCpySmall(Len, Alignment)) {
3504
Address Dest, Src;
3505
if (!computeAddress(MTI->getRawDest(), Dest) ||
3506
!computeAddress(MTI->getRawSource(), Src))
3507
return false;
3508
if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3509
return true;
3510
}
3511
}
3512
3513
if (!MTI->getLength()->getType()->isIntegerTy(64))
3514
return false;
3515
3516
if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3517
// Fast instruction selection doesn't support the special
3518
// address spaces.
3519
return false;
3520
3521
const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3522
return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3523
}
3524
case Intrinsic::memset: {
3525
const MemSetInst *MSI = cast<MemSetInst>(II);
3526
// Don't handle volatile.
3527
if (MSI->isVolatile())
3528
return false;
3529
3530
if (!MSI->getLength()->getType()->isIntegerTy(64))
3531
return false;
3532
3533
if (MSI->getDestAddressSpace() > 255)
3534
// Fast instruction selection doesn't support the special
3535
// address spaces.
3536
return false;
3537
3538
return lowerCallTo(II, "memset", II->arg_size() - 1);
3539
}
3540
case Intrinsic::sin:
3541
case Intrinsic::cos:
3542
case Intrinsic::tan:
3543
case Intrinsic::pow: {
3544
MVT RetVT;
3545
if (!isTypeLegal(II->getType(), RetVT))
3546
return false;
3547
3548
if (RetVT != MVT::f32 && RetVT != MVT::f64)
3549
return false;
3550
3551
static const RTLIB::Libcall LibCallTable[4][2] = {
3552
{RTLIB::SIN_F32, RTLIB::SIN_F64},
3553
{RTLIB::COS_F32, RTLIB::COS_F64},
3554
{RTLIB::TAN_F32, RTLIB::TAN_F64},
3555
{RTLIB::POW_F32, RTLIB::POW_F64}};
3556
RTLIB::Libcall LC;
3557
bool Is64Bit = RetVT == MVT::f64;
3558
switch (II->getIntrinsicID()) {
3559
default:
3560
llvm_unreachable("Unexpected intrinsic.");
3561
case Intrinsic::sin:
3562
LC = LibCallTable[0][Is64Bit];
3563
break;
3564
case Intrinsic::cos:
3565
LC = LibCallTable[1][Is64Bit];
3566
break;
3567
case Intrinsic::tan:
3568
LC = LibCallTable[2][Is64Bit];
3569
break;
3570
case Intrinsic::pow:
3571
LC = LibCallTable[3][Is64Bit];
3572
break;
3573
}
3574
3575
ArgListTy Args;
3576
Args.reserve(II->arg_size());
3577
3578
// Populate the argument list.
3579
for (auto &Arg : II->args()) {
3580
ArgListEntry Entry;
3581
Entry.Val = Arg;
3582
Entry.Ty = Arg->getType();
3583
Args.push_back(Entry);
3584
}
3585
3586
CallLoweringInfo CLI;
3587
MCContext &Ctx = MF->getContext();
3588
CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3589
TLI.getLibcallName(LC), std::move(Args));
3590
if (!lowerCallTo(CLI))
3591
return false;
3592
updateValueMap(II, CLI.ResultReg);
3593
return true;
3594
}
3595
case Intrinsic::fabs: {
3596
MVT VT;
3597
if (!isTypeLegal(II->getType(), VT))
3598
return false;
3599
3600
unsigned Opc;
3601
switch (VT.SimpleTy) {
3602
default:
3603
return false;
3604
case MVT::f32:
3605
Opc = AArch64::FABSSr;
3606
break;
3607
case MVT::f64:
3608
Opc = AArch64::FABSDr;
3609
break;
3610
}
3611
Register SrcReg = getRegForValue(II->getOperand(0));
3612
if (!SrcReg)
3613
return false;
3614
Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3615
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3616
.addReg(SrcReg);
3617
updateValueMap(II, ResultReg);
3618
return true;
3619
}
3620
case Intrinsic::trap:
3621
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3622
.addImm(1);
3623
return true;
3624
case Intrinsic::debugtrap:
3625
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3626
.addImm(0xF000);
3627
return true;
3628
3629
case Intrinsic::sqrt: {
3630
Type *RetTy = II->getCalledFunction()->getReturnType();
3631
3632
MVT VT;
3633
if (!isTypeLegal(RetTy, VT))
3634
return false;
3635
3636
Register Op0Reg = getRegForValue(II->getOperand(0));
3637
if (!Op0Reg)
3638
return false;
3639
3640
unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3641
if (!ResultReg)
3642
return false;
3643
3644
updateValueMap(II, ResultReg);
3645
return true;
3646
}
3647
case Intrinsic::sadd_with_overflow:
3648
case Intrinsic::uadd_with_overflow:
3649
case Intrinsic::ssub_with_overflow:
3650
case Intrinsic::usub_with_overflow:
3651
case Intrinsic::smul_with_overflow:
3652
case Intrinsic::umul_with_overflow: {
3653
// This implements the basic lowering of the xalu with overflow intrinsics.
3654
const Function *Callee = II->getCalledFunction();
3655
auto *Ty = cast<StructType>(Callee->getReturnType());
3656
Type *RetTy = Ty->getTypeAtIndex(0U);
3657
3658
MVT VT;
3659
if (!isTypeLegal(RetTy, VT))
3660
return false;
3661
3662
if (VT != MVT::i32 && VT != MVT::i64)
3663
return false;
3664
3665
const Value *LHS = II->getArgOperand(0);
3666
const Value *RHS = II->getArgOperand(1);
3667
// Canonicalize immediate to the RHS.
3668
if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3669
std::swap(LHS, RHS);
3670
3671
// Simplify multiplies.
3672
Intrinsic::ID IID = II->getIntrinsicID();
3673
switch (IID) {
3674
default:
3675
break;
3676
case Intrinsic::smul_with_overflow:
3677
if (const auto *C = dyn_cast<ConstantInt>(RHS))
3678
if (C->getValue() == 2) {
3679
IID = Intrinsic::sadd_with_overflow;
3680
RHS = LHS;
3681
}
3682
break;
3683
case Intrinsic::umul_with_overflow:
3684
if (const auto *C = dyn_cast<ConstantInt>(RHS))
3685
if (C->getValue() == 2) {
3686
IID = Intrinsic::uadd_with_overflow;
3687
RHS = LHS;
3688
}
3689
break;
3690
}
3691
3692
unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3693
AArch64CC::CondCode CC = AArch64CC::Invalid;
3694
switch (IID) {
3695
default: llvm_unreachable("Unexpected intrinsic!");
3696
case Intrinsic::sadd_with_overflow:
3697
ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3698
CC = AArch64CC::VS;
3699
break;
3700
case Intrinsic::uadd_with_overflow:
3701
ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3702
CC = AArch64CC::HS;
3703
break;
3704
case Intrinsic::ssub_with_overflow:
3705
ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3706
CC = AArch64CC::VS;
3707
break;
3708
case Intrinsic::usub_with_overflow:
3709
ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3710
CC = AArch64CC::LO;
3711
break;
3712
case Intrinsic::smul_with_overflow: {
3713
CC = AArch64CC::NE;
3714
Register LHSReg = getRegForValue(LHS);
3715
if (!LHSReg)
3716
return false;
3717
3718
Register RHSReg = getRegForValue(RHS);
3719
if (!RHSReg)
3720
return false;
3721
3722
if (VT == MVT::i32) {
3723
MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3724
Register MulSubReg =
3725
fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3726
// cmp xreg, wreg, sxtw
3727
emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3728
AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3729
/*WantResult=*/false);
3730
MulReg = MulSubReg;
3731
} else {
3732
assert(VT == MVT::i64 && "Unexpected value type.");
3733
// LHSReg and RHSReg cannot be killed by this Mul, since they are
3734
// reused in the next instruction.
3735
MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3736
unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3737
emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3738
/*WantResult=*/false);
3739
}
3740
break;
3741
}
3742
case Intrinsic::umul_with_overflow: {
3743
CC = AArch64CC::NE;
3744
Register LHSReg = getRegForValue(LHS);
3745
if (!LHSReg)
3746
return false;
3747
3748
Register RHSReg = getRegForValue(RHS);
3749
if (!RHSReg)
3750
return false;
3751
3752
if (VT == MVT::i32) {
3753
MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3754
// tst xreg, #0xffffffff00000000
3755
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3756
TII.get(AArch64::ANDSXri), AArch64::XZR)
3757
.addReg(MulReg)
3758
.addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3759
MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3760
} else {
3761
assert(VT == MVT::i64 && "Unexpected value type.");
3762
// LHSReg and RHSReg cannot be killed by this Mul, since they are
3763
// reused in the next instruction.
3764
MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3765
unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3766
emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3767
}
3768
break;
3769
}
3770
}
3771
3772
if (MulReg) {
3773
ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3774
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3775
TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3776
}
3777
3778
if (!ResultReg1)
3779
return false;
3780
3781
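// CSINC Wd, WZR, WZR, invert(CC) is the canonical CSET Wd, CC: it yields 1
// when CC holds and 0 otherwise, materializing the overflow bit.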
ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3782
AArch64::WZR, AArch64::WZR,
3783
getInvertedCondCode(CC));
3784
(void)ResultReg2;
3785
assert((ResultReg1 + 1) == ResultReg2 &&
3786
"Nonconsecutive result registers.");
3787
updateValueMap(II, ResultReg1, 2);
3788
return true;
3789
}
3790
case Intrinsic::aarch64_crc32b:
3791
case Intrinsic::aarch64_crc32h:
3792
case Intrinsic::aarch64_crc32w:
3793
case Intrinsic::aarch64_crc32x:
3794
case Intrinsic::aarch64_crc32cb:
3795
case Intrinsic::aarch64_crc32ch:
3796
case Intrinsic::aarch64_crc32cw:
3797
case Intrinsic::aarch64_crc32cx: {
3798
if (!Subtarget->hasCRC())
3799
return false;
3800
3801
unsigned Opc;
3802
switch (II->getIntrinsicID()) {
3803
default:
3804
llvm_unreachable("Unexpected intrinsic!");
3805
case Intrinsic::aarch64_crc32b:
3806
Opc = AArch64::CRC32Brr;
3807
break;
3808
case Intrinsic::aarch64_crc32h:
3809
Opc = AArch64::CRC32Hrr;
3810
break;
3811
case Intrinsic::aarch64_crc32w:
3812
Opc = AArch64::CRC32Wrr;
3813
break;
3814
case Intrinsic::aarch64_crc32x:
3815
Opc = AArch64::CRC32Xrr;
3816
break;
3817
case Intrinsic::aarch64_crc32cb:
3818
Opc = AArch64::CRC32CBrr;
3819
break;
3820
case Intrinsic::aarch64_crc32ch:
3821
Opc = AArch64::CRC32CHrr;
3822
break;
3823
case Intrinsic::aarch64_crc32cw:
3824
Opc = AArch64::CRC32CWrr;
3825
break;
3826
case Intrinsic::aarch64_crc32cx:
3827
Opc = AArch64::CRC32CXrr;
3828
break;
3829
}
3830
3831
Register LHSReg = getRegForValue(II->getArgOperand(0));
3832
Register RHSReg = getRegForValue(II->getArgOperand(1));
3833
if (!LHSReg || !RHSReg)
3834
return false;
3835
3836
Register ResultReg =
3837
fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3838
updateValueMap(II, ResultReg);
3839
return true;
3840
}
3841
}
3842
return false;
3843
}
3844
3845
bool AArch64FastISel::selectRet(const Instruction *I) {
3846
const ReturnInst *Ret = cast<ReturnInst>(I);
3847
const Function &F = *I->getParent()->getParent();
3848
3849
if (!FuncInfo.CanLowerReturn)
3850
return false;
3851
3852
if (F.isVarArg())
3853
return false;
3854
3855
if (TLI.supportSwiftError() &&
3856
F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3857
return false;
3858
3859
if (TLI.supportSplitCSR(FuncInfo.MF))
3860
return false;
3861
3862
// Build a list of return value registers.
3863
SmallVector<unsigned, 4> RetRegs;
3864
3865
if (Ret->getNumOperands() > 0) {
3866
CallingConv::ID CC = F.getCallingConv();
3867
SmallVector<ISD::OutputArg, 4> Outs;
3868
GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3869
3870
// Analyze operands of the call, assigning locations to each operand.
3871
SmallVector<CCValAssign, 16> ValLocs;
3872
CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3873
CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
3874
3875
// Only handle a single return value for now.
3876
if (ValLocs.size() != 1)
3877
return false;
3878
3879
CCValAssign &VA = ValLocs[0];
3880
const Value *RV = Ret->getOperand(0);
3881
3882
// Don't bother handling odd stuff for now.
3883
if ((VA.getLocInfo() != CCValAssign::Full) &&
3884
(VA.getLocInfo() != CCValAssign::BCvt))
3885
return false;
3886
3887
// Only handle register returns for now.
3888
if (!VA.isRegLoc())
3889
return false;
3890
3891
Register Reg = getRegForValue(RV);
3892
if (Reg == 0)
3893
return false;
3894
3895
unsigned SrcReg = Reg + VA.getValNo();
3896
Register DestReg = VA.getLocReg();
3897
// Avoid a cross-class copy. This is very unlikely.
3898
if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3899
return false;
3900
3901
EVT RVEVT = TLI.getValueType(DL, RV->getType());
3902
if (!RVEVT.isSimple())
3903
return false;
3904
3905
// Vectors (of > 1 lane) in big endian need tricky handling.
3906
if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3907
!Subtarget->isLittleEndian())
3908
return false;
3909
3910
MVT RVVT = RVEVT.getSimpleVT();
3911
if (RVVT == MVT::f128)
3912
return false;
3913
3914
MVT DestVT = VA.getValVT();
3915
// Special handling for extended integers.
3916
if (RVVT != DestVT) {
3917
if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3918
return false;
3919
3920
if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3921
return false;
3922
3923
bool IsZExt = Outs[0].Flags.isZExt();
3924
SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3925
if (SrcReg == 0)
3926
return false;
3927
}
3928
3929
// "Callee" (i.e. value producer) zero extends pointers at function
3930
// boundary.
3931
if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3932
SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3933
3934
// Make the copy.
3935
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3936
TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3937
3938
// Add register to return instruction.
3939
RetRegs.push_back(VA.getLocReg());
3940
}
3941
3942
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3943
TII.get(AArch64::RET_ReallyLR));
3944
for (unsigned RetReg : RetRegs)
3945
MIB.addReg(RetReg, RegState::Implicit);
3946
return true;
3947
}
3948
3949
bool AArch64FastISel::selectTrunc(const Instruction *I) {
3950
Type *DestTy = I->getType();
3951
Value *Op = I->getOperand(0);
3952
Type *SrcTy = Op->getType();
3953
3954
EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3955
EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3956
if (!SrcEVT.isSimple())
3957
return false;
3958
if (!DestEVT.isSimple())
3959
return false;
3960
3961
MVT SrcVT = SrcEVT.getSimpleVT();
3962
MVT DestVT = DestEVT.getSimpleVT();
3963
3964
if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3965
SrcVT != MVT::i8)
3966
return false;
3967
if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3968
DestVT != MVT::i1)
3969
return false;
3970
3971
Register SrcReg = getRegForValue(Op);
3972
if (!SrcReg)
3973
return false;
3974
3975
// If we're truncating from i64 to a smaller non-legal type then generate an
// AND. Otherwise, we know the high bits are undefined and a truncate only
// generates a COPY. We cannot mark the source register also as the result
// register, because this can incorrectly transfer the kill flag onto the
// source register.
3980
unsigned ResultReg;
3981
if (SrcVT == MVT::i64) {
3982
uint64_t Mask = 0;
3983
switch (DestVT.SimpleTy) {
3984
default:
3985
// Trunc i64 to i32 is handled by the target-independent fast-isel.
3986
return false;
3987
case MVT::i1:
3988
Mask = 0x1;
3989
break;
3990
case MVT::i8:
3991
Mask = 0xff;
3992
break;
3993
case MVT::i16:
3994
Mask = 0xffff;
3995
break;
3996
}
3997
// Issue an extract_subreg to get the lower 32-bits.
3998
Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3999
AArch64::sub_32);
4000
// Create the AND instruction which performs the actual truncation.
4001
ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
4002
assert(ResultReg && "Unexpected AND instruction emission failure.");
4003
} else {
4004
ResultReg = createResultReg(&AArch64::GPR32RegClass);
4005
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4006
TII.get(TargetOpcode::COPY), ResultReg)
4007
.addReg(SrcReg);
4008
}
4009
4010
updateValueMap(I, ResultReg);
4011
return true;
4012
}
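// For example, "trunc i64 %x to i8" becomes an EXTRACT_SUBREG of sub_32
// followed by an ANDWri with 0xff, whereas truncations from i32 or smaller
// only need the COPY emitted above.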
4013
4014
unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4015
assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4016
DestVT == MVT::i64) &&
4017
"Unexpected value type.");
4018
// Handle i8 and i16 as i32.
4019
if (DestVT == MVT::i8 || DestVT == MVT::i16)
4020
DestVT = MVT::i32;
4021
4022
if (IsZExt) {
4023
unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4024
assert(ResultReg && "Unexpected AND instruction emission failure.");
4025
if (DestVT == MVT::i64) {
4026
// We're zero-extending i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
// upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4028
Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4029
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4030
TII.get(AArch64::SUBREG_TO_REG), Reg64)
4031
.addImm(0)
4032
.addReg(ResultReg)
4033
.addImm(AArch64::sub_32);
4034
ResultReg = Reg64;
4035
}
4036
return ResultReg;
4037
} else {
4038
if (DestVT == MVT::i64) {
4039
// FIXME: Sign-extending i1 to i64 is not handled yet; bail out.
4040
return 0;
4041
}
4042
return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4043
0, 0);
4044
}
4045
}
4046
4047
unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4048
unsigned Opc, ZReg;
4049
switch (RetVT.SimpleTy) {
4050
default: return 0;
4051
case MVT::i8:
4052
case MVT::i16:
4053
case MVT::i32:
4054
RetVT = MVT::i32;
4055
Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4056
case MVT::i64:
4057
Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4058
}
4059
4060
const TargetRegisterClass *RC =
4061
(RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4062
return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4063
}
4064
4065
unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4066
if (RetVT != MVT::i64)
4067
return 0;
4068
4069
return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4070
Op0, Op1, AArch64::XZR);
4071
}
4072
4073
unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4074
if (RetVT != MVT::i64)
4075
return 0;
4076
4077
return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4078
Op0, Op1, AArch64::XZR);
4079
}
4080
4081
unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4082
unsigned Op1Reg) {
4083
unsigned Opc = 0;
4084
bool NeedTrunc = false;
4085
uint64_t Mask = 0;
4086
switch (RetVT.SimpleTy) {
4087
default: return 0;
4088
case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4089
case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4090
case MVT::i32: Opc = AArch64::LSLVWr; break;
4091
case MVT::i64: Opc = AArch64::LSLVXr; break;
4092
}
4093
4094
const TargetRegisterClass *RC =
4095
(RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4096
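// Only 32- and 64-bit variable shifts exist, so i8/i16 values are shifted in
// a W register: the shift amount is truncated to the narrow type with an AND
// first, and the result is masked back down to the narrow width afterwards.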
if (NeedTrunc)
4097
Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4098
4099
Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4100
if (NeedTrunc)
4101
ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4102
return ResultReg;
4103
}
4104
4105
unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4106
uint64_t Shift, bool IsZExt) {
4107
assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4108
"Unexpected source/return type pair.");
4109
assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4110
SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4111
"Unexpected source value type.");
4112
assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4113
RetVT == MVT::i64) && "Unexpected return value type.");
4114
4115
bool Is64Bit = (RetVT == MVT::i64);
4116
unsigned RegSize = Is64Bit ? 64 : 32;
4117
unsigned DstBits = RetVT.getSizeInBits();
4118
unsigned SrcBits = SrcVT.getSizeInBits();
4119
const TargetRegisterClass *RC =
4120
Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4121
4122
// Just emit a copy for "zero" shifts.
4123
if (Shift == 0) {
4124
if (RetVT == SrcVT) {
4125
Register ResultReg = createResultReg(RC);
4126
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4127
TII.get(TargetOpcode::COPY), ResultReg)
4128
.addReg(Op0);
4129
return ResultReg;
4130
} else
4131
return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4132
}
4133
4134
// Don't deal with undefined shifts.
4135
if (Shift >= DstBits)
4136
return 0;
4137
4138
// For immediate shifts we can fold the zero-/sign-extension into the shift.
4139
// {S|U}BFM Wd, Wn, #r, #s
4140
// Wd<32+s-r,32-r> = Wn<s:0> when r > s
4141
4142
// %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4143
// %2 = shl i16 %1, 4
4144
// Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4145
// 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4146
// 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4147
// 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4148
4149
// %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4150
// %2 = shl i16 %1, 8
4151
// Wd<32+7-24,32-24> = Wn<7:0>
4152
// 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4153
// 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4154
// 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4155
4156
// %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4157
// %2 = shl i16 %1, 12
4158
// Wd<32+3-20,32-20> = Wn<3:0>
4159
// 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4160
// 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4161
// 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4162
4163
unsigned ImmR = RegSize - Shift;
4164
// Limit the width to the length of the source type.
4165
unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4166
static const unsigned OpcTable[2][2] = {
4167
{AArch64::SBFMWri, AArch64::SBFMXri},
4168
{AArch64::UBFMWri, AArch64::UBFMXri}
4169
};
4170
unsigned Opc = OpcTable[IsZExt][Is64Bit];
4171
if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4172
Register TmpReg = MRI.createVirtualRegister(RC);
4173
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4174
TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4175
.addImm(0)
4176
.addReg(Op0)
4177
.addImm(AArch64::sub_32);
4178
Op0 = TmpReg;
4179
}
4180
return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4181
}
4182
4183
unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4184
unsigned Op1Reg) {
4185
unsigned Opc = 0;
4186
bool NeedTrunc = false;
4187
uint64_t Mask = 0;
4188
switch (RetVT.SimpleTy) {
4189
default: return 0;
4190
case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4191
case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4192
case MVT::i32: Opc = AArch64::LSRVWr; break;
4193
case MVT::i64: Opc = AArch64::LSRVXr; break;
4194
}
4195
4196
const TargetRegisterClass *RC =
4197
(RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4198
if (NeedTrunc) {
4199
Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4200
Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4201
}
4202
Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4203
if (NeedTrunc)
4204
ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4205
return ResultReg;
4206
}
4207
4208
unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4209
uint64_t Shift, bool IsZExt) {
4210
assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4211
"Unexpected source/return type pair.");
4212
assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4213
SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4214
"Unexpected source value type.");
4215
assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4216
RetVT == MVT::i64) && "Unexpected return value type.");
4217
4218
bool Is64Bit = (RetVT == MVT::i64);
4219
unsigned RegSize = Is64Bit ? 64 : 32;
4220
unsigned DstBits = RetVT.getSizeInBits();
4221
unsigned SrcBits = SrcVT.getSizeInBits();
4222
const TargetRegisterClass *RC =
4223
Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4224
4225
// Just emit a copy for "zero" shifts.
4226
if (Shift == 0) {
4227
if (RetVT == SrcVT) {
4228
Register ResultReg = createResultReg(RC);
4229
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4230
TII.get(TargetOpcode::COPY), ResultReg)
4231
.addReg(Op0);
4232
return ResultReg;
4233
} else
4234
return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4235
}
4236
4237
// Don't deal with undefined shifts.
4238
if (Shift >= DstBits)
4239
return 0;
4240
4241
// For immediate shifts we can fold the zero-/sign-extension into the shift.
4242
// {S|U}BFM Wd, Wn, #r, #s
4243
// Wd<s-r:0> = Wn<s:r> when r <= s
4244
4245
// %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4246
// %2 = lshr i16 %1, 4
4247
// Wd<7-4:0> = Wn<7:4>
4248
// 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4249
// 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4250
// 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4251
4252
// %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4253
// %2 = lshr i16 %1, 8
4254
// Wd<7-7,0> = Wn<7:7>
4255
// 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4256
// 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4257
// 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4258
4259
// %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4260
// %2 = lshr i16 %1, 12
4261
// Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4262
// 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4263
// 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4264
// 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4265
4266
if (Shift >= SrcBits && IsZExt)
4267
return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4268
4269
// It is not possible to fold a sign-extend into the LShr instruction. In this
4270
// case emit a sign-extend.
4271
if (!IsZExt) {
4272
Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4273
if (!Op0)
4274
return 0;
4275
SrcVT = RetVT;
4276
SrcBits = SrcVT.getSizeInBits();
4277
IsZExt = true;
4278
}
4279
4280
unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4281
unsigned ImmS = SrcBits - 1;
4282
static const unsigned OpcTable[2][2] = {
4283
{AArch64::SBFMWri, AArch64::SBFMXri},
4284
{AArch64::UBFMWri, AArch64::UBFMXri}
4285
};
4286
unsigned Opc = OpcTable[IsZExt][Is64Bit];
4287
if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4288
Register TmpReg = MRI.createVirtualRegister(RC);
4289
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4290
TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4291
.addImm(0)
4292
.addReg(Op0)
4293
.addImm(AArch64::sub_32);
4294
Op0 = TmpReg;
4295
}
4296
return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4297
}
4298
4299
unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4300
unsigned Op1Reg) {
4301
unsigned Opc = 0;
4302
bool NeedTrunc = false;
4303
uint64_t Mask = 0;
4304
switch (RetVT.SimpleTy) {
4305
default: return 0;
4306
case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4307
case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4308
case MVT::i32: Opc = AArch64::ASRVWr; break;
4309
case MVT::i64: Opc = AArch64::ASRVXr; break;
4310
}
4311
4312
const TargetRegisterClass *RC =
4313
(RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4314
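// Unlike the logical shifts, the narrow operand is sign-extended rather than
// masked, so the arithmetic shift in the W register sees the correct sign bit.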
if (NeedTrunc) {
4315
Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4316
Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4317
}
4318
Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4319
if (NeedTrunc)
4320
ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4321
return ResultReg;
4322
}
4323
4324
unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4325
uint64_t Shift, bool IsZExt) {
4326
assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4327
"Unexpected source/return type pair.");
4328
assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4329
SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4330
"Unexpected source value type.");
4331
assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4332
RetVT == MVT::i64) && "Unexpected return value type.");
4333
4334
bool Is64Bit = (RetVT == MVT::i64);
4335
unsigned RegSize = Is64Bit ? 64 : 32;
4336
unsigned DstBits = RetVT.getSizeInBits();
4337
unsigned SrcBits = SrcVT.getSizeInBits();
4338
const TargetRegisterClass *RC =
4339
Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4340
4341
// Just emit a copy for "zero" shifts.
4342
if (Shift == 0) {
4343
if (RetVT == SrcVT) {
4344
Register ResultReg = createResultReg(RC);
4345
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4346
TII.get(TargetOpcode::COPY), ResultReg)
4347
.addReg(Op0);
4348
return ResultReg;
4349
} else
4350
return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4351
}
4352
4353
// Don't deal with undefined shifts.
4354
if (Shift >= DstBits)
4355
return 0;
4356
4357
// For immediate shifts we can fold the zero-/sign-extension into the shift.
4358
// {S|U}BFM Wd, Wn, #r, #s
4359
// Wd<s-r:0> = Wn<s:r> when r <= s
4360
4361
// %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4362
// %2 = ashr i16 %1, 4
4363
// Wd<7-4:0> = Wn<7:4>
4364
// 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4365
// 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4366
// 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4367
4368
// %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4369
// %2 = ashr i16 %1, 8
4370
// Wd<7-7,0> = Wn<7:7>
4371
// 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4372
// 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4373
// 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4374
4375
// %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4376
// %2 = ashr i16 %1, 12
4377
// Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4378
// 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4379
// 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4380
// 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4381
4382
if (Shift >= SrcBits && IsZExt)
4383
return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4384
4385
unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4386
unsigned ImmS = SrcBits - 1;
4387
static const unsigned OpcTable[2][2] = {
4388
{AArch64::SBFMWri, AArch64::SBFMXri},
4389
{AArch64::UBFMWri, AArch64::UBFMXri}
4390
};
4391
unsigned Opc = OpcTable[IsZExt][Is64Bit];
4392
if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4393
Register TmpReg = MRI.createVirtualRegister(RC);
4394
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4395
TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4396
.addImm(0)
4397
.addReg(Op0)
4398
.addImm(AArch64::sub_32);
4399
Op0 = TmpReg;
4400
}
4401
return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4402
}
4403
4404
unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4405
bool IsZExt) {
4406
assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4407
4408
// FastISel does not have plumbing to deal with extensions where the SrcVT or
4409
// DestVT are odd things, so test to make sure that they are both types we can
4410
// handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4411
// bail out to SelectionDAG.
4412
if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4413
(DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4414
((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4415
(SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4416
return 0;
4417
4418
unsigned Opc;
4419
unsigned Imm = 0;
4420
4421
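// The bitfield-move immediates chosen below correspond to the usual aliases:
// {U,S}BFM with immr=0 and imms=7 is UXTB/SXTB, imms=15 is UXTH/SXTH, and
// SBFMXri with imms=31 is SXTW.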
switch (SrcVT.SimpleTy) {
4422
default:
4423
return 0;
4424
case MVT::i1:
4425
return emiti1Ext(SrcReg, DestVT, IsZExt);
4426
case MVT::i8:
4427
if (DestVT == MVT::i64)
4428
Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4429
else
4430
Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4431
Imm = 7;
4432
break;
4433
case MVT::i16:
4434
if (DestVT == MVT::i64)
4435
Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4436
else
4437
Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4438
Imm = 15;
4439
break;
4440
case MVT::i32:
4441
assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4442
Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4443
Imm = 31;
4444
break;
4445
}
4446
4447
// Handle i8 and i16 as i32.
4448
if (DestVT == MVT::i8 || DestVT == MVT::i16)
4449
DestVT = MVT::i32;
4450
else if (DestVT == MVT::i64) {
4451
Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4452
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4453
TII.get(AArch64::SUBREG_TO_REG), Src64)
4454
.addImm(0)
4455
.addReg(SrcReg)
4456
.addImm(AArch64::sub_32);
4457
SrcReg = Src64;
4458
}
4459
4460
const TargetRegisterClass *RC =
4461
(DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4462
return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4463
}
4464
4465
static bool isZExtLoad(const MachineInstr *LI) {
4466
switch (LI->getOpcode()) {
4467
default:
4468
return false;
4469
case AArch64::LDURBBi:
4470
case AArch64::LDURHHi:
4471
case AArch64::LDURWi:
4472
case AArch64::LDRBBui:
4473
case AArch64::LDRHHui:
4474
case AArch64::LDRWui:
4475
case AArch64::LDRBBroX:
4476
case AArch64::LDRHHroX:
4477
case AArch64::LDRWroX:
4478
case AArch64::LDRBBroW:
4479
case AArch64::LDRHHroW:
4480
case AArch64::LDRWroW:
4481
return true;
4482
}
4483
}
4484
4485
static bool isSExtLoad(const MachineInstr *LI) {
4486
switch (LI->getOpcode()) {
4487
default:
4488
return false;
4489
case AArch64::LDURSBWi:
4490
case AArch64::LDURSHWi:
4491
case AArch64::LDURSBXi:
4492
case AArch64::LDURSHXi:
4493
case AArch64::LDURSWi:
4494
case AArch64::LDRSBWui:
4495
case AArch64::LDRSHWui:
4496
case AArch64::LDRSBXui:
4497
case AArch64::LDRSHXui:
4498
case AArch64::LDRSWui:
4499
case AArch64::LDRSBWroX:
4500
case AArch64::LDRSHWroX:
4501
case AArch64::LDRSBXroX:
4502
case AArch64::LDRSHXroX:
4503
case AArch64::LDRSWroX:
4504
case AArch64::LDRSBWroW:
4505
case AArch64::LDRSHWroW:
4506
case AArch64::LDRSBXroW:
4507
case AArch64::LDRSHXroW:
4508
case AArch64::LDRSWroW:
4509
return true;
4510
}
4511
}
4512
4513
bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4514
MVT SrcVT) {
4515
const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4516
if (!LI || !LI->hasOneUse())
4517
return false;
4518
4519
// Check if the load instruction has already been selected.
4520
Register Reg = lookUpRegForValue(LI);
4521
if (!Reg)
4522
return false;
4523
4524
MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4525
if (!MI)
4526
return false;
4527
4528
// Check if the correct load instruction has been emitted - SelectionDAG might
4529
// have emitted a zero-extending load, but we need a sign-extending load.
4530
bool IsZExt = isa<ZExtInst>(I);
4531
const auto *LoadMI = MI;
4532
if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4533
LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4534
Register LoadReg = MI->getOperand(1).getReg();
4535
LoadMI = MRI.getUniqueVRegDef(LoadReg);
4536
assert(LoadMI && "Expected valid instruction");
4537
}
4538
if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4539
return false;
4540
4541
// Nothing to be done.
4542
if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4543
updateValueMap(I, Reg);
4544
return true;
4545
}
4546
4547
if (IsZExt) {
4548
Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4549
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4550
TII.get(AArch64::SUBREG_TO_REG), Reg64)
4551
.addImm(0)
4552
.addReg(Reg, getKillRegState(true))
4553
.addImm(AArch64::sub_32);
4554
Reg = Reg64;
4555
} else {
4556
assert((MI->getOpcode() == TargetOpcode::COPY &&
4557
MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4558
"Expected copy instruction");
4559
Reg = MI->getOperand(1).getReg();
4560
MachineBasicBlock::iterator I(MI);
4561
removeDeadCode(I, std::next(I));
4562
}
4563
updateValueMap(I, Reg);
4564
return true;
4565
}
4566
4567
bool AArch64FastISel::selectIntExt(const Instruction *I) {
4568
assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4569
"Unexpected integer extend instruction.");
4570
MVT RetVT;
4571
MVT SrcVT;
4572
if (!isTypeSupported(I->getType(), RetVT))
4573
return false;
4574
4575
if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4576
return false;
4577
4578
// Try to optimize already sign-/zero-extended values from load instructions.
4579
if (optimizeIntExtLoad(I, RetVT, SrcVT))
4580
return true;
4581
4582
Register SrcReg = getRegForValue(I->getOperand(0));
4583
if (!SrcReg)
4584
return false;
4585
4586
// Try to optimize already sign-/zero-extended values from function arguments.
4587
bool IsZExt = isa<ZExtInst>(I);
4588
if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4589
if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4590
if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4591
Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4592
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4593
TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4594
.addImm(0)
4595
.addReg(SrcReg)
4596
.addImm(AArch64::sub_32);
4597
SrcReg = ResultReg;
4598
}
4599
4600
updateValueMap(I, SrcReg);
4601
return true;
4602
}
4603
}
4604
4605
unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4606
if (!ResultReg)
4607
return false;
4608
4609
updateValueMap(I, ResultReg);
4610
return true;
4611
}
4612
4613
bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4614
EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4615
if (!DestEVT.isSimple())
4616
return false;
4617
4618
MVT DestVT = DestEVT.getSimpleVT();
4619
if (DestVT != MVT::i64 && DestVT != MVT::i32)
4620
return false;
4621
4622
unsigned DivOpc;
4623
bool Is64bit = (DestVT == MVT::i64);
4624
switch (ISDOpcode) {
4625
default:
4626
return false;
4627
case ISD::SREM:
4628
DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4629
break;
4630
case ISD::UREM:
4631
DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4632
break;
4633
}
4634
unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4635
Register Src0Reg = getRegForValue(I->getOperand(0));
4636
if (!Src0Reg)
4637
return false;
4638
4639
Register Src1Reg = getRegForValue(I->getOperand(1));
4640
if (!Src1Reg)
4641
return false;
4642
4643
const TargetRegisterClass *RC =
4644
(DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4645
Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4646
assert(QuotReg && "Unexpected DIV instruction emission failure.");
4647
// The remainder is computed as numerator - (quotient * denominator) using the
4648
// MSUB instruction.
4649
Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4650
updateValueMap(I, ResultReg);
4651
return true;
4652
}
4653
4654
bool AArch64FastISel::selectMul(const Instruction *I) {
4655
MVT VT;
4656
if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4657
return false;
4658
4659
if (VT.isVector())
4660
return selectBinaryOp(I, ISD::MUL);
4661
4662
const Value *Src0 = I->getOperand(0);
4663
const Value *Src1 = I->getOperand(1);
4664
if (const auto *C = dyn_cast<ConstantInt>(Src0))
4665
if (C->getValue().isPowerOf2())
4666
std::swap(Src0, Src1);
4667
4668
// Try to simplify to a shift instruction.
4669
if (const auto *C = dyn_cast<ConstantInt>(Src1))
4670
if (C->getValue().isPowerOf2()) {
4671
uint64_t ShiftVal = C->getValue().logBase2();
4672
MVT SrcVT = VT;
4673
bool IsZExt = true;
4674
if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4675
if (!isIntExtFree(ZExt)) {
4676
MVT VT;
4677
if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4678
SrcVT = VT;
4679
IsZExt = true;
4680
Src0 = ZExt->getOperand(0);
4681
}
4682
}
4683
} else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4684
if (!isIntExtFree(SExt)) {
4685
MVT VT;
4686
if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4687
SrcVT = VT;
4688
IsZExt = false;
4689
Src0 = SExt->getOperand(0);
4690
}
4691
}
4692
}
4693
4694
Register Src0Reg = getRegForValue(Src0);
4695
if (!Src0Reg)
4696
return false;
4697
4698
unsigned ResultReg =
4699
emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4700
4701
if (ResultReg) {
4702
updateValueMap(I, ResultReg);
4703
return true;
4704
}
4705
}
4706
4707
Register Src0Reg = getRegForValue(I->getOperand(0));
4708
if (!Src0Reg)
4709
return false;
4710
4711
Register Src1Reg = getRegForValue(I->getOperand(1));
4712
if (!Src1Reg)
4713
return false;
4714
4715
unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4716
4717
if (!ResultReg)
4718
return false;
4719
4720
updateValueMap(I, ResultReg);
4721
return true;
4722
}
4723
4724
bool AArch64FastISel::selectShift(const Instruction *I) {
4725
MVT RetVT;
4726
if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4727
return false;
4728
4729
if (RetVT.isVector())
4730
return selectOperator(I, I->getOpcode());
4731
4732
if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4733
unsigned ResultReg = 0;
4734
uint64_t ShiftVal = C->getZExtValue();
4735
MVT SrcVT = RetVT;
4736
bool IsZExt = I->getOpcode() != Instruction::AShr;
4737
const Value *Op0 = I->getOperand(0);
4738
if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4739
if (!isIntExtFree(ZExt)) {
4740
MVT TmpVT;
4741
if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4742
SrcVT = TmpVT;
4743
IsZExt = true;
4744
Op0 = ZExt->getOperand(0);
4745
}
4746
}
4747
} else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4748
if (!isIntExtFree(SExt)) {
4749
MVT TmpVT;
4750
if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4751
SrcVT = TmpVT;
4752
IsZExt = false;
4753
Op0 = SExt->getOperand(0);
4754
}
4755
}
4756
}
4757
4758
Register Op0Reg = getRegForValue(Op0);
4759
if (!Op0Reg)
4760
return false;
4761
4762
switch (I->getOpcode()) {
4763
default: llvm_unreachable("Unexpected instruction.");
4764
case Instruction::Shl:
4765
ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4766
break;
4767
case Instruction::AShr:
4768
ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4769
break;
4770
case Instruction::LShr:
4771
ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4772
break;
4773
}
4774
if (!ResultReg)
4775
return false;
4776
4777
updateValueMap(I, ResultReg);
4778
return true;
4779
}
4780
4781
Register Op0Reg = getRegForValue(I->getOperand(0));
4782
if (!Op0Reg)
4783
return false;
4784
4785
Register Op1Reg = getRegForValue(I->getOperand(1));
4786
if (!Op1Reg)
4787
return false;
4788
4789
unsigned ResultReg = 0;
4790
switch (I->getOpcode()) {
4791
default: llvm_unreachable("Unexpected instruction.");
4792
case Instruction::Shl:
4793
ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4794
break;
4795
case Instruction::AShr:
4796
ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4797
break;
4798
case Instruction::LShr:
4799
ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4800
break;
4801
}
4802
4803
if (!ResultReg)
4804
return false;
4805
4806
updateValueMap(I, ResultReg);
4807
return true;
4808
}
4809
4810
bool AArch64FastISel::selectBitCast(const Instruction *I) {
4811
MVT RetVT, SrcVT;
4812
4813
if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4814
return false;
4815
if (!isTypeLegal(I->getType(), RetVT))
4816
return false;
4817
4818
unsigned Opc;
4819
if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4820
Opc = AArch64::FMOVWSr;
4821
else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4822
Opc = AArch64::FMOVXDr;
4823
else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4824
Opc = AArch64::FMOVSWr;
4825
else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4826
Opc = AArch64::FMOVDXr;
4827
else
4828
return false;
4829
4830
const TargetRegisterClass *RC = nullptr;
4831
switch (RetVT.SimpleTy) {
4832
default: llvm_unreachable("Unexpected value type.");
4833
case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4834
case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4835
case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4836
case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4837
}
4838
Register Op0Reg = getRegForValue(I->getOperand(0));
4839
if (!Op0Reg)
4840
return false;
4841
4842
Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4843
if (!ResultReg)
4844
return false;
4845
4846
updateValueMap(I, ResultReg);
4847
return true;
4848
}
4849
4850
bool AArch64FastISel::selectFRem(const Instruction *I) {
4851
MVT RetVT;
4852
if (!isTypeLegal(I->getType(), RetVT))
4853
return false;
4854
4855
RTLIB::Libcall LC;
4856
switch (RetVT.SimpleTy) {
4857
default:
4858
return false;
4859
case MVT::f32:
4860
LC = RTLIB::REM_F32;
4861
break;
4862
case MVT::f64:
4863
LC = RTLIB::REM_F64;
4864
break;
4865
}
4866
4867
ArgListTy Args;
4868
Args.reserve(I->getNumOperands());
4869
4870
// Populate the argument list.
4871
for (auto &Arg : I->operands()) {
4872
ArgListEntry Entry;
4873
Entry.Val = Arg;
4874
Entry.Ty = Arg->getType();
4875
Args.push_back(Entry);
4876
}
4877
4878
CallLoweringInfo CLI;
4879
MCContext &Ctx = MF->getContext();
4880
CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4881
TLI.getLibcallName(LC), std::move(Args));
4882
if (!lowerCallTo(CLI))
4883
return false;
4884
updateValueMap(I, CLI.ResultReg);
4885
return true;
4886
}
4887
4888
bool AArch64FastISel::selectSDiv(const Instruction *I) {
4889
MVT VT;
4890
if (!isTypeLegal(I->getType(), VT))
4891
return false;
4892
4893
if (!isa<ConstantInt>(I->getOperand(1)))
4894
return selectBinaryOp(I, ISD::SDIV);
4895
4896
const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4897
if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4898
!(C.isPowerOf2() || C.isNegatedPowerOf2()))
4899
return selectBinaryOp(I, ISD::SDIV);
4900
4901
unsigned Lg2 = C.countr_zero();
4902
Register Src0Reg = getRegForValue(I->getOperand(0));
4903
if (!Src0Reg)
4904
return false;
4905
4906
if (cast<BinaryOperator>(I)->isExact()) {
4907
unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4908
if (!ResultReg)
4909
return false;
4910
updateValueMap(I, ResultReg);
4911
return true;
4912
}
4913
4914
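// Non-exact signed division by 2^Lg2 rounds toward zero: add (2^Lg2 - 1) only
// when the dividend is negative (the CMP + CSEL below), arithmetic-shift
// right by Lg2, and negate the result if the divisor itself was negative.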
int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4915
unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4916
if (!AddReg)
4917
return false;
4918
4919
// (Src0 < 0) ? Pow2 - 1 : 0;
4920
if (!emitICmp_ri(VT, Src0Reg, 0))
4921
return false;
4922
4923
unsigned SelectOpc;
4924
const TargetRegisterClass *RC;
4925
if (VT == MVT::i64) {
4926
SelectOpc = AArch64::CSELXr;
4927
RC = &AArch64::GPR64RegClass;
4928
} else {
4929
SelectOpc = AArch64::CSELWr;
4930
RC = &AArch64::GPR32RegClass;
4931
}
4932
Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4933
AArch64CC::LT);
4934
if (!SelectReg)
4935
return false;
4936
4937
// Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4938
// negate the result.
4939
unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4940
unsigned ResultReg;
4941
if (C.isNegative())
4942
ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4943
AArch64_AM::ASR, Lg2);
4944
else
4945
ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4946
4947
if (!ResultReg)
4948
return false;
4949
4950
updateValueMap(I, ResultReg);
4951
return true;
4952
}
4953
4954
/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4955
/// have to duplicate it for AArch64, because otherwise we would fail during the
4956
/// sign-extend emission.
4957
unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4958
Register IdxN = getRegForValue(Idx);
4959
if (IdxN == 0)
4960
// Unhandled operand. Halt "fast" selection and bail.
4961
return 0;
4962
4963
// If the index is smaller or larger than intptr_t, truncate or extend it.
4964
MVT PtrVT = TLI.getPointerTy(DL);
4965
EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4966
if (IdxVT.bitsLT(PtrVT)) {
4967
IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4968
} else if (IdxVT.bitsGT(PtrVT))
4969
llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4970
return IdxN;
4971
}
4972
4973
/// This is mostly a copy of the existing FastISel GEP code, but we have to
/// duplicate it for AArch64, because otherwise we would bail out even for
/// simple cases. This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficiently.
4977
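// Rough sketch of the folding below (types and offsets are illustrative):
//   %p = getelementptr inbounds %struct.S, ptr %base, i64 %i, i32 1
// is selected as a MUL of %i by the element size, an ADD of that product to
// %base, and a single trailing ADD of the accumulated constant offsets
// (here, the offset of field 1).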
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4978
if (Subtarget->isTargetILP32())
4979
return false;
4980
4981
Register N = getRegForValue(I->getOperand(0));
4982
if (!N)
4983
return false;
4984
4985
// Keep a running tab of the total offset to coalesce multiple N = N + Offset
4986
// into a single N = N + TotalOffset.
4987
uint64_t TotalOffs = 0;
4988
MVT VT = TLI.getPointerTy(DL);
4989
for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4990
GTI != E; ++GTI) {
4991
const Value *Idx = GTI.getOperand();
4992
if (auto *StTy = GTI.getStructTypeOrNull()) {
4993
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4994
// N = N + Offset
4995
if (Field)
4996
TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4997
} else {
4998
// If this is a constant subscript, handle it quickly.
4999
if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
5000
if (CI->isZero())
5001
continue;
5002
// N = N + Offset
5003
TotalOffs += GTI.getSequentialElementStride(DL) *
5004
cast<ConstantInt>(CI)->getSExtValue();
5005
continue;
5006
}
5007
if (TotalOffs) {
5008
N = emitAdd_ri_(VT, N, TotalOffs);
5009
if (!N)
5010
return false;
5011
TotalOffs = 0;
5012
}
5013
5014
// N = N + Idx * ElementSize;
5015
uint64_t ElementSize = GTI.getSequentialElementStride(DL);
5016
unsigned IdxN = getRegForGEPIndex(Idx);
5017
if (!IdxN)
5018
return false;
5019
5020
if (ElementSize != 1) {
5021
unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5022
if (!C)
5023
return false;
5024
IdxN = emitMul_rr(VT, IdxN, C);
5025
if (!IdxN)
5026
return false;
5027
}
5028
N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5029
if (!N)
5030
return false;
5031
}
5032
}
5033
if (TotalOffs) {
5034
N = emitAdd_ri_(VT, N, TotalOffs);
5035
if (!N)
5036
return false;
5037
}
5038
updateValueMap(I, N);
5039
return true;
5040
}
5041
5042
bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5043
assert(TM.getOptLevel() == CodeGenOptLevel::None &&
5044
"cmpxchg survived AtomicExpand at optlevel > -O0");
5045
5046
auto *RetPairTy = cast<StructType>(I->getType());
5047
Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5048
assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5049
"cmpxchg has a non-i1 status result");
5050
5051
MVT VT;
5052
if (!isTypeLegal(RetTy, VT))
5053
return false;
5054
5055
const TargetRegisterClass *ResRC;
5056
unsigned Opc, CmpOpc;
5057
// This only supports i32/i64, because i8/i16 aren't legal, and the generic
5058
// extractvalue selection doesn't support that.
5059
if (VT == MVT::i32) {
5060
Opc = AArch64::CMP_SWAP_32;
5061
CmpOpc = AArch64::SUBSWrs;
5062
ResRC = &AArch64::GPR32RegClass;
5063
} else if (VT == MVT::i64) {
5064
Opc = AArch64::CMP_SWAP_64;
5065
CmpOpc = AArch64::SUBSXrs;
5066
ResRC = &AArch64::GPR64RegClass;
5067
} else {
5068
return false;
5069
}
5070
5071
const MCInstrDesc &II = TII.get(Opc);
5072
5073
const Register AddrReg = constrainOperandRegClass(
5074
II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5075
const Register DesiredReg = constrainOperandRegClass(
5076
II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5077
const Register NewReg = constrainOperandRegClass(
5078
II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5079
5080
const Register ResultReg1 = createResultReg(ResRC);
5081
const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5082
const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5083
5084
// FIXME: MachineMemOperand doesn't support cmpxchg yet.
5085
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5086
.addDef(ResultReg1)
5087
.addDef(ScratchReg)
5088
.addUse(AddrReg)
5089
.addUse(DesiredReg)
5090
.addUse(NewReg);
5091
5092
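// The SUBS below compares the value loaded by CMP_SWAP with the expected
// value, and the following CSINC (a CSET of the EQ condition) materializes
// the i1 success flag in ResultReg2.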
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5093
.addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5094
.addUse(ResultReg1)
5095
.addUse(DesiredReg)
5096
.addImm(0);
5097
5098
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5099
.addDef(ResultReg2)
5100
.addUse(AArch64::WZR)
5101
.addUse(AArch64::WZR)
5102
.addImm(AArch64CC::NE);
5103
5104
assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5105
updateValueMap(I, ResultReg1, 2);
5106
return true;
5107
}
5108
5109
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5110
if (TLI.fallBackToDAGISel(*I))
5111
return false;
5112
switch (I->getOpcode()) {
5113
default:
5114
break;
5115
case Instruction::Add:
5116
case Instruction::Sub:
5117
return selectAddSub(I);
5118
case Instruction::Mul:
5119
return selectMul(I);
5120
case Instruction::SDiv:
5121
return selectSDiv(I);
5122
case Instruction::SRem:
5123
if (!selectBinaryOp(I, ISD::SREM))
5124
return selectRem(I, ISD::SREM);
5125
return true;
5126
case Instruction::URem:
5127
if (!selectBinaryOp(I, ISD::UREM))
5128
return selectRem(I, ISD::UREM);
5129
return true;
5130
case Instruction::Shl:
5131
case Instruction::LShr:
5132
case Instruction::AShr:
5133
return selectShift(I);
5134
case Instruction::And:
5135
case Instruction::Or:
5136
case Instruction::Xor:
5137
return selectLogicalOp(I);
5138
case Instruction::Br:
5139
return selectBranch(I);
5140
case Instruction::IndirectBr:
5141
return selectIndirectBr(I);
5142
case Instruction::BitCast:
5143
if (!FastISel::selectBitCast(I))
5144
return selectBitCast(I);
5145
return true;
5146
case Instruction::FPToSI:
5147
if (!selectCast(I, ISD::FP_TO_SINT))
5148
return selectFPToInt(I, /*Signed=*/true);
5149
return true;
5150
case Instruction::FPToUI:
5151
return selectFPToInt(I, /*Signed=*/false);
5152
case Instruction::ZExt:
5153
case Instruction::SExt:
5154
return selectIntExt(I);
5155
case Instruction::Trunc:
5156
if (!selectCast(I, ISD::TRUNCATE))
5157
return selectTrunc(I);
5158
return true;
5159
case Instruction::FPExt:
5160
return selectFPExt(I);
5161
case Instruction::FPTrunc:
5162
return selectFPTrunc(I);
5163
case Instruction::SIToFP:
5164
if (!selectCast(I, ISD::SINT_TO_FP))
5165
return selectIntToFP(I, /*Signed=*/true);
5166
return true;
5167
case Instruction::UIToFP:
5168
return selectIntToFP(I, /*Signed=*/false);
5169
case Instruction::Load:
5170
return selectLoad(I);
5171
case Instruction::Store:
5172
return selectStore(I);
5173
case Instruction::FCmp:
5174
case Instruction::ICmp:
5175
return selectCmp(I);
5176
case Instruction::Select:
5177
return selectSelect(I);
5178
case Instruction::Ret:
5179
return selectRet(I);
5180
case Instruction::FRem:
5181
return selectFRem(I);
5182
case Instruction::GetElementPtr:
5183
return selectGetElementPtr(I);
5184
case Instruction::AtomicCmpXchg:
5185
return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5186
}
5187
5188
// Fall back to target-independent instruction selection.
5189
return selectOperator(I, I->getOpcode());
5190
}
5191
5192
FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5193
const TargetLibraryInfo *LibInfo) {
5194
5195
SMEAttrs CallerAttrs(*FuncInfo.Fn);
5196
if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
5197
CallerAttrs.hasStreamingInterfaceOrBody() ||
5198
CallerAttrs.hasStreamingCompatibleInterface())
5199
return nullptr;
5200
return new AArch64FastISel(FuncInfo, LibInfo);
5201
}