Path: blob/main/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp
35268 views
//===- AArch6464FastISel.cpp - AArch64 FastISel implementation ------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file defines the AArch64-specific support for the FastISel class. Some9// of the target-specific code is generated by tablegen in the file10// AArch64GenFastISel.inc, which is #included here.11//12//===----------------------------------------------------------------------===//1314#include "AArch64.h"15#include "AArch64CallingConvention.h"16#include "AArch64MachineFunctionInfo.h"17#include "AArch64RegisterInfo.h"18#include "AArch64Subtarget.h"19#include "MCTargetDesc/AArch64AddressingModes.h"20#include "Utils/AArch64BaseInfo.h"21#include "llvm/ADT/APFloat.h"22#include "llvm/ADT/APInt.h"23#include "llvm/ADT/DenseMap.h"24#include "llvm/ADT/SmallVector.h"25#include "llvm/Analysis/BranchProbabilityInfo.h"26#include "llvm/CodeGen/CallingConvLower.h"27#include "llvm/CodeGen/FastISel.h"28#include "llvm/CodeGen/FunctionLoweringInfo.h"29#include "llvm/CodeGen/ISDOpcodes.h"30#include "llvm/CodeGen/MachineBasicBlock.h"31#include "llvm/CodeGen/MachineConstantPool.h"32#include "llvm/CodeGen/MachineFrameInfo.h"33#include "llvm/CodeGen/MachineInstr.h"34#include "llvm/CodeGen/MachineInstrBuilder.h"35#include "llvm/CodeGen/MachineMemOperand.h"36#include "llvm/CodeGen/MachineRegisterInfo.h"37#include "llvm/CodeGen/RuntimeLibcallUtil.h"38#include "llvm/CodeGen/ValueTypes.h"39#include "llvm/CodeGenTypes/MachineValueType.h"40#include "llvm/IR/Argument.h"41#include "llvm/IR/Attributes.h"42#include "llvm/IR/BasicBlock.h"43#include "llvm/IR/CallingConv.h"44#include "llvm/IR/Constant.h"45#include "llvm/IR/Constants.h"46#include "llvm/IR/DataLayout.h"47#include "llvm/IR/DerivedTypes.h"48#include "llvm/IR/Function.h"49#include "llvm/IR/GetElementPtrTypeIterator.h"50#include "llvm/IR/GlobalValue.h"51#include "llvm/IR/InstrTypes.h"52#include "llvm/IR/Instruction.h"53#include "llvm/IR/Instructions.h"54#include "llvm/IR/IntrinsicInst.h"55#include "llvm/IR/Intrinsics.h"56#include "llvm/IR/IntrinsicsAArch64.h"57#include "llvm/IR/Module.h"58#include "llvm/IR/Operator.h"59#include "llvm/IR/Type.h"60#include "llvm/IR/User.h"61#include "llvm/IR/Value.h"62#include "llvm/MC/MCInstrDesc.h"63#include "llvm/MC/MCRegisterInfo.h"64#include "llvm/MC/MCSymbol.h"65#include "llvm/Support/AtomicOrdering.h"66#include "llvm/Support/Casting.h"67#include "llvm/Support/CodeGen.h"68#include "llvm/Support/Compiler.h"69#include "llvm/Support/ErrorHandling.h"70#include "llvm/Support/MathExtras.h"71#include <algorithm>72#include <cassert>73#include <cstdint>74#include <iterator>75#include <utility>7677using namespace llvm;7879namespace {8081class AArch64FastISel final : public FastISel {82class Address {83public:84using BaseKind = enum {85RegBase,86FrameIndexBase87};8889private:90BaseKind Kind = RegBase;91AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;92union {93unsigned Reg;94int FI;95} Base;96unsigned OffsetReg = 0;97unsigned Shift = 0;98int64_t Offset = 0;99const GlobalValue *GV = nullptr;100101public:102Address() { Base.Reg = 0; }103104void setKind(BaseKind K) { Kind = K; }105BaseKind getKind() const { return Kind; }106void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }107AArch64_AM::ShiftExtendType getExtendType() const { return 
ExtType; }108bool isRegBase() const { return Kind == RegBase; }109bool isFIBase() const { return Kind == FrameIndexBase; }110111void setReg(unsigned Reg) {112assert(isRegBase() && "Invalid base register access!");113Base.Reg = Reg;114}115116unsigned getReg() const {117assert(isRegBase() && "Invalid base register access!");118return Base.Reg;119}120121void setOffsetReg(unsigned Reg) {122OffsetReg = Reg;123}124125unsigned getOffsetReg() const {126return OffsetReg;127}128129void setFI(unsigned FI) {130assert(isFIBase() && "Invalid base frame index access!");131Base.FI = FI;132}133134unsigned getFI() const {135assert(isFIBase() && "Invalid base frame index access!");136return Base.FI;137}138139void setOffset(int64_t O) { Offset = O; }140int64_t getOffset() { return Offset; }141void setShift(unsigned S) { Shift = S; }142unsigned getShift() { return Shift; }143144void setGlobalValue(const GlobalValue *G) { GV = G; }145const GlobalValue *getGlobalValue() { return GV; }146};147148/// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can149/// make the right decision when generating code for different targets.150const AArch64Subtarget *Subtarget;151LLVMContext *Context;152153bool fastLowerArguments() override;154bool fastLowerCall(CallLoweringInfo &CLI) override;155bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;156157private:158// Selection routines.159bool selectAddSub(const Instruction *I);160bool selectLogicalOp(const Instruction *I);161bool selectLoad(const Instruction *I);162bool selectStore(const Instruction *I);163bool selectBranch(const Instruction *I);164bool selectIndirectBr(const Instruction *I);165bool selectCmp(const Instruction *I);166bool selectSelect(const Instruction *I);167bool selectFPExt(const Instruction *I);168bool selectFPTrunc(const Instruction *I);169bool selectFPToInt(const Instruction *I, bool Signed);170bool selectIntToFP(const Instruction *I, bool Signed);171bool selectRem(const Instruction *I, unsigned ISDOpcode);172bool selectRet(const Instruction *I);173bool selectTrunc(const Instruction *I);174bool selectIntExt(const Instruction *I);175bool selectMul(const Instruction *I);176bool selectShift(const Instruction *I);177bool selectBitCast(const Instruction *I);178bool selectFRem(const Instruction *I);179bool selectSDiv(const Instruction *I);180bool selectGetElementPtr(const Instruction *I);181bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);182183// Utility helper routines.184bool isTypeLegal(Type *Ty, MVT &VT);185bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);186bool isValueAvailable(const Value *V) const;187bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);188bool computeCallAddress(const Value *V, Address &Addr);189bool simplifyAddress(Address &Addr, MVT VT);190void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,191MachineMemOperand::Flags Flags,192unsigned ScaleFactor, MachineMemOperand *MMO);193bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);194bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,195MaybeAlign Alignment);196bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,197const Value *Cond);198bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);199bool optimizeSelect(const SelectInst *SI);200unsigned getRegForGEPIndex(const Value *Idx);201202// Emit helper routines.203unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,204const Value *RHS, bool SetFlags = false,205bool WantResult = true, bool 
IsZExt = false);206unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,207unsigned RHSReg, bool SetFlags = false,208bool WantResult = true);209unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,210uint64_t Imm, bool SetFlags = false,211bool WantResult = true);212unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,213unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,214uint64_t ShiftImm, bool SetFlags = false,215bool WantResult = true);216unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,217unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,218uint64_t ShiftImm, bool SetFlags = false,219bool WantResult = true);220221// Emit functions.222bool emitCompareAndBranch(const BranchInst *BI);223bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);224bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);225bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);226bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);227unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,228MachineMemOperand *MMO = nullptr);229bool emitStore(MVT VT, unsigned SrcReg, Address Addr,230MachineMemOperand *MMO = nullptr);231bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,232MachineMemOperand *MMO = nullptr);233unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);234unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);235unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,236bool SetFlags = false, bool WantResult = true,237bool IsZExt = false);238unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);239unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,240bool SetFlags = false, bool WantResult = true,241bool IsZExt = false);242unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,243bool WantResult = true);244unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,245AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,246bool WantResult = true);247unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,248const Value *RHS);249unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,250uint64_t Imm);251unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,252unsigned RHSReg, uint64_t ShiftImm);253unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);254unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);255unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);256unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);257unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);258unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,259bool IsZExt = true);260unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);261unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,262bool IsZExt = true);263unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);264unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,265bool IsZExt = false);266267unsigned materializeInt(const ConstantInt *CI, MVT VT);268unsigned materializeFP(const ConstantFP *CFP, MVT VT);269unsigned materializeGV(const GlobalValue *GV);270271// Call handling routines.272private:273CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;274bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,275unsigned &NumBytes);276bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);277278public:279// 
Backend specific FastISel code.280unsigned fastMaterializeAlloca(const AllocaInst *AI) override;281unsigned fastMaterializeConstant(const Constant *C) override;282unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;283284explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,285const TargetLibraryInfo *LibInfo)286: FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {287Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();288Context = &FuncInfo.Fn->getContext();289}290291bool fastSelectInstruction(const Instruction *I) override;292293#include "AArch64GenFastISel.inc"294};295296} // end anonymous namespace297298/// Check if the sign-/zero-extend will be a noop.299static bool isIntExtFree(const Instruction *I) {300assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&301"Unexpected integer extend instruction.");302assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&303"Unexpected value type.");304bool IsZExt = isa<ZExtInst>(I);305306if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))307if (LI->hasOneUse())308return true;309310if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))311if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))312return true;313314return false;315}316317/// Determine the implicit scale factor that is applied by a memory318/// operation for a given value type.319static unsigned getImplicitScaleFactor(MVT VT) {320switch (VT.SimpleTy) {321default:322return 0; // invalid323case MVT::i1: // fall-through324case MVT::i8:325return 1;326case MVT::i16:327return 2;328case MVT::i32: // fall-through329case MVT::f32:330return 4;331case MVT::i64: // fall-through332case MVT::f64:333return 8;334}335}336337CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {338if (CC == CallingConv::GHC)339return CC_AArch64_GHC;340if (CC == CallingConv::CFGuard_Check)341return CC_AArch64_Win64_CFGuard_Check;342if (Subtarget->isTargetDarwin())343return CC_AArch64_DarwinPCS;344if (Subtarget->isTargetWindows())345return CC_AArch64_Win64PCS;346return CC_AArch64_AAPCS;347}348349unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {350assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&351"Alloca should always return a pointer.");352353// Don't handle dynamic allocas.354if (!FuncInfo.StaticAllocaMap.count(AI))355return 0;356357DenseMap<const AllocaInst *, int>::iterator SI =358FuncInfo.StaticAllocaMap.find(AI);359360if (SI != FuncInfo.StaticAllocaMap.end()) {361Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);362BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),363ResultReg)364.addFrameIndex(SI->second)365.addImm(0)366.addImm(0);367return ResultReg;368}369370return 0;371}372373unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {374if (VT > MVT::i64)375return 0;376377if (!CI->isZero())378return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());379380// Create a copy from the zero register to materialize a "0" value.381const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass382: &AArch64::GPR32RegClass;383unsigned ZeroReg = (VT == MVT::i64) ? 
AArch64::XZR : AArch64::WZR;384Register ResultReg = createResultReg(RC);385BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),386ResultReg).addReg(ZeroReg, getKillRegState(true));387return ResultReg;388}389390unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {391// Positive zero (+0.0) has to be materialized with a fmov from the zero392// register, because the immediate version of fmov cannot encode zero.393if (CFP->isNullValue())394return fastMaterializeFloatZero(CFP);395396if (VT != MVT::f32 && VT != MVT::f64)397return 0;398399const APFloat Val = CFP->getValueAPF();400bool Is64Bit = (VT == MVT::f64);401// This checks to see if we can use FMOV instructions to materialize402// a constant, otherwise we have to materialize via the constant pool.403int Imm =404Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);405if (Imm != -1) {406unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;407return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);408}409410// For the large code model materialize the FP constant in code.411if (TM.getCodeModel() == CodeModel::Large) {412unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;413const TargetRegisterClass *RC = Is64Bit ?414&AArch64::GPR64RegClass : &AArch64::GPR32RegClass;415416Register TmpReg = createResultReg(RC);417BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)418.addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());419420Register ResultReg = createResultReg(TLI.getRegClassFor(VT));421BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,422TII.get(TargetOpcode::COPY), ResultReg)423.addReg(TmpReg, getKillRegState(true));424425return ResultReg;426}427428// Materialize via constant pool. MachineConstantPool wants an explicit429// alignment.430Align Alignment = DL.getPrefTypeAlign(CFP->getType());431432unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);433Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);434BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),435ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);436437unsigned Opc = Is64Bit ? 
AArch64::LDRDui : AArch64::LDRSui;438Register ResultReg = createResultReg(TLI.getRegClassFor(VT));439BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)440.addReg(ADRPReg)441.addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);442return ResultReg;443}444445unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {446// We can't handle thread-local variables quickly yet.447if (GV->isThreadLocal())448return 0;449450// MachO still uses GOT for large code-model accesses, but ELF requires451// movz/movk sequences, which FastISel doesn't handle yet.452if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())453return 0;454455unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);456457EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);458if (!DestEVT.isSimple())459return 0;460461Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);462unsigned ResultReg;463464if (OpFlags & AArch64II::MO_GOT) {465// ADRP + LDRX466BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),467ADRPReg)468.addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);469470unsigned LdrOpc;471if (Subtarget->isTargetILP32()) {472ResultReg = createResultReg(&AArch64::GPR32RegClass);473LdrOpc = AArch64::LDRWui;474} else {475ResultReg = createResultReg(&AArch64::GPR64RegClass);476LdrOpc = AArch64::LDRXui;477}478BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),479ResultReg)480.addReg(ADRPReg)481.addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |482AArch64II::MO_NC | OpFlags);483if (!Subtarget->isTargetILP32())484return ResultReg;485486// LDRWui produces a 32-bit register, but pointers in-register are 64-bits487// so we must extend the result on ILP32.488Register Result64 = createResultReg(&AArch64::GPR64RegClass);489BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,490TII.get(TargetOpcode::SUBREG_TO_REG))491.addDef(Result64)492.addImm(0)493.addReg(ResultReg, RegState::Kill)494.addImm(AArch64::sub_32);495return Result64;496} else {497// ADRP + ADDX498BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),499ADRPReg)500.addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);501502if (OpFlags & AArch64II::MO_TAGGED) {503// MO_TAGGED on the page indicates a tagged address. Set the tag now.504// We do so by creating a MOVK that sets bits 48-63 of the register to505// (global address + 0x100000000 - PC) >> 48. This assumes that we're in506// the small code model so we can assume a binary size of <= 4GB, which507// makes the untagged PC relative offset positive. The binary must also be508// loaded into address range [0, 2^48). Both of these properties need to509// be ensured at runtime when using tagged addresses.510//511// TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that512// also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands513// are not exactly 1:1 with FastISel so we cannot easily abstract this514// out. 
At some point, it would be nice to find a way to not have this515// duplciate code.516unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);517BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),518DstReg)519.addReg(ADRPReg)520.addGlobalAddress(GV, /*Offset=*/0x100000000,521AArch64II::MO_PREL | AArch64II::MO_G3)522.addImm(48);523ADRPReg = DstReg;524}525526ResultReg = createResultReg(&AArch64::GPR64spRegClass);527BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),528ResultReg)529.addReg(ADRPReg)530.addGlobalAddress(GV, 0,531AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)532.addImm(0);533}534return ResultReg;535}536537unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {538EVT CEVT = TLI.getValueType(DL, C->getType(), true);539540// Only handle simple types.541if (!CEVT.isSimple())542return 0;543MVT VT = CEVT.getSimpleVT();544// arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,545// 'null' pointers need to have a somewhat special treatment.546if (isa<ConstantPointerNull>(C)) {547assert(VT == MVT::i64 && "Expected 64-bit pointers");548return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);549}550551if (const auto *CI = dyn_cast<ConstantInt>(C))552return materializeInt(CI, VT);553else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))554return materializeFP(CFP, VT);555else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))556return materializeGV(GV);557558return 0;559}560561unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {562assert(CFP->isNullValue() &&563"Floating-point constant is not a positive zero.");564MVT VT;565if (!isTypeLegal(CFP->getType(), VT))566return 0;567568if (VT != MVT::f32 && VT != MVT::f64)569return 0;570571bool Is64Bit = (VT == MVT::f64);572unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;573unsigned Opc = Is64Bit ? 
AArch64::FMOVXDr : AArch64::FMOVWSr;574return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);575}576577/// Check if the multiply is by a power-of-2 constant.578static bool isMulPowOf2(const Value *I) {579if (const auto *MI = dyn_cast<MulOperator>(I)) {580if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))581if (C->getValue().isPowerOf2())582return true;583if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))584if (C->getValue().isPowerOf2())585return true;586}587return false;588}589590// Computes the address to get to an object.591bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)592{593const User *U = nullptr;594unsigned Opcode = Instruction::UserOp1;595if (const Instruction *I = dyn_cast<Instruction>(Obj)) {596// Don't walk into other basic blocks unless the object is an alloca from597// another block, otherwise it may not have a virtual register assigned.598if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||599FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {600Opcode = I->getOpcode();601U = I;602}603} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {604Opcode = C->getOpcode();605U = C;606}607608if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))609if (Ty->getAddressSpace() > 255)610// Fast instruction selection doesn't support the special611// address spaces.612return false;613614switch (Opcode) {615default:616break;617case Instruction::BitCast:618// Look through bitcasts.619return computeAddress(U->getOperand(0), Addr, Ty);620621case Instruction::IntToPtr:622// Look past no-op inttoptrs.623if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==624TLI.getPointerTy(DL))625return computeAddress(U->getOperand(0), Addr, Ty);626break;627628case Instruction::PtrToInt:629// Look past no-op ptrtoints.630if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))631return computeAddress(U->getOperand(0), Addr, Ty);632break;633634case Instruction::GetElementPtr: {635Address SavedAddr = Addr;636uint64_t TmpOffset = Addr.getOffset();637638// Iterate through the GEP folding the constants into offsets where639// we can.640for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);641GTI != E; ++GTI) {642const Value *Op = GTI.getOperand();643if (StructType *STy = GTI.getStructTypeOrNull()) {644const StructLayout *SL = DL.getStructLayout(STy);645unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();646TmpOffset += SL->getElementOffset(Idx);647} else {648uint64_t S = GTI.getSequentialElementStride(DL);649while (true) {650if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {651// Constant-offset addressing.652TmpOffset += CI->getSExtValue() * S;653break;654}655if (canFoldAddIntoGEP(U, Op)) {656// A compatible add with a constant operand. 
Fold the constant.657ConstantInt *CI =658cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));659TmpOffset += CI->getSExtValue() * S;660// Iterate on the other operand.661Op = cast<AddOperator>(Op)->getOperand(0);662continue;663}664// Unsupported665goto unsupported_gep;666}667}668}669670// Try to grab the base operand now.671Addr.setOffset(TmpOffset);672if (computeAddress(U->getOperand(0), Addr, Ty))673return true;674675// We failed, restore everything and try the other options.676Addr = SavedAddr;677678unsupported_gep:679break;680}681case Instruction::Alloca: {682const AllocaInst *AI = cast<AllocaInst>(Obj);683DenseMap<const AllocaInst *, int>::iterator SI =684FuncInfo.StaticAllocaMap.find(AI);685if (SI != FuncInfo.StaticAllocaMap.end()) {686Addr.setKind(Address::FrameIndexBase);687Addr.setFI(SI->second);688return true;689}690break;691}692case Instruction::Add: {693// Adds of constants are common and easy enough.694const Value *LHS = U->getOperand(0);695const Value *RHS = U->getOperand(1);696697if (isa<ConstantInt>(LHS))698std::swap(LHS, RHS);699700if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {701Addr.setOffset(Addr.getOffset() + CI->getSExtValue());702return computeAddress(LHS, Addr, Ty);703}704705Address Backup = Addr;706if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))707return true;708Addr = Backup;709710break;711}712case Instruction::Sub: {713// Subs of constants are common and easy enough.714const Value *LHS = U->getOperand(0);715const Value *RHS = U->getOperand(1);716717if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {718Addr.setOffset(Addr.getOffset() - CI->getSExtValue());719return computeAddress(LHS, Addr, Ty);720}721break;722}723case Instruction::Shl: {724if (Addr.getOffsetReg())725break;726727const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));728if (!CI)729break;730731unsigned Val = CI->getZExtValue();732if (Val < 1 || Val > 3)733break;734735uint64_t NumBytes = 0;736if (Ty && Ty->isSized()) {737uint64_t NumBits = DL.getTypeSizeInBits(Ty);738NumBytes = NumBits / 8;739if (!isPowerOf2_64(NumBits))740NumBytes = 0;741}742743if (NumBytes != (1ULL << Val))744break;745746Addr.setShift(Val);747Addr.setExtendType(AArch64_AM::LSL);748749const Value *Src = U->getOperand(0);750if (const auto *I = dyn_cast<Instruction>(Src)) {751if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {752// Fold the zext or sext when it won't become a noop.753if (const auto *ZE = dyn_cast<ZExtInst>(I)) {754if (!isIntExtFree(ZE) &&755ZE->getOperand(0)->getType()->isIntegerTy(32)) {756Addr.setExtendType(AArch64_AM::UXTW);757Src = ZE->getOperand(0);758}759} else if (const auto *SE = dyn_cast<SExtInst>(I)) {760if (!isIntExtFree(SE) &&761SE->getOperand(0)->getType()->isIntegerTy(32)) {762Addr.setExtendType(AArch64_AM::SXTW);763Src = SE->getOperand(0);764}765}766}767}768769if (const auto *AI = dyn_cast<BinaryOperator>(Src))770if (AI->getOpcode() == Instruction::And) {771const Value *LHS = AI->getOperand(0);772const Value *RHS = AI->getOperand(1);773774if (const auto *C = dyn_cast<ConstantInt>(LHS))775if (C->getValue() == 0xffffffff)776std::swap(LHS, RHS);777778if (const auto *C = dyn_cast<ConstantInt>(RHS))779if (C->getValue() == 0xffffffff) {780Addr.setExtendType(AArch64_AM::UXTW);781Register Reg = getRegForValue(LHS);782if (!Reg)783return false;784Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);785Addr.setOffsetReg(Reg);786return true;787}788}789790Register Reg = getRegForValue(Src);791if (!Reg)792return false;793Addr.setOffsetReg(Reg);794return 
true;795}796case Instruction::Mul: {797if (Addr.getOffsetReg())798break;799800if (!isMulPowOf2(U))801break;802803const Value *LHS = U->getOperand(0);804const Value *RHS = U->getOperand(1);805806// Canonicalize power-of-2 value to the RHS.807if (const auto *C = dyn_cast<ConstantInt>(LHS))808if (C->getValue().isPowerOf2())809std::swap(LHS, RHS);810811assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");812const auto *C = cast<ConstantInt>(RHS);813unsigned Val = C->getValue().logBase2();814if (Val < 1 || Val > 3)815break;816817uint64_t NumBytes = 0;818if (Ty && Ty->isSized()) {819uint64_t NumBits = DL.getTypeSizeInBits(Ty);820NumBytes = NumBits / 8;821if (!isPowerOf2_64(NumBits))822NumBytes = 0;823}824825if (NumBytes != (1ULL << Val))826break;827828Addr.setShift(Val);829Addr.setExtendType(AArch64_AM::LSL);830831const Value *Src = LHS;832if (const auto *I = dyn_cast<Instruction>(Src)) {833if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {834// Fold the zext or sext when it won't become a noop.835if (const auto *ZE = dyn_cast<ZExtInst>(I)) {836if (!isIntExtFree(ZE) &&837ZE->getOperand(0)->getType()->isIntegerTy(32)) {838Addr.setExtendType(AArch64_AM::UXTW);839Src = ZE->getOperand(0);840}841} else if (const auto *SE = dyn_cast<SExtInst>(I)) {842if (!isIntExtFree(SE) &&843SE->getOperand(0)->getType()->isIntegerTy(32)) {844Addr.setExtendType(AArch64_AM::SXTW);845Src = SE->getOperand(0);846}847}848}849}850851Register Reg = getRegForValue(Src);852if (!Reg)853return false;854Addr.setOffsetReg(Reg);855return true;856}857case Instruction::And: {858if (Addr.getOffsetReg())859break;860861if (!Ty || DL.getTypeSizeInBits(Ty) != 8)862break;863864const Value *LHS = U->getOperand(0);865const Value *RHS = U->getOperand(1);866867if (const auto *C = dyn_cast<ConstantInt>(LHS))868if (C->getValue() == 0xffffffff)869std::swap(LHS, RHS);870871if (const auto *C = dyn_cast<ConstantInt>(RHS))872if (C->getValue() == 0xffffffff) {873Addr.setShift(0);874Addr.setExtendType(AArch64_AM::LSL);875Addr.setExtendType(AArch64_AM::UXTW);876877Register Reg = getRegForValue(LHS);878if (!Reg)879return false;880Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);881Addr.setOffsetReg(Reg);882return true;883}884break;885}886case Instruction::SExt:887case Instruction::ZExt: {888if (!Addr.getReg() || Addr.getOffsetReg())889break;890891const Value *Src = nullptr;892// Fold the zext or sext when it won't become a noop.893if (const auto *ZE = dyn_cast<ZExtInst>(U)) {894if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {895Addr.setExtendType(AArch64_AM::UXTW);896Src = ZE->getOperand(0);897}898} else if (const auto *SE = dyn_cast<SExtInst>(U)) {899if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {900Addr.setExtendType(AArch64_AM::SXTW);901Src = SE->getOperand(0);902}903}904905if (!Src)906break;907908Addr.setShift(0);909Register Reg = getRegForValue(Src);910if (!Reg)911return false;912Addr.setOffsetReg(Reg);913return true;914}915} // end switch916917if (Addr.isRegBase() && !Addr.getReg()) {918Register Reg = getRegForValue(Obj);919if (!Reg)920return false;921Addr.setReg(Reg);922return true;923}924925if (!Addr.getOffsetReg()) {926Register Reg = getRegForValue(Obj);927if (!Reg)928return false;929Addr.setOffsetReg(Reg);930return true;931}932933return false;934}935936bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {937const User *U = nullptr;938unsigned Opcode = Instruction::UserOp1;939bool InMBB = true;940941if (const auto *I = dyn_cast<Instruction>(V)) 
{942Opcode = I->getOpcode();943U = I;944InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();945} else if (const auto *C = dyn_cast<ConstantExpr>(V)) {946Opcode = C->getOpcode();947U = C;948}949950switch (Opcode) {951default: break;952case Instruction::BitCast:953// Look past bitcasts if its operand is in the same BB.954if (InMBB)955return computeCallAddress(U->getOperand(0), Addr);956break;957case Instruction::IntToPtr:958// Look past no-op inttoptrs if its operand is in the same BB.959if (InMBB &&960TLI.getValueType(DL, U->getOperand(0)->getType()) ==961TLI.getPointerTy(DL))962return computeCallAddress(U->getOperand(0), Addr);963break;964case Instruction::PtrToInt:965// Look past no-op ptrtoints if its operand is in the same BB.966if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))967return computeCallAddress(U->getOperand(0), Addr);968break;969}970971if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {972Addr.setGlobalValue(GV);973return true;974}975976// If all else fails, try to materialize the value in a register.977if (!Addr.getGlobalValue()) {978Addr.setReg(getRegForValue(V));979return Addr.getReg() != 0;980}981982return false;983}984985bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {986EVT evt = TLI.getValueType(DL, Ty, true);987988if (Subtarget->isTargetILP32() && Ty->isPointerTy())989return false;990991// Only handle simple types.992if (evt == MVT::Other || !evt.isSimple())993return false;994VT = evt.getSimpleVT();995996// This is a legal type, but it's not something we handle in fast-isel.997if (VT == MVT::f128)998return false;9991000// Handle all other legal types, i.e. a register that will directly hold this1001// value.1002return TLI.isTypeLegal(VT);1003}10041005/// Determine if the value type is supported by FastISel.1006///1007/// FastISel for AArch64 can handle more value types than are legal. This adds1008/// simple value type such as i1, i8, and i16.1009bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {1010if (Ty->isVectorTy() && !IsVectorAllowed)1011return false;10121013if (isTypeLegal(Ty, VT))1014return true;10151016// If this is a type than can be sign or zero-extended to a basic operation1017// go ahead and accept it now.1018if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)1019return true;10201021return false;1022}10231024bool AArch64FastISel::isValueAvailable(const Value *V) const {1025if (!isa<Instruction>(V))1026return true;10271028const auto *I = cast<Instruction>(V);1029return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;1030}10311032bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {1033if (Subtarget->isTargetILP32())1034return false;10351036unsigned ScaleFactor = getImplicitScaleFactor(VT);1037if (!ScaleFactor)1038return false;10391040bool ImmediateOffsetNeedsLowering = false;1041bool RegisterOffsetNeedsLowering = false;1042int64_t Offset = Addr.getOffset();1043if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))1044ImmediateOffsetNeedsLowering = true;1045else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&1046!isUInt<12>(Offset / ScaleFactor))1047ImmediateOffsetNeedsLowering = true;10481049// Cannot encode an offset register and an immediate offset in the same1050// instruction. 
Fold the immediate offset into the load/store instruction and1051// emit an additional add to take care of the offset register.1052if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())1053RegisterOffsetNeedsLowering = true;10541055// Cannot encode zero register as base.1056if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())1057RegisterOffsetNeedsLowering = true;10581059// If this is a stack pointer and the offset needs to be simplified then put1060// the alloca address into a register, set the base type back to register and1061// continue. This should almost never happen.1062if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())1063{1064Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);1065BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),1066ResultReg)1067.addFrameIndex(Addr.getFI())1068.addImm(0)1069.addImm(0);1070Addr.setKind(Address::RegBase);1071Addr.setReg(ResultReg);1072}10731074if (RegisterOffsetNeedsLowering) {1075unsigned ResultReg = 0;1076if (Addr.getReg()) {1077if (Addr.getExtendType() == AArch64_AM::SXTW ||1078Addr.getExtendType() == AArch64_AM::UXTW )1079ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),1080Addr.getOffsetReg(), Addr.getExtendType(),1081Addr.getShift());1082else1083ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),1084Addr.getOffsetReg(), AArch64_AM::LSL,1085Addr.getShift());1086} else {1087if (Addr.getExtendType() == AArch64_AM::UXTW)1088ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),1089Addr.getShift(), /*IsZExt=*/true);1090else if (Addr.getExtendType() == AArch64_AM::SXTW)1091ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),1092Addr.getShift(), /*IsZExt=*/false);1093else1094ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),1095Addr.getShift());1096}1097if (!ResultReg)1098return false;10991100Addr.setReg(ResultReg);1101Addr.setOffsetReg(0);1102Addr.setShift(0);1103Addr.setExtendType(AArch64_AM::InvalidShiftExtend);1104}11051106// Since the offset is too large for the load/store instruction get the1107// reg+offset into a register.1108if (ImmediateOffsetNeedsLowering) {1109unsigned ResultReg;1110if (Addr.getReg())1111// Try to fold the immediate into the add instruction.1112ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);1113else1114ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);11151116if (!ResultReg)1117return false;1118Addr.setReg(ResultReg);1119Addr.setOffset(0);1120}1121return true;1122}11231124void AArch64FastISel::addLoadStoreOperands(Address &Addr,1125const MachineInstrBuilder &MIB,1126MachineMemOperand::Flags Flags,1127unsigned ScaleFactor,1128MachineMemOperand *MMO) {1129int64_t Offset = Addr.getOffset() / ScaleFactor;1130// Frame base works a bit differently. Handle it separately.1131if (Addr.isFIBase()) {1132int FI = Addr.getFI();1133// FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size1134// and alignment should be based on the VT.1135MMO = FuncInfo.MF->getMachineMemOperand(1136MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,1137MFI.getObjectSize(FI), MFI.getObjectAlign(FI));1138// Now add the rest of the operands.1139MIB.addFrameIndex(FI).addImm(Offset);1140} else {1141assert(Addr.isRegBase() && "Unexpected address kind.");1142const MCInstrDesc &II = MIB->getDesc();1143unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 
1 : 0;1144Addr.setReg(1145constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));1146Addr.setOffsetReg(1147constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));1148if (Addr.getOffsetReg()) {1149assert(Addr.getOffset() == 0 && "Unexpected offset");1150bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||1151Addr.getExtendType() == AArch64_AM::SXTX;1152MIB.addReg(Addr.getReg());1153MIB.addReg(Addr.getOffsetReg());1154MIB.addImm(IsSigned);1155MIB.addImm(Addr.getShift() != 0);1156} else1157MIB.addReg(Addr.getReg()).addImm(Offset);1158}11591160if (MMO)1161MIB.addMemOperand(MMO);1162}11631164unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,1165const Value *RHS, bool SetFlags,1166bool WantResult, bool IsZExt) {1167AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;1168bool NeedExtend = false;1169switch (RetVT.SimpleTy) {1170default:1171return 0;1172case MVT::i1:1173NeedExtend = true;1174break;1175case MVT::i8:1176NeedExtend = true;1177ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;1178break;1179case MVT::i16:1180NeedExtend = true;1181ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;1182break;1183case MVT::i32: // fall-through1184case MVT::i64:1185break;1186}1187MVT SrcVT = RetVT;1188RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);11891190// Canonicalize immediates to the RHS first.1191if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))1192std::swap(LHS, RHS);11931194// Canonicalize mul by power of 2 to the RHS.1195if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))1196if (isMulPowOf2(LHS))1197std::swap(LHS, RHS);11981199// Canonicalize shift immediate to the RHS.1200if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))1201if (const auto *SI = dyn_cast<BinaryOperator>(LHS))1202if (isa<ConstantInt>(SI->getOperand(1)))1203if (SI->getOpcode() == Instruction::Shl ||1204SI->getOpcode() == Instruction::LShr ||1205SI->getOpcode() == Instruction::AShr )1206std::swap(LHS, RHS);12071208Register LHSReg = getRegForValue(LHS);1209if (!LHSReg)1210return 0;12111212if (NeedExtend)1213LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);12141215unsigned ResultReg = 0;1216if (const auto *C = dyn_cast<ConstantInt>(RHS)) {1217uint64_t Imm = IsZExt ? 
C->getZExtValue() : C->getSExtValue();1218if (C->isNegative())1219ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,1220WantResult);1221else1222ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,1223WantResult);1224} else if (const auto *C = dyn_cast<Constant>(RHS))1225if (C->isNullValue())1226ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);12271228if (ResultReg)1229return ResultReg;12301231// Only extend the RHS within the instruction if there is a valid extend type.1232if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&1233isValueAvailable(RHS)) {1234Register RHSReg = getRegForValue(RHS);1235if (!RHSReg)1236return 0;1237return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,1238SetFlags, WantResult);1239}12401241// Check if the mul can be folded into the instruction.1242if (RHS->hasOneUse() && isValueAvailable(RHS)) {1243if (isMulPowOf2(RHS)) {1244const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);1245const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);12461247if (const auto *C = dyn_cast<ConstantInt>(MulLHS))1248if (C->getValue().isPowerOf2())1249std::swap(MulLHS, MulRHS);12501251assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");1252uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();1253Register RHSReg = getRegForValue(MulLHS);1254if (!RHSReg)1255return 0;1256ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,1257ShiftVal, SetFlags, WantResult);1258if (ResultReg)1259return ResultReg;1260}1261}12621263// Check if the shift can be folded into the instruction.1264if (RHS->hasOneUse() && isValueAvailable(RHS)) {1265if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {1266if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {1267AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;1268switch (SI->getOpcode()) {1269default: break;1270case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;1271case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;1272case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;1273}1274uint64_t ShiftVal = C->getZExtValue();1275if (ShiftType != AArch64_AM::InvalidShiftExtend) {1276Register RHSReg = getRegForValue(SI->getOperand(0));1277if (!RHSReg)1278return 0;1279ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,1280ShiftVal, SetFlags, WantResult);1281if (ResultReg)1282return ResultReg;1283}1284}1285}1286}12871288Register RHSReg = getRegForValue(RHS);1289if (!RHSReg)1290return 0;12911292if (NeedExtend)1293RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);12941295return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);1296}12971298unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,1299unsigned RHSReg, bool SetFlags,1300bool WantResult) {1301assert(LHSReg && RHSReg && "Invalid register number.");13021303if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||1304RHSReg == AArch64::SP || RHSReg == AArch64::WSP)1305return 0;13061307if (RetVT != MVT::i32 && RetVT != MVT::i64)1308return 0;13091310static const unsigned OpcTable[2][2][2] = {1311{ { AArch64::SUBWrr, AArch64::SUBXrr },1312{ AArch64::ADDWrr, AArch64::ADDXrr } },1313{ { AArch64::SUBSWrr, AArch64::SUBSXrr },1314{ AArch64::ADDSWrr, AArch64::ADDSXrr } }1315};1316bool Is64Bit = RetVT == MVT::i64;1317unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];1318const TargetRegisterClass *RC =1319Is64Bit ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass;1320unsigned ResultReg;1321if (WantResult)1322ResultReg = createResultReg(RC);1323else1324ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;13251326const MCInstrDesc &II = TII.get(Opc);1327LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());1328RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);1329BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)1330.addReg(LHSReg)1331.addReg(RHSReg);1332return ResultReg;1333}13341335unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,1336uint64_t Imm, bool SetFlags,1337bool WantResult) {1338assert(LHSReg && "Invalid register number.");13391340if (RetVT != MVT::i32 && RetVT != MVT::i64)1341return 0;13421343unsigned ShiftImm;1344if (isUInt<12>(Imm))1345ShiftImm = 0;1346else if ((Imm & 0xfff000) == Imm) {1347ShiftImm = 12;1348Imm >>= 12;1349} else1350return 0;13511352static const unsigned OpcTable[2][2][2] = {1353{ { AArch64::SUBWri, AArch64::SUBXri },1354{ AArch64::ADDWri, AArch64::ADDXri } },1355{ { AArch64::SUBSWri, AArch64::SUBSXri },1356{ AArch64::ADDSWri, AArch64::ADDSXri } }1357};1358bool Is64Bit = RetVT == MVT::i64;1359unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];1360const TargetRegisterClass *RC;1361if (SetFlags)1362RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;1363else1364RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;1365unsigned ResultReg;1366if (WantResult)1367ResultReg = createResultReg(RC);1368else1369ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;13701371const MCInstrDesc &II = TII.get(Opc);1372LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());1373BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)1374.addReg(LHSReg)1375.addImm(Imm)1376.addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));1377return ResultReg;1378}13791380unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,1381unsigned RHSReg,1382AArch64_AM::ShiftExtendType ShiftType,1383uint64_t ShiftImm, bool SetFlags,1384bool WantResult) {1385assert(LHSReg && RHSReg && "Invalid register number.");1386assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&1387RHSReg != AArch64::SP && RHSReg != AArch64::WSP);13881389if (RetVT != MVT::i32 && RetVT != MVT::i64)1390return 0;13911392// Don't deal with undefined shifts.1393if (ShiftImm >= RetVT.getSizeInBits())1394return 0;13951396static const unsigned OpcTable[2][2][2] = {1397{ { AArch64::SUBWrs, AArch64::SUBXrs },1398{ AArch64::ADDWrs, AArch64::ADDXrs } },1399{ { AArch64::SUBSWrs, AArch64::SUBSXrs },1400{ AArch64::ADDSWrs, AArch64::ADDSXrs } }1401};1402bool Is64Bit = RetVT == MVT::i64;1403unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];1404const TargetRegisterClass *RC =1405Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;1406unsigned ResultReg;1407if (WantResult)1408ResultReg = createResultReg(RC);1409else1410ResultReg = Is64Bit ? 
AArch64::XZR : AArch64::WZR;14111412const MCInstrDesc &II = TII.get(Opc);1413LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());1414RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);1415BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)1416.addReg(LHSReg)1417.addReg(RHSReg)1418.addImm(getShifterImm(ShiftType, ShiftImm));1419return ResultReg;1420}14211422unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,1423unsigned RHSReg,1424AArch64_AM::ShiftExtendType ExtType,1425uint64_t ShiftImm, bool SetFlags,1426bool WantResult) {1427assert(LHSReg && RHSReg && "Invalid register number.");1428assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&1429RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);14301431if (RetVT != MVT::i32 && RetVT != MVT::i64)1432return 0;14331434if (ShiftImm >= 4)1435return 0;14361437static const unsigned OpcTable[2][2][2] = {1438{ { AArch64::SUBWrx, AArch64::SUBXrx },1439{ AArch64::ADDWrx, AArch64::ADDXrx } },1440{ { AArch64::SUBSWrx, AArch64::SUBSXrx },1441{ AArch64::ADDSWrx, AArch64::ADDSXrx } }1442};1443bool Is64Bit = RetVT == MVT::i64;1444unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];1445const TargetRegisterClass *RC = nullptr;1446if (SetFlags)1447RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;1448else1449RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;1450unsigned ResultReg;1451if (WantResult)1452ResultReg = createResultReg(RC);1453else1454ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;14551456const MCInstrDesc &II = TII.get(Opc);1457LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());1458RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);1459BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)1460.addReg(LHSReg)1461.addReg(RHSReg)1462.addImm(getArithExtendImm(ExtType, ShiftImm));1463return ResultReg;1464}14651466bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {1467Type *Ty = LHS->getType();1468EVT EVT = TLI.getValueType(DL, Ty, true);1469if (!EVT.isSimple())1470return false;1471MVT VT = EVT.getSimpleVT();14721473switch (VT.SimpleTy) {1474default:1475return false;1476case MVT::i1:1477case MVT::i8:1478case MVT::i16:1479case MVT::i32:1480case MVT::i64:1481return emitICmp(VT, LHS, RHS, IsZExt);1482case MVT::f32:1483case MVT::f64:1484return emitFCmp(VT, LHS, RHS);1485}1486}14871488bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,1489bool IsZExt) {1490return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,1491IsZExt) != 0;1492}14931494bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {1495return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,1496/*SetFlags=*/true, /*WantResult=*/false) != 0;1497}14981499bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {1500if (RetVT != MVT::f32 && RetVT != MVT::f64)1501return false;15021503// Check to see if the 2nd operand is a constant that we can encode directly1504// in the compare.1505bool UseImm = false;1506if (const auto *CFP = dyn_cast<ConstantFP>(RHS))1507if (CFP->isZero() && !CFP->isNegative())1508UseImm = true;15091510Register LHSReg = getRegForValue(LHS);1511if (!LHSReg)1512return false;15131514if (UseImm) {1515unsigned Opc = (RetVT == MVT::f64) ? 
AArch64::FCMPDri : AArch64::FCMPSri;1516BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))1517.addReg(LHSReg);1518return true;1519}15201521Register RHSReg = getRegForValue(RHS);1522if (!RHSReg)1523return false;15241525unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;1526BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))1527.addReg(LHSReg)1528.addReg(RHSReg);1529return true;1530}15311532unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,1533bool SetFlags, bool WantResult, bool IsZExt) {1534return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,1535IsZExt);1536}15371538/// This method is a wrapper to simplify add emission.1539///1540/// First try to emit an add with an immediate operand using emitAddSub_ri. If1541/// that fails, then try to materialize the immediate into a register and use1542/// emitAddSub_rr instead.1543unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {1544unsigned ResultReg;1545if (Imm < 0)1546ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);1547else1548ResultReg = emitAddSub_ri(true, VT, Op0, Imm);15491550if (ResultReg)1551return ResultReg;15521553unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);1554if (!CReg)1555return 0;15561557ResultReg = emitAddSub_rr(true, VT, Op0, CReg);1558return ResultReg;1559}15601561unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,1562bool SetFlags, bool WantResult, bool IsZExt) {1563return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,1564IsZExt);1565}15661567unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,1568unsigned RHSReg, bool WantResult) {1569return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,1570/*SetFlags=*/true, WantResult);1571}15721573unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,1574unsigned RHSReg,1575AArch64_AM::ShiftExtendType ShiftType,1576uint64_t ShiftImm, bool WantResult) {1577return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,1578ShiftImm, /*SetFlags=*/true, WantResult);1579}15801581unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,1582const Value *LHS, const Value *RHS) {1583// Canonicalize immediates to the RHS first.1584if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))1585std::swap(LHS, RHS);15861587// Canonicalize mul by power-of-2 to the RHS.1588if (LHS->hasOneUse() && isValueAvailable(LHS))1589if (isMulPowOf2(LHS))1590std::swap(LHS, RHS);15911592// Canonicalize shift immediate to the RHS.1593if (LHS->hasOneUse() && isValueAvailable(LHS))1594if (const auto *SI = dyn_cast<ShlOperator>(LHS))1595if (isa<ConstantInt>(SI->getOperand(1)))1596std::swap(LHS, RHS);15971598Register LHSReg = getRegForValue(LHS);1599if (!LHSReg)1600return 0;16011602unsigned ResultReg = 0;1603if (const auto *C = dyn_cast<ConstantInt>(RHS)) {1604uint64_t Imm = C->getZExtValue();1605ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);1606}1607if (ResultReg)1608return ResultReg;16091610// Check if the mul can be folded into the instruction.1611if (RHS->hasOneUse() && isValueAvailable(RHS)) {1612if (isMulPowOf2(RHS)) {1613const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);1614const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);16151616if (const auto *C = dyn_cast<ConstantInt>(MulLHS))1617if (C->getValue().isPowerOf2())1618std::swap(MulLHS, MulRHS);16191620assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");1621uint64_t ShiftVal = 
cast<ConstantInt>(MulRHS)->getValue().logBase2();16221623Register RHSReg = getRegForValue(MulLHS);1624if (!RHSReg)1625return 0;1626ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);1627if (ResultReg)1628return ResultReg;1629}1630}16311632// Check if the shift can be folded into the instruction.1633if (RHS->hasOneUse() && isValueAvailable(RHS)) {1634if (const auto *SI = dyn_cast<ShlOperator>(RHS))1635if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {1636uint64_t ShiftVal = C->getZExtValue();1637Register RHSReg = getRegForValue(SI->getOperand(0));1638if (!RHSReg)1639return 0;1640ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);1641if (ResultReg)1642return ResultReg;1643}1644}16451646Register RHSReg = getRegForValue(RHS);1647if (!RHSReg)1648return 0;16491650MVT VT = std::max(MVT::i32, RetVT.SimpleTy);1651ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);1652if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {1653uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;1654ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);1655}1656return ResultReg;1657}16581659unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,1660unsigned LHSReg, uint64_t Imm) {1661static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),1662"ISD nodes are not consecutive!");1663static const unsigned OpcTable[3][2] = {1664{ AArch64::ANDWri, AArch64::ANDXri },1665{ AArch64::ORRWri, AArch64::ORRXri },1666{ AArch64::EORWri, AArch64::EORXri }1667};1668const TargetRegisterClass *RC;1669unsigned Opc;1670unsigned RegSize;1671switch (RetVT.SimpleTy) {1672default:1673return 0;1674case MVT::i1:1675case MVT::i8:1676case MVT::i16:1677case MVT::i32: {1678unsigned Idx = ISDOpc - ISD::AND;1679Opc = OpcTable[Idx][0];1680RC = &AArch64::GPR32spRegClass;1681RegSize = 32;1682break;1683}1684case MVT::i64:1685Opc = OpcTable[ISDOpc - ISD::AND][1];1686RC = &AArch64::GPR64spRegClass;1687RegSize = 64;1688break;1689}16901691if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))1692return 0;16931694Register ResultReg =1695fastEmitInst_ri(Opc, RC, LHSReg,1696AArch64_AM::encodeLogicalImmediate(Imm, RegSize));1697if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {1698uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;1699ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);1700}1701return ResultReg;1702}17031704unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,1705unsigned LHSReg, unsigned RHSReg,1706uint64_t ShiftImm) {1707static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),1708"ISD nodes are not consecutive!");1709static const unsigned OpcTable[3][2] = {1710{ AArch64::ANDWrs, AArch64::ANDXrs },1711{ AArch64::ORRWrs, AArch64::ORRXrs },1712{ AArch64::EORWrs, AArch64::EORXrs }1713};17141715// Don't deal with undefined shifts.1716if (ShiftImm >= RetVT.getSizeInBits())1717return 0;17181719const TargetRegisterClass *RC;1720unsigned Opc;1721switch (RetVT.SimpleTy) {1722default:1723return 0;1724case MVT::i1:1725case MVT::i8:1726case MVT::i16:1727case MVT::i32:1728Opc = OpcTable[ISDOpc - ISD::AND][0];1729RC = &AArch64::GPR32RegClass;1730break;1731case MVT::i64:1732Opc = OpcTable[ISDOpc - ISD::AND][1];1733RC = &AArch64::GPR64RegClass;1734break;1735}1736Register ResultReg =1737fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,1738AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));1739if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {1740uint64_t Mask = (RetVT == MVT::i8) ? 
0xff : 0xffff;1741ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);1742}1743return ResultReg;1744}17451746unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,1747uint64_t Imm) {1748return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);1749}17501751unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,1752bool WantZExt, MachineMemOperand *MMO) {1753if (!TLI.allowsMisalignedMemoryAccesses(VT))1754return 0;17551756// Simplify this down to something we can handle.1757if (!simplifyAddress(Addr, VT))1758return 0;17591760unsigned ScaleFactor = getImplicitScaleFactor(VT);1761if (!ScaleFactor)1762llvm_unreachable("Unexpected value type.");17631764// Negative offsets require unscaled, 9-bit, signed immediate offsets.1765// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.1766bool UseScaled = true;1767if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {1768UseScaled = false;1769ScaleFactor = 1;1770}17711772static const unsigned GPOpcTable[2][8][4] = {1773// Sign-extend.1774{ { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,1775AArch64::LDURXi },1776{ AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,1777AArch64::LDURXi },1778{ AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,1779AArch64::LDRXui },1780{ AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,1781AArch64::LDRXui },1782{ AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,1783AArch64::LDRXroX },1784{ AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,1785AArch64::LDRXroX },1786{ AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,1787AArch64::LDRXroW },1788{ AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,1789AArch64::LDRXroW }1790},1791// Zero-extend.1792{ { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,1793AArch64::LDURXi },1794{ AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,1795AArch64::LDURXi },1796{ AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,1797AArch64::LDRXui },1798{ AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,1799AArch64::LDRXui },1800{ AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,1801AArch64::LDRXroX },1802{ AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,1803AArch64::LDRXroX },1804{ AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,1805AArch64::LDRXroW },1806{ AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,1807AArch64::LDRXroW }1808}1809};18101811static const unsigned FPOpcTable[4][2] = {1812{ AArch64::LDURSi, AArch64::LDURDi },1813{ AArch64::LDRSui, AArch64::LDRDui },1814{ AArch64::LDRSroX, AArch64::LDRDroX },1815{ AArch64::LDRSroW, AArch64::LDRDroW }1816};18171818unsigned Opc;1819const TargetRegisterClass *RC;1820bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&1821Addr.getOffsetReg();1822unsigned Idx = UseRegOffset ? 2 : UseScaled ? 
  unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
  if (Addr.getExtendType() == AArch64_AM::UXTW ||
      Addr.getExtendType() == AArch64_AM::SXTW)
    Idx++;

  bool IsRet64Bit = RetVT == MVT::i64;
  switch (VT.SimpleTy) {
  default:
    llvm_unreachable("Unexpected value type.");
  case MVT::i1: // Intentional fall-through.
  case MVT::i8:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i16:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i32:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
    RC = (IsRet64Bit && !WantZExt) ?
             &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
    break;
  case MVT::i64:
    Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
    RC = &AArch64::GPR64RegClass;
    break;
  case MVT::f32:
    Opc = FPOpcTable[Idx][0];
    RC = &AArch64::FPR32RegClass;
    break;
  case MVT::f64:
    Opc = FPOpcTable[Idx][1];
    RC = &AArch64::FPR64RegClass;
    break;
  }

  // Create the base instruction, then add the operands.
  Register ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(Opc), ResultReg);
  addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);

  // Loading an i1 requires special handling.
  if (VT == MVT::i1) {
    unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
    assert(ANDReg && "Unexpected AND instruction emission failure.");
    ResultReg = ANDReg;
  }

  // For zero-extending loads to 64bit we emit a 32bit load and then convert
  // the 32bit reg to a 64bit reg.
  if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
    Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(ResultReg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    ResultReg = Reg64;
  }
  return ResultReg;
}

bool AArch64FastISel::selectAddSub(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectOperator(I, I->getOpcode());

  unsigned ResultReg;
  switch (I->getOpcode()) {
  default:
    llvm_unreachable("Unexpected instruction.");
  case Instruction::Add:
    ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
    break;
  case Instruction::Sub:
    ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
    break;
  }
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectOperator(I, I->getOpcode());

  unsigned ResultReg;
  switch (I->getOpcode()) {
  default:
    llvm_unreachable("Unexpected instruction.");
  case Instruction::And:
    ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
    break;
  case Instruction::Or:
    ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
    break;
  case Instruction::Xor:
    ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
    break;
  }
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectLoad(const Instruction *I) {
  MVT VT;
  // Verify we have a legal type before going any further. Currently, we handle
  // simple types that will directly fit in a register (i32/f32/i64/f64) or
  // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
      cast<LoadInst>(I)->isAtomic())
    return false;

  const Value *SV = I->getOperand(0);
  if (TLI.supportSwiftError()) {
    // Swifterror values can come from either a function parameter with
    // swifterror attribute or an alloca with swifterror attribute.
    if (const Argument *Arg = dyn_cast<Argument>(SV)) {
      if (Arg->hasSwiftErrorAttr())
        return false;
    }

    if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
      if (Alloca->isSwiftError())
        return false;
    }
  }

  // See if we can handle this address.
  Address Addr;
  if (!computeAddress(I->getOperand(0), Addr, I->getType()))
    return false;

  // Fold the following sign-/zero-extend into the load instruction.
  bool WantZExt = true;
  MVT RetVT = VT;
  const Value *IntExtVal = nullptr;
  if (I->hasOneUse()) {
    if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(ZE->getType(), RetVT))
        IntExtVal = ZE;
      else
        RetVT = VT;
    } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(SE->getType(), RetVT))
        IntExtVal = SE;
      else
        RetVT = VT;
      WantZExt = false;
    }
  }

  unsigned ResultReg =
      emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
  if (!ResultReg)
    return false;

  // There are a few different cases we have to handle, because the load or the
  // sign-/zero-extend might not be selected by FastISel if we fall-back to
  // SelectionDAG. There is also an ordering issue when both instructions are in
  // different basic blocks.
  // 1.) The load instruction is selected by FastISel, but the integer extend
  //     not. This usually happens when the integer extend is in a different
  //     basic block and SelectionDAG took over for that basic block.
  // 2.) The load instruction is selected before the integer extend. This only
  //     happens when the integer extend is in a different basic block.
  // 3.) The load instruction is selected by SelectionDAG and the integer extend
  //     by FastISel. This happens if there are instructions between the load
  //     and the integer extend that couldn't be selected by FastISel.
  if (IntExtVal) {
    // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
    // could select it. Emit a copy to subreg if necessary.
FastISel will remove2011// it when it selects the integer extend.2012Register Reg = lookUpRegForValue(IntExtVal);2013auto *MI = MRI.getUniqueVRegDef(Reg);2014if (!MI) {2015if (RetVT == MVT::i64 && VT <= MVT::i32) {2016if (WantZExt) {2017// Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).2018MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));2019ResultReg = std::prev(I)->getOperand(0).getReg();2020removeDeadCode(I, std::next(I));2021} else2022ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,2023AArch64::sub_32);2024}2025updateValueMap(I, ResultReg);2026return true;2027}20282029// The integer extend has already been emitted - delete all the instructions2030// that have been emitted by the integer extend lowering code and use the2031// result from the load instruction directly.2032while (MI) {2033Reg = 0;2034for (auto &Opnd : MI->uses()) {2035if (Opnd.isReg()) {2036Reg = Opnd.getReg();2037break;2038}2039}2040MachineBasicBlock::iterator I(MI);2041removeDeadCode(I, std::next(I));2042MI = nullptr;2043if (Reg)2044MI = MRI.getUniqueVRegDef(Reg);2045}2046updateValueMap(IntExtVal, ResultReg);2047return true;2048}20492050updateValueMap(I, ResultReg);2051return true;2052}20532054bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,2055unsigned AddrReg,2056MachineMemOperand *MMO) {2057unsigned Opc;2058switch (VT.SimpleTy) {2059default: return false;2060case MVT::i8: Opc = AArch64::STLRB; break;2061case MVT::i16: Opc = AArch64::STLRH; break;2062case MVT::i32: Opc = AArch64::STLRW; break;2063case MVT::i64: Opc = AArch64::STLRX; break;2064}20652066const MCInstrDesc &II = TII.get(Opc);2067SrcReg = constrainOperandRegClass(II, SrcReg, 0);2068AddrReg = constrainOperandRegClass(II, AddrReg, 1);2069BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)2070.addReg(SrcReg)2071.addReg(AddrReg)2072.addMemOperand(MMO);2073return true;2074}20752076bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,2077MachineMemOperand *MMO) {2078if (!TLI.allowsMisalignedMemoryAccesses(VT))2079return false;20802081// Simplify this down to something we can handle.2082if (!simplifyAddress(Addr, VT))2083return false;20842085unsigned ScaleFactor = getImplicitScaleFactor(VT);2086if (!ScaleFactor)2087llvm_unreachable("Unexpected value type.");20882089// Negative offsets require unscaled, 9-bit, signed immediate offsets.2090// Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.2091bool UseScaled = true;2092if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {2093UseScaled = false;2094ScaleFactor = 1;2095}20962097static const unsigned OpcTable[4][6] = {2098{ AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,2099AArch64::STURSi, AArch64::STURDi },2100{ AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,2101AArch64::STRSui, AArch64::STRDui },2102{ AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,2103AArch64::STRSroX, AArch64::STRDroX },2104{ AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,2105AArch64::STRSroW, AArch64::STRDroW }2106};21072108unsigned Opc;2109bool VTIsi1 = false;2110bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&2111Addr.getOffsetReg();2112unsigned Idx = UseRegOffset ? 2 : UseScaled ? 
1 : 0;2113if (Addr.getExtendType() == AArch64_AM::UXTW ||2114Addr.getExtendType() == AArch64_AM::SXTW)2115Idx++;21162117switch (VT.SimpleTy) {2118default: llvm_unreachable("Unexpected value type.");2119case MVT::i1: VTIsi1 = true; [[fallthrough]];2120case MVT::i8: Opc = OpcTable[Idx][0]; break;2121case MVT::i16: Opc = OpcTable[Idx][1]; break;2122case MVT::i32: Opc = OpcTable[Idx][2]; break;2123case MVT::i64: Opc = OpcTable[Idx][3]; break;2124case MVT::f32: Opc = OpcTable[Idx][4]; break;2125case MVT::f64: Opc = OpcTable[Idx][5]; break;2126}21272128// Storing an i1 requires special handling.2129if (VTIsi1 && SrcReg != AArch64::WZR) {2130unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);2131assert(ANDReg && "Unexpected AND instruction emission failure.");2132SrcReg = ANDReg;2133}2134// Create the base instruction, then add the operands.2135const MCInstrDesc &II = TII.get(Opc);2136SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());2137MachineInstrBuilder MIB =2138BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);2139addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);21402141return true;2142}21432144bool AArch64FastISel::selectStore(const Instruction *I) {2145MVT VT;2146const Value *Op0 = I->getOperand(0);2147// Verify we have a legal type before going any further. Currently, we handle2148// simple types that will directly fit in a register (i32/f32/i64/f64) or2149// those that can be sign or zero-extended to a basic operation (i1/i8/i16).2150if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))2151return false;21522153const Value *PtrV = I->getOperand(1);2154if (TLI.supportSwiftError()) {2155// Swifterror values can come from either a function parameter with2156// swifterror attribute or an alloca with swifterror attribute.2157if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {2158if (Arg->hasSwiftErrorAttr())2159return false;2160}21612162if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {2163if (Alloca->isSwiftError())2164return false;2165}2166}21672168// Get the value to be stored into a register. Use the zero register directly2169// when possible to avoid an unnecessary copy and a wasted register.2170unsigned SrcReg = 0;2171if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {2172if (CI->isZero())2173SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;2174} else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {2175if (CF->isZero() && !CF->isNegative()) {2176VT = MVT::getIntegerVT(VT.getSizeInBits());2177SrcReg = (VT == MVT::i64) ? 
AArch64::XZR : AArch64::WZR;2178}2179}21802181if (!SrcReg)2182SrcReg = getRegForValue(Op0);21832184if (!SrcReg)2185return false;21862187auto *SI = cast<StoreInst>(I);21882189// Try to emit a STLR for seq_cst/release.2190if (SI->isAtomic()) {2191AtomicOrdering Ord = SI->getOrdering();2192// The non-atomic instructions are sufficient for relaxed stores.2193if (isReleaseOrStronger(Ord)) {2194// The STLR addressing mode only supports a base reg; pass that directly.2195Register AddrReg = getRegForValue(PtrV);2196return emitStoreRelease(VT, SrcReg, AddrReg,2197createMachineMemOperandFor(I));2198}2199}22002201// See if we can handle this address.2202Address Addr;2203if (!computeAddress(PtrV, Addr, Op0->getType()))2204return false;22052206if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))2207return false;2208return true;2209}22102211static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {2212switch (Pred) {2213case CmpInst::FCMP_ONE:2214case CmpInst::FCMP_UEQ:2215default:2216// AL is our "false" for now. The other two need more compares.2217return AArch64CC::AL;2218case CmpInst::ICMP_EQ:2219case CmpInst::FCMP_OEQ:2220return AArch64CC::EQ;2221case CmpInst::ICMP_SGT:2222case CmpInst::FCMP_OGT:2223return AArch64CC::GT;2224case CmpInst::ICMP_SGE:2225case CmpInst::FCMP_OGE:2226return AArch64CC::GE;2227case CmpInst::ICMP_UGT:2228case CmpInst::FCMP_UGT:2229return AArch64CC::HI;2230case CmpInst::FCMP_OLT:2231return AArch64CC::MI;2232case CmpInst::ICMP_ULE:2233case CmpInst::FCMP_OLE:2234return AArch64CC::LS;2235case CmpInst::FCMP_ORD:2236return AArch64CC::VC;2237case CmpInst::FCMP_UNO:2238return AArch64CC::VS;2239case CmpInst::FCMP_UGE:2240return AArch64CC::PL;2241case CmpInst::ICMP_SLT:2242case CmpInst::FCMP_ULT:2243return AArch64CC::LT;2244case CmpInst::ICMP_SLE:2245case CmpInst::FCMP_ULE:2246return AArch64CC::LE;2247case CmpInst::FCMP_UNE:2248case CmpInst::ICMP_NE:2249return AArch64CC::NE;2250case CmpInst::ICMP_UGE:2251return AArch64CC::HS;2252case CmpInst::ICMP_ULT:2253return AArch64CC::LO;2254}2255}22562257/// Try to emit a combined compare-and-branch instruction.2258bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {2259// Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions2260// will not be produced, as they are conditional branch instructions that do2261// not set flags.2262if (FuncInfo.MF->getFunction().hasFnAttribute(2263Attribute::SpeculativeLoadHardening))2264return false;22652266assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");2267const CmpInst *CI = cast<CmpInst>(BI->getCondition());2268CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);22692270const Value *LHS = CI->getOperand(0);2271const Value *RHS = CI->getOperand(1);22722273MVT VT;2274if (!isTypeSupported(LHS->getType(), VT))2275return false;22762277unsigned BW = VT.getSizeInBits();2278if (BW > 64)2279return false;22802281MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];2282MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];22832284// Try to take advantage of fallthrough opportunities.2285if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {2286std::swap(TBB, FBB);2287Predicate = CmpInst::getInversePredicate(Predicate);2288}22892290int TestBit = -1;2291bool IsCmpNE;2292switch (Predicate) {2293default:2294return false;2295case CmpInst::ICMP_EQ:2296case CmpInst::ICMP_NE:2297if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())2298std::swap(LHS, RHS);22992300if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())2301return 
false;23022303if (const auto *AI = dyn_cast<BinaryOperator>(LHS))2304if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {2305const Value *AndLHS = AI->getOperand(0);2306const Value *AndRHS = AI->getOperand(1);23072308if (const auto *C = dyn_cast<ConstantInt>(AndLHS))2309if (C->getValue().isPowerOf2())2310std::swap(AndLHS, AndRHS);23112312if (const auto *C = dyn_cast<ConstantInt>(AndRHS))2313if (C->getValue().isPowerOf2()) {2314TestBit = C->getValue().logBase2();2315LHS = AndLHS;2316}2317}23182319if (VT == MVT::i1)2320TestBit = 0;23212322IsCmpNE = Predicate == CmpInst::ICMP_NE;2323break;2324case CmpInst::ICMP_SLT:2325case CmpInst::ICMP_SGE:2326if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())2327return false;23282329TestBit = BW - 1;2330IsCmpNE = Predicate == CmpInst::ICMP_SLT;2331break;2332case CmpInst::ICMP_SGT:2333case CmpInst::ICMP_SLE:2334if (!isa<ConstantInt>(RHS))2335return false;23362337if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))2338return false;23392340TestBit = BW - 1;2341IsCmpNE = Predicate == CmpInst::ICMP_SLE;2342break;2343} // end switch23442345static const unsigned OpcTable[2][2][2] = {2346{ {AArch64::CBZW, AArch64::CBZX },2347{AArch64::CBNZW, AArch64::CBNZX} },2348{ {AArch64::TBZW, AArch64::TBZX },2349{AArch64::TBNZW, AArch64::TBNZX} }2350};23512352bool IsBitTest = TestBit != -1;2353bool Is64Bit = BW == 64;2354if (TestBit < 32 && TestBit >= 0)2355Is64Bit = false;23562357unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];2358const MCInstrDesc &II = TII.get(Opc);23592360Register SrcReg = getRegForValue(LHS);2361if (!SrcReg)2362return false;23632364if (BW == 64 && !Is64Bit)2365SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);23662367if ((BW < 32) && !IsBitTest)2368SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);23692370// Emit the combined compare and branch instruction.2371SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());2372MachineInstrBuilder MIB =2373BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))2374.addReg(SrcReg);2375if (IsBitTest)2376MIB.addImm(TestBit);2377MIB.addMBB(TBB);23782379finishCondBranch(BI->getParent(), TBB, FBB);2380return true;2381}23822383bool AArch64FastISel::selectBranch(const Instruction *I) {2384const BranchInst *BI = cast<BranchInst>(I);2385if (BI->isUnconditional()) {2386MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];2387fastEmitBranch(MSucc, BI->getDebugLoc());2388return true;2389}23902391MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];2392MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];23932394if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {2395if (CI->hasOneUse() && isValueAvailable(CI)) {2396// Try to optimize or fold the cmp.2397CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);2398switch (Predicate) {2399default:2400break;2401case CmpInst::FCMP_FALSE:2402fastEmitBranch(FBB, MIMD.getDL());2403return true;2404case CmpInst::FCMP_TRUE:2405fastEmitBranch(TBB, MIMD.getDL());2406return true;2407}24082409// Try to emit a combined compare-and-branch first.2410if (emitCompareAndBranch(BI))2411return true;24122413// Try to take advantage of fallthrough opportunities.2414if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {2415std::swap(TBB, FBB);2416Predicate = CmpInst::getInversePredicate(Predicate);2417}24182419// Emit the cmp.2420if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))2421return false;24222423// FCMP_UEQ and FCMP_ONE cannot be checked with a single 
branch2424// instruction.2425AArch64CC::CondCode CC = getCompareCC(Predicate);2426AArch64CC::CondCode ExtraCC = AArch64CC::AL;2427switch (Predicate) {2428default:2429break;2430case CmpInst::FCMP_UEQ:2431ExtraCC = AArch64CC::EQ;2432CC = AArch64CC::VS;2433break;2434case CmpInst::FCMP_ONE:2435ExtraCC = AArch64CC::MI;2436CC = AArch64CC::GT;2437break;2438}2439assert((CC != AArch64CC::AL) && "Unexpected condition code.");24402441// Emit the extra branch for FCMP_UEQ and FCMP_ONE.2442if (ExtraCC != AArch64CC::AL) {2443BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))2444.addImm(ExtraCC)2445.addMBB(TBB);2446}24472448// Emit the branch.2449BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))2450.addImm(CC)2451.addMBB(TBB);24522453finishCondBranch(BI->getParent(), TBB, FBB);2454return true;2455}2456} else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {2457uint64_t Imm = CI->getZExtValue();2458MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;2459BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))2460.addMBB(Target);24612462// Obtain the branch probability and add the target to the successor list.2463if (FuncInfo.BPI) {2464auto BranchProbability = FuncInfo.BPI->getEdgeProbability(2465BI->getParent(), Target->getBasicBlock());2466FuncInfo.MBB->addSuccessor(Target, BranchProbability);2467} else2468FuncInfo.MBB->addSuccessorWithoutProb(Target);2469return true;2470} else {2471AArch64CC::CondCode CC = AArch64CC::NE;2472if (foldXALUIntrinsic(CC, I, BI->getCondition())) {2473// Fake request the condition, otherwise the intrinsic might be completely2474// optimized away.2475Register CondReg = getRegForValue(BI->getCondition());2476if (!CondReg)2477return false;24782479// Emit the branch.2480BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))2481.addImm(CC)2482.addMBB(TBB);24832484finishCondBranch(BI->getParent(), TBB, FBB);2485return true;2486}2487}24882489Register CondReg = getRegForValue(BI->getCondition());2490if (CondReg == 0)2491return false;24922493// i1 conditions come as i32 values, test the lowest bit with tb(n)z.2494unsigned Opcode = AArch64::TBNZW;2495if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {2496std::swap(TBB, FBB);2497Opcode = AArch64::TBZW;2498}24992500const MCInstrDesc &II = TII.get(Opcode);2501Register ConstrainedCondReg2502= constrainOperandRegClass(II, CondReg, II.getNumDefs());2503BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)2504.addReg(ConstrainedCondReg)2505.addImm(0)2506.addMBB(TBB);25072508finishCondBranch(BI->getParent(), TBB, FBB);2509return true;2510}25112512bool AArch64FastISel::selectIndirectBr(const Instruction *I) {2513const IndirectBrInst *BI = cast<IndirectBrInst>(I);2514Register AddrReg = getRegForValue(BI->getOperand(0));2515if (AddrReg == 0)2516return false;25172518// Authenticated indirectbr is not implemented yet.2519if (FuncInfo.MF->getFunction().hasFnAttribute("ptrauth-indirect-gotos"))2520return false;25212522// Emit the indirect branch.2523const MCInstrDesc &II = TII.get(AArch64::BR);2524AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());2525BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);25262527// Make sure the CFG is up-to-date.2528for (const auto *Succ : BI->successors())2529FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);25302531return true;2532}25332534bool AArch64FastISel::selectCmp(const Instruction *I) {2535const CmpInst *CI = cast<CmpInst>(I);25362537// Vectors of i1 are weird: bail out.2538if 
(CI->getType()->isVectorTy())2539return false;25402541// Try to optimize or fold the cmp.2542CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);2543unsigned ResultReg = 0;2544switch (Predicate) {2545default:2546break;2547case CmpInst::FCMP_FALSE:2548ResultReg = createResultReg(&AArch64::GPR32RegClass);2549BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,2550TII.get(TargetOpcode::COPY), ResultReg)2551.addReg(AArch64::WZR, getKillRegState(true));2552break;2553case CmpInst::FCMP_TRUE:2554ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);2555break;2556}25572558if (ResultReg) {2559updateValueMap(I, ResultReg);2560return true;2561}25622563// Emit the cmp.2564if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))2565return false;25662567ResultReg = createResultReg(&AArch64::GPR32RegClass);25682569// FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These2570// condition codes are inverted, because they are used by CSINC.2571static unsigned CondCodeTable[2][2] = {2572{ AArch64CC::NE, AArch64CC::VC },2573{ AArch64CC::PL, AArch64CC::LE }2574};2575unsigned *CondCodes = nullptr;2576switch (Predicate) {2577default:2578break;2579case CmpInst::FCMP_UEQ:2580CondCodes = &CondCodeTable[0][0];2581break;2582case CmpInst::FCMP_ONE:2583CondCodes = &CondCodeTable[1][0];2584break;2585}25862587if (CondCodes) {2588Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);2589BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),2590TmpReg1)2591.addReg(AArch64::WZR, getKillRegState(true))2592.addReg(AArch64::WZR, getKillRegState(true))2593.addImm(CondCodes[0]);2594BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),2595ResultReg)2596.addReg(TmpReg1, getKillRegState(true))2597.addReg(AArch64::WZR, getKillRegState(true))2598.addImm(CondCodes[1]);25992600updateValueMap(I, ResultReg);2601return true;2602}26032604// Now set a register based on the comparison.2605AArch64CC::CondCode CC = getCompareCC(Predicate);2606assert((CC != AArch64CC::AL) && "Unexpected condition code.");2607AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);2608BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),2609ResultReg)2610.addReg(AArch64::WZR, getKillRegState(true))2611.addReg(AArch64::WZR, getKillRegState(true))2612.addImm(invertedCC);26132614updateValueMap(I, ResultReg);2615return true;2616}26172618/// Optimize selects of i1 if one of the operands has a 'true' or 'false'2619/// value.2620bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {2621if (!SI->getType()->isIntegerTy(1))2622return false;26232624const Value *Src1Val, *Src2Val;2625unsigned Opc = 0;2626bool NeedExtraOp = false;2627if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {2628if (CI->isOne()) {2629Src1Val = SI->getCondition();2630Src2Val = SI->getFalseValue();2631Opc = AArch64::ORRWrr;2632} else {2633assert(CI->isZero());2634Src1Val = SI->getFalseValue();2635Src2Val = SI->getCondition();2636Opc = AArch64::BICWrr;2637}2638} else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {2639if (CI->isOne()) {2640Src1Val = SI->getCondition();2641Src2Val = SI->getTrueValue();2642Opc = AArch64::ORRWrr;2643NeedExtraOp = true;2644} else {2645assert(CI->isZero());2646Src1Val = SI->getCondition();2647Src2Val = SI->getTrueValue();2648Opc = AArch64::ANDWrr;2649}2650}26512652if (!Opc)2653return false;26542655Register Src1Reg = getRegForValue(Src1Val);2656if (!Src1Reg)2657return false;26582659Register Src2Reg = getRegForValue(Src2Val);2660if 
(!Src2Reg)2661return false;26622663if (NeedExtraOp)2664Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);26652666Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,2667Src2Reg);2668updateValueMap(SI, ResultReg);2669return true;2670}26712672bool AArch64FastISel::selectSelect(const Instruction *I) {2673assert(isa<SelectInst>(I) && "Expected a select instruction.");2674MVT VT;2675if (!isTypeSupported(I->getType(), VT))2676return false;26772678unsigned Opc;2679const TargetRegisterClass *RC;2680switch (VT.SimpleTy) {2681default:2682return false;2683case MVT::i1:2684case MVT::i8:2685case MVT::i16:2686case MVT::i32:2687Opc = AArch64::CSELWr;2688RC = &AArch64::GPR32RegClass;2689break;2690case MVT::i64:2691Opc = AArch64::CSELXr;2692RC = &AArch64::GPR64RegClass;2693break;2694case MVT::f32:2695Opc = AArch64::FCSELSrrr;2696RC = &AArch64::FPR32RegClass;2697break;2698case MVT::f64:2699Opc = AArch64::FCSELDrrr;2700RC = &AArch64::FPR64RegClass;2701break;2702}27032704const SelectInst *SI = cast<SelectInst>(I);2705const Value *Cond = SI->getCondition();2706AArch64CC::CondCode CC = AArch64CC::NE;2707AArch64CC::CondCode ExtraCC = AArch64CC::AL;27082709if (optimizeSelect(SI))2710return true;27112712// Try to pickup the flags, so we don't have to emit another compare.2713if (foldXALUIntrinsic(CC, I, Cond)) {2714// Fake request the condition to force emission of the XALU intrinsic.2715Register CondReg = getRegForValue(Cond);2716if (!CondReg)2717return false;2718} else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&2719isValueAvailable(Cond)) {2720const auto *Cmp = cast<CmpInst>(Cond);2721// Try to optimize or fold the cmp.2722CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);2723const Value *FoldSelect = nullptr;2724switch (Predicate) {2725default:2726break;2727case CmpInst::FCMP_FALSE:2728FoldSelect = SI->getFalseValue();2729break;2730case CmpInst::FCMP_TRUE:2731FoldSelect = SI->getTrueValue();2732break;2733}27342735if (FoldSelect) {2736Register SrcReg = getRegForValue(FoldSelect);2737if (!SrcReg)2738return false;27392740updateValueMap(I, SrcReg);2741return true;2742}27432744// Emit the cmp.2745if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))2746return false;27472748// FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.2749CC = getCompareCC(Predicate);2750switch (Predicate) {2751default:2752break;2753case CmpInst::FCMP_UEQ:2754ExtraCC = AArch64CC::EQ;2755CC = AArch64CC::VS;2756break;2757case CmpInst::FCMP_ONE:2758ExtraCC = AArch64CC::MI;2759CC = AArch64CC::GT;2760break;2761}2762assert((CC != AArch64CC::AL) && "Unexpected condition code.");2763} else {2764Register CondReg = getRegForValue(Cond);2765if (!CondReg)2766return false;27672768const MCInstrDesc &II = TII.get(AArch64::ANDSWri);2769CondReg = constrainOperandRegClass(II, CondReg, 1);27702771// Emit a TST instruction (ANDS wzr, reg, #imm).2772BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,2773AArch64::WZR)2774.addReg(CondReg)2775.addImm(AArch64_AM::encodeLogicalImmediate(1, 32));2776}27772778Register Src1Reg = getRegForValue(SI->getTrueValue());2779Register Src2Reg = getRegForValue(SI->getFalseValue());27802781if (!Src1Reg || !Src2Reg)2782return false;27832784if (ExtraCC != AArch64CC::AL)2785Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);27862787Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);2788updateValueMap(I, ResultReg);2789return true;2790}27912792bool AArch64FastISel::selectFPExt(const Instruction *I) {2793Value 
*V = I->getOperand(0);2794if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())2795return false;27962797Register Op = getRegForValue(V);2798if (Op == 0)2799return false;28002801Register ResultReg = createResultReg(&AArch64::FPR64RegClass);2802BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),2803ResultReg).addReg(Op);2804updateValueMap(I, ResultReg);2805return true;2806}28072808bool AArch64FastISel::selectFPTrunc(const Instruction *I) {2809Value *V = I->getOperand(0);2810if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())2811return false;28122813Register Op = getRegForValue(V);2814if (Op == 0)2815return false;28162817Register ResultReg = createResultReg(&AArch64::FPR32RegClass);2818BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),2819ResultReg).addReg(Op);2820updateValueMap(I, ResultReg);2821return true;2822}28232824// FPToUI and FPToSI2825bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {2826MVT DestVT;2827if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())2828return false;28292830Register SrcReg = getRegForValue(I->getOperand(0));2831if (SrcReg == 0)2832return false;28332834EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);2835if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)2836return false;28372838unsigned Opc;2839if (SrcVT == MVT::f64) {2840if (Signed)2841Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;2842else2843Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;2844} else {2845if (Signed)2846Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;2847else2848Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;2849}2850Register ResultReg = createResultReg(2851DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);2852BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)2853.addReg(SrcReg);2854updateValueMap(I, ResultReg);2855return true;2856}28572858bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {2859MVT DestVT;2860if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())2861return false;2862// Let regular ISEL handle FP162863if (DestVT == MVT::f16 || DestVT == MVT::bf16)2864return false;28652866assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&2867"Unexpected value type.");28682869Register SrcReg = getRegForValue(I->getOperand(0));2870if (!SrcReg)2871return false;28722873EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);28742875// Handle sign-extension.2876if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {2877SrcReg =2878emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);2879if (!SrcReg)2880return false;2881}28822883unsigned Opc;2884if (SrcVT == MVT::i64) {2885if (Signed)2886Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;2887else2888Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;2889} else {2890if (Signed)2891Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;2892else2893Opc = (DestVT == MVT::f32) ? 
AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;2894}28952896Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);2897updateValueMap(I, ResultReg);2898return true;2899}29002901bool AArch64FastISel::fastLowerArguments() {2902if (!FuncInfo.CanLowerReturn)2903return false;29042905const Function *F = FuncInfo.Fn;2906if (F->isVarArg())2907return false;29082909CallingConv::ID CC = F->getCallingConv();2910if (CC != CallingConv::C && CC != CallingConv::Swift)2911return false;29122913if (Subtarget->hasCustomCallingConv())2914return false;29152916// Only handle simple cases of up to 8 GPR and FPR each.2917unsigned GPRCnt = 0;2918unsigned FPRCnt = 0;2919for (auto const &Arg : F->args()) {2920if (Arg.hasAttribute(Attribute::ByVal) ||2921Arg.hasAttribute(Attribute::InReg) ||2922Arg.hasAttribute(Attribute::StructRet) ||2923Arg.hasAttribute(Attribute::SwiftSelf) ||2924Arg.hasAttribute(Attribute::SwiftAsync) ||2925Arg.hasAttribute(Attribute::SwiftError) ||2926Arg.hasAttribute(Attribute::Nest))2927return false;29282929Type *ArgTy = Arg.getType();2930if (ArgTy->isStructTy() || ArgTy->isArrayTy())2931return false;29322933EVT ArgVT = TLI.getValueType(DL, ArgTy);2934if (!ArgVT.isSimple())2935return false;29362937MVT VT = ArgVT.getSimpleVT().SimpleTy;2938if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())2939return false;29402941if (VT.isVector() &&2942(!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))2943return false;29442945if (VT >= MVT::i1 && VT <= MVT::i64)2946++GPRCnt;2947else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||2948VT.is128BitVector())2949++FPRCnt;2950else2951return false;29522953if (GPRCnt > 8 || FPRCnt > 8)2954return false;2955}29562957static const MCPhysReg Registers[6][8] = {2958{ AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,2959AArch64::W5, AArch64::W6, AArch64::W7 },2960{ AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,2961AArch64::X5, AArch64::X6, AArch64::X7 },2962{ AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,2963AArch64::H5, AArch64::H6, AArch64::H7 },2964{ AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,2965AArch64::S5, AArch64::S6, AArch64::S7 },2966{ AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,2967AArch64::D5, AArch64::D6, AArch64::D7 },2968{ AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,2969AArch64::Q5, AArch64::Q6, AArch64::Q7 }2970};29712972unsigned GPRIdx = 0;2973unsigned FPRIdx = 0;2974for (auto const &Arg : F->args()) {2975MVT VT = TLI.getSimpleValueType(DL, Arg.getType());2976unsigned SrcReg;2977const TargetRegisterClass *RC;2978if (VT >= MVT::i1 && VT <= MVT::i32) {2979SrcReg = Registers[0][GPRIdx++];2980RC = &AArch64::GPR32RegClass;2981VT = MVT::i32;2982} else if (VT == MVT::i64) {2983SrcReg = Registers[1][GPRIdx++];2984RC = &AArch64::GPR64RegClass;2985} else if (VT == MVT::f16 || VT == MVT::bf16) {2986SrcReg = Registers[2][FPRIdx++];2987RC = &AArch64::FPR16RegClass;2988} else if (VT == MVT::f32) {2989SrcReg = Registers[3][FPRIdx++];2990RC = &AArch64::FPR32RegClass;2991} else if ((VT == MVT::f64) || VT.is64BitVector()) {2992SrcReg = Registers[4][FPRIdx++];2993RC = &AArch64::FPR64RegClass;2994} else if (VT.is128BitVector()) {2995SrcReg = Registers[5][FPRIdx++];2996RC = &AArch64::FPR128RegClass;2997} else2998llvm_unreachable("Unexpected value type.");29993000Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);3001// FIXME: Unfortunately it's necessary to emit a copy from the livein copy.3002// Without this, EmitLiveInCopies 
may eliminate the livein if its only3003// use is a bitcast (which isn't turned into an instruction).3004Register ResultReg = createResultReg(RC);3005BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,3006TII.get(TargetOpcode::COPY), ResultReg)3007.addReg(DstReg, getKillRegState(true));3008updateValueMap(&Arg, ResultReg);3009}3010return true;3011}30123013bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,3014SmallVectorImpl<MVT> &OutVTs,3015unsigned &NumBytes) {3016CallingConv::ID CC = CLI.CallConv;3017SmallVector<CCValAssign, 16> ArgLocs;3018CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);3019CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));30203021// Get a count of how many bytes are to be pushed on the stack.3022NumBytes = CCInfo.getStackSize();30233024// Issue CALLSEQ_START3025unsigned AdjStackDown = TII.getCallFrameSetupOpcode();3026BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))3027.addImm(NumBytes).addImm(0);30283029// Process the args.3030for (CCValAssign &VA : ArgLocs) {3031const Value *ArgVal = CLI.OutVals[VA.getValNo()];3032MVT ArgVT = OutVTs[VA.getValNo()];30333034Register ArgReg = getRegForValue(ArgVal);3035if (!ArgReg)3036return false;30373038// Handle arg promotion: SExt, ZExt, AExt.3039switch (VA.getLocInfo()) {3040case CCValAssign::Full:3041break;3042case CCValAssign::SExt: {3043MVT DestVT = VA.getLocVT();3044MVT SrcVT = ArgVT;3045ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);3046if (!ArgReg)3047return false;3048break;3049}3050case CCValAssign::AExt:3051// Intentional fall-through.3052case CCValAssign::ZExt: {3053MVT DestVT = VA.getLocVT();3054MVT SrcVT = ArgVT;3055ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);3056if (!ArgReg)3057return false;3058break;3059}3060default:3061llvm_unreachable("Unknown arg promotion!");3062}30633064// Now copy/store arg to correct locations.3065if (VA.isRegLoc() && !VA.needsCustom()) {3066BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,3067TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);3068CLI.OutRegs.push_back(VA.getLocReg());3069} else if (VA.needsCustom()) {3070// FIXME: Handle custom args.3071return false;3072} else {3073assert(VA.isMemLoc() && "Assuming store on stack.");30743075// Don't emit stores for undef values.3076if (isa<UndefValue>(ArgVal))3077continue;30783079// Need to store on the stack.3080unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;30813082unsigned BEAlign = 0;3083if (ArgSize < 8 && !Subtarget->isLittleEndian())3084BEAlign = 8 - ArgSize;30853086Address Addr;3087Addr.setKind(Address::RegBase);3088Addr.setReg(AArch64::SP);3089Addr.setOffset(VA.getLocMemOffset() + BEAlign);30903091Align Alignment = DL.getABITypeAlign(ArgVal->getType());3092MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(3093MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),3094MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);30953096if (!emitStore(ArgVT, ArgReg, Addr, MMO))3097return false;3098}3099}3100return true;3101}31023103bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {3104CallingConv::ID CC = CLI.CallConv;31053106// Issue CALLSEQ_END3107unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();3108BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))3109.addImm(NumBytes).addImm(0);31103111// Now the return values.3112SmallVector<CCValAssign, 16> RVLocs;3113CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);3114CCInfo.AnalyzeCallResult(CLI.Ins, 
CCAssignFnForCall(CC));31153116Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);3117for (unsigned i = 0; i != RVLocs.size(); ++i) {3118CCValAssign &VA = RVLocs[i];3119MVT CopyVT = VA.getValVT();3120unsigned CopyReg = ResultReg + i;31213122// TODO: Handle big-endian results3123if (CopyVT.isVector() && !Subtarget->isLittleEndian())3124return false;31253126// Copy result out of their specified physreg.3127BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),3128CopyReg)3129.addReg(VA.getLocReg());3130CLI.InRegs.push_back(VA.getLocReg());3131}31323133CLI.ResultReg = ResultReg;3134CLI.NumResultRegs = RVLocs.size();31353136return true;3137}31383139bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {3140CallingConv::ID CC = CLI.CallConv;3141bool IsTailCall = CLI.IsTailCall;3142bool IsVarArg = CLI.IsVarArg;3143const Value *Callee = CLI.Callee;3144MCSymbol *Symbol = CLI.Symbol;31453146if (!Callee && !Symbol)3147return false;31483149// Allow SelectionDAG isel to handle calls to functions like setjmp that need3150// a bti instruction following the call.3151if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&3152!Subtarget->noBTIAtReturnTwice() &&3153MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())3154return false;31553156// Allow SelectionDAG isel to handle indirect calls with KCFI checks.3157if (CLI.CB && CLI.CB->isIndirectCall() &&3158CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))3159return false;31603161// Allow SelectionDAG isel to handle tail calls.3162if (IsTailCall)3163return false;31643165// FIXME: we could and should support this, but for now correctness at -O0 is3166// more important.3167if (Subtarget->isTargetILP32())3168return false;31693170CodeModel::Model CM = TM.getCodeModel();3171// Only support the small-addressing and large code models.3172if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())3173return false;31743175// FIXME: Add large code model support for ELF.3176if (CM == CodeModel::Large && !Subtarget->isTargetMachO())3177return false;31783179// ELF -fno-plt compiled intrinsic calls do not have the nonlazybind3180// attribute. 
Check "RtLibUseGOT" instead.3181if (MF->getFunction().getParent()->getRtLibUseGOT())3182return false;31833184// Let SDISel handle vararg functions.3185if (IsVarArg)3186return false;31873188if (Subtarget->isWindowsArm64EC())3189return false;31903191for (auto Flag : CLI.OutFlags)3192if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||3193Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())3194return false;31953196// Set up the argument vectors.3197SmallVector<MVT, 16> OutVTs;3198OutVTs.reserve(CLI.OutVals.size());31993200for (auto *Val : CLI.OutVals) {3201MVT VT;3202if (!isTypeLegal(Val->getType(), VT) &&3203!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))3204return false;32053206// We don't handle vector parameters yet.3207if (VT.isVector() || VT.getSizeInBits() > 64)3208return false;32093210OutVTs.push_back(VT);3211}32123213Address Addr;3214if (Callee && !computeCallAddress(Callee, Addr))3215return false;32163217// The weak function target may be zero; in that case we must use indirect3218// addressing via a stub on windows as it may be out of range for a3219// PC-relative jump.3220if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&3221Addr.getGlobalValue()->hasExternalWeakLinkage())3222return false;32233224// Handle the arguments now that we've gotten them.3225unsigned NumBytes;3226if (!processCallArgs(CLI, OutVTs, NumBytes))3227return false;32283229const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();3230if (RegInfo->isAnyArgRegReserved(*MF))3231RegInfo->emitReservedArgRegCallError(*MF);32323233// Issue the call.3234MachineInstrBuilder MIB;3235if (Subtarget->useSmallAddressing()) {3236const MCInstrDesc &II =3237TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);3238MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);3239if (Symbol)3240MIB.addSym(Symbol, 0);3241else if (Addr.getGlobalValue())3242MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);3243else if (Addr.getReg()) {3244Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);3245MIB.addReg(Reg);3246} else3247return false;3248} else {3249unsigned CallReg = 0;3250if (Symbol) {3251Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);3252BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),3253ADRPReg)3254.addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);32553256CallReg = createResultReg(&AArch64::GPR64RegClass);3257BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,3258TII.get(AArch64::LDRXui), CallReg)3259.addReg(ADRPReg)3260.addSym(Symbol,3261AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);3262} else if (Addr.getGlobalValue())3263CallReg = materializeGV(Addr.getGlobalValue());3264else if (Addr.getReg())3265CallReg = Addr.getReg();32663267if (!CallReg)3268return false;32693270const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));3271CallReg = constrainOperandRegClass(II, CallReg, 0);3272MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);3273}32743275// Add implicit physical register uses to the call.3276for (auto Reg : CLI.OutRegs)3277MIB.addReg(Reg, RegState::Implicit);32783279// Add a register mask with the call-preserved registers.3280// Proper defs for return values will be added by setPhysRegsDeadExcept().3281MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));32823283CLI.Call = MIB;32843285// Finish off the call including any return values.3286return finishCall(CLI, NumBytes);3287}32883289bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) 
{3290if (Alignment)3291return Len / Alignment->value() <= 4;3292else3293return Len < 32;3294}32953296bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,3297uint64_t Len, MaybeAlign Alignment) {3298// Make sure we don't bloat code by inlining very large memcpy's.3299if (!isMemCpySmall(Len, Alignment))3300return false;33013302int64_t UnscaledOffset = 0;3303Address OrigDest = Dest;3304Address OrigSrc = Src;33053306while (Len) {3307MVT VT;3308if (!Alignment || *Alignment >= 8) {3309if (Len >= 8)3310VT = MVT::i64;3311else if (Len >= 4)3312VT = MVT::i32;3313else if (Len >= 2)3314VT = MVT::i16;3315else {3316VT = MVT::i8;3317}3318} else {3319assert(Alignment && "Alignment is set in this branch");3320// Bound based on alignment.3321if (Len >= 4 && *Alignment == 4)3322VT = MVT::i32;3323else if (Len >= 2 && *Alignment == 2)3324VT = MVT::i16;3325else {3326VT = MVT::i8;3327}3328}33293330unsigned ResultReg = emitLoad(VT, VT, Src);3331if (!ResultReg)3332return false;33333334if (!emitStore(VT, ResultReg, Dest))3335return false;33363337int64_t Size = VT.getSizeInBits() / 8;3338Len -= Size;3339UnscaledOffset += Size;33403341// We need to recompute the unscaled offset for each iteration.3342Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);3343Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);3344}33453346return true;3347}33483349/// Check if it is possible to fold the condition from the XALU intrinsic3350/// into the user. The condition code will only be updated on success.3351bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,3352const Instruction *I,3353const Value *Cond) {3354if (!isa<ExtractValueInst>(Cond))3355return false;33563357const auto *EV = cast<ExtractValueInst>(Cond);3358if (!isa<IntrinsicInst>(EV->getAggregateOperand()))3359return false;33603361const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());3362MVT RetVT;3363const Function *Callee = II->getCalledFunction();3364Type *RetTy =3365cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);3366if (!isTypeLegal(RetTy, RetVT))3367return false;33683369if (RetVT != MVT::i32 && RetVT != MVT::i64)3370return false;33713372const Value *LHS = II->getArgOperand(0);3373const Value *RHS = II->getArgOperand(1);33743375// Canonicalize immediate to the RHS.3376if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())3377std::swap(LHS, RHS);33783379// Simplify multiplies.3380Intrinsic::ID IID = II->getIntrinsicID();3381switch (IID) {3382default:3383break;3384case Intrinsic::smul_with_overflow:3385if (const auto *C = dyn_cast<ConstantInt>(RHS))3386if (C->getValue() == 2)3387IID = Intrinsic::sadd_with_overflow;3388break;3389case Intrinsic::umul_with_overflow:3390if (const auto *C = dyn_cast<ConstantInt>(RHS))3391if (C->getValue() == 2)3392IID = Intrinsic::uadd_with_overflow;3393break;3394}33953396AArch64CC::CondCode TmpCC;3397switch (IID) {3398default:3399return false;3400case Intrinsic::sadd_with_overflow:3401case Intrinsic::ssub_with_overflow:3402TmpCC = AArch64CC::VS;3403break;3404case Intrinsic::uadd_with_overflow:3405TmpCC = AArch64CC::HS;3406break;3407case Intrinsic::usub_with_overflow:3408TmpCC = AArch64CC::LO;3409break;3410case Intrinsic::smul_with_overflow:3411case Intrinsic::umul_with_overflow:3412TmpCC = AArch64CC::NE;3413break;3414}34153416// Check if both instructions are in the same basic block.3417if (!isValueAvailable(II))3418return false;34193420// Make sure nothing is in the way3421BasicBlock::const_iterator Start(I);3422BasicBlock::const_iterator End(II);3423for (auto Itr = 
std::prev(Start); Itr != End; --Itr) {3424// We only expect extractvalue instructions between the intrinsic and the3425// instruction to be selected.3426if (!isa<ExtractValueInst>(Itr))3427return false;34283429// Check that the extractvalue operand comes from the intrinsic.3430const auto *EVI = cast<ExtractValueInst>(Itr);3431if (EVI->getAggregateOperand() != II)3432return false;3433}34343435CC = TmpCC;3436return true;3437}34383439bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {3440// FIXME: Handle more intrinsics.3441switch (II->getIntrinsicID()) {3442default: return false;3443case Intrinsic::frameaddress: {3444MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();3445MFI.setFrameAddressIsTaken(true);34463447const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();3448Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));3449Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);3450BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,3451TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);3452// Recursively load frame address3453// ldr x0, [fp]3454// ldr x0, [x0]3455// ldr x0, [x0]3456// ...3457unsigned DestReg;3458unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();3459while (Depth--) {3460DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,3461SrcReg, 0);3462assert(DestReg && "Unexpected LDR instruction emission failure.");3463SrcReg = DestReg;3464}34653466updateValueMap(II, SrcReg);3467return true;3468}3469case Intrinsic::sponentry: {3470MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();34713472// SP = FP + Fixed Object + 163473int FI = MFI.CreateFixedObject(4, 0, false);3474Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);3475BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,3476TII.get(AArch64::ADDXri), ResultReg)3477.addFrameIndex(FI)3478.addImm(0)3479.addImm(0);34803481updateValueMap(II, ResultReg);3482return true;3483}3484case Intrinsic::memcpy:3485case Intrinsic::memmove: {3486const auto *MTI = cast<MemTransferInst>(II);3487// Don't handle volatile.3488if (MTI->isVolatile())3489return false;34903491// Disable inlining for memmove before calls to ComputeAddress. Otherwise,3492// we would emit dead code because we don't currently handle memmoves.3493bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);3494if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {3495// Small memcpy's are common enough that we want to do them without a call3496// if possible.3497uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();3498MaybeAlign Alignment;3499if (MTI->getDestAlign() || MTI->getSourceAlign())3500Alignment = std::min(MTI->getDestAlign().valueOrOne(),3501MTI->getSourceAlign().valueOrOne());3502if (isMemCpySmall(Len, Alignment)) {3503Address Dest, Src;3504if (!computeAddress(MTI->getRawDest(), Dest) ||3505!computeAddress(MTI->getRawSource(), Src))3506return false;3507if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))3508return true;3509}3510}35113512if (!MTI->getLength()->getType()->isIntegerTy(64))3513return false;35143515if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)3516// Fast instruction selection doesn't support the special3517// address spaces.3518return false;35193520const char *IntrMemName = isa<MemCpyInst>(II) ? 
"memcpy" : "memmove";3521return lowerCallTo(II, IntrMemName, II->arg_size() - 1);3522}3523case Intrinsic::memset: {3524const MemSetInst *MSI = cast<MemSetInst>(II);3525// Don't handle volatile.3526if (MSI->isVolatile())3527return false;35283529if (!MSI->getLength()->getType()->isIntegerTy(64))3530return false;35313532if (MSI->getDestAddressSpace() > 255)3533// Fast instruction selection doesn't support the special3534// address spaces.3535return false;35363537return lowerCallTo(II, "memset", II->arg_size() - 1);3538}3539case Intrinsic::sin:3540case Intrinsic::cos:3541case Intrinsic::tan:3542case Intrinsic::pow: {3543MVT RetVT;3544if (!isTypeLegal(II->getType(), RetVT))3545return false;35463547if (RetVT != MVT::f32 && RetVT != MVT::f64)3548return false;35493550static const RTLIB::Libcall LibCallTable[4][2] = {3551{RTLIB::SIN_F32, RTLIB::SIN_F64},3552{RTLIB::COS_F32, RTLIB::COS_F64},3553{RTLIB::TAN_F32, RTLIB::TAN_F64},3554{RTLIB::POW_F32, RTLIB::POW_F64}};3555RTLIB::Libcall LC;3556bool Is64Bit = RetVT == MVT::f64;3557switch (II->getIntrinsicID()) {3558default:3559llvm_unreachable("Unexpected intrinsic.");3560case Intrinsic::sin:3561LC = LibCallTable[0][Is64Bit];3562break;3563case Intrinsic::cos:3564LC = LibCallTable[1][Is64Bit];3565break;3566case Intrinsic::tan:3567LC = LibCallTable[2][Is64Bit];3568break;3569case Intrinsic::pow:3570LC = LibCallTable[3][Is64Bit];3571break;3572}35733574ArgListTy Args;3575Args.reserve(II->arg_size());35763577// Populate the argument list.3578for (auto &Arg : II->args()) {3579ArgListEntry Entry;3580Entry.Val = Arg;3581Entry.Ty = Arg->getType();3582Args.push_back(Entry);3583}35843585CallLoweringInfo CLI;3586MCContext &Ctx = MF->getContext();3587CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),3588TLI.getLibcallName(LC), std::move(Args));3589if (!lowerCallTo(CLI))3590return false;3591updateValueMap(II, CLI.ResultReg);3592return true;3593}3594case Intrinsic::fabs: {3595MVT VT;3596if (!isTypeLegal(II->getType(), VT))3597return false;35983599unsigned Opc;3600switch (VT.SimpleTy) {3601default:3602return false;3603case MVT::f32:3604Opc = AArch64::FABSSr;3605break;3606case MVT::f64:3607Opc = AArch64::FABSDr;3608break;3609}3610Register SrcReg = getRegForValue(II->getOperand(0));3611if (!SrcReg)3612return false;3613Register ResultReg = createResultReg(TLI.getRegClassFor(VT));3614BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)3615.addReg(SrcReg);3616updateValueMap(II, ResultReg);3617return true;3618}3619case Intrinsic::trap:3620BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))3621.addImm(1);3622return true;3623case Intrinsic::debugtrap:3624BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))3625.addImm(0xF000);3626return true;36273628case Intrinsic::sqrt: {3629Type *RetTy = II->getCalledFunction()->getReturnType();36303631MVT VT;3632if (!isTypeLegal(RetTy, VT))3633return false;36343635Register Op0Reg = getRegForValue(II->getOperand(0));3636if (!Op0Reg)3637return false;36383639unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);3640if (!ResultReg)3641return false;36423643updateValueMap(II, ResultReg);3644return true;3645}3646case Intrinsic::sadd_with_overflow:3647case Intrinsic::uadd_with_overflow:3648case Intrinsic::ssub_with_overflow:3649case Intrinsic::usub_with_overflow:3650case Intrinsic::smul_with_overflow:3651case Intrinsic::umul_with_overflow: {3652// This implements the basic lowering of the xalu with overflow intrinsics.3653const Function *Callee = 
II->getCalledFunction();3654auto *Ty = cast<StructType>(Callee->getReturnType());3655Type *RetTy = Ty->getTypeAtIndex(0U);36563657MVT VT;3658if (!isTypeLegal(RetTy, VT))3659return false;36603661if (VT != MVT::i32 && VT != MVT::i64)3662return false;36633664const Value *LHS = II->getArgOperand(0);3665const Value *RHS = II->getArgOperand(1);3666// Canonicalize immediate to the RHS.3667if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())3668std::swap(LHS, RHS);36693670// Simplify multiplies.3671Intrinsic::ID IID = II->getIntrinsicID();3672switch (IID) {3673default:3674break;3675case Intrinsic::smul_with_overflow:3676if (const auto *C = dyn_cast<ConstantInt>(RHS))3677if (C->getValue() == 2) {3678IID = Intrinsic::sadd_with_overflow;3679RHS = LHS;3680}3681break;3682case Intrinsic::umul_with_overflow:3683if (const auto *C = dyn_cast<ConstantInt>(RHS))3684if (C->getValue() == 2) {3685IID = Intrinsic::uadd_with_overflow;3686RHS = LHS;3687}3688break;3689}36903691unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;3692AArch64CC::CondCode CC = AArch64CC::Invalid;3693switch (IID) {3694default: llvm_unreachable("Unexpected intrinsic!");3695case Intrinsic::sadd_with_overflow:3696ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);3697CC = AArch64CC::VS;3698break;3699case Intrinsic::uadd_with_overflow:3700ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);3701CC = AArch64CC::HS;3702break;3703case Intrinsic::ssub_with_overflow:3704ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);3705CC = AArch64CC::VS;3706break;3707case Intrinsic::usub_with_overflow:3708ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);3709CC = AArch64CC::LO;3710break;3711case Intrinsic::smul_with_overflow: {3712CC = AArch64CC::NE;3713Register LHSReg = getRegForValue(LHS);3714if (!LHSReg)3715return false;37163717Register RHSReg = getRegForValue(RHS);3718if (!RHSReg)3719return false;37203721if (VT == MVT::i32) {3722MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);3723Register MulSubReg =3724fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);3725// cmp xreg, wreg, sxtw3726emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,3727AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,3728/*WantResult=*/false);3729MulReg = MulSubReg;3730} else {3731assert(VT == MVT::i64 && "Unexpected value type.");3732// LHSReg and RHSReg cannot be killed by this Mul, since they are3733// reused in the next instruction.3734MulReg = emitMul_rr(VT, LHSReg, RHSReg);3735unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);3736emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,3737/*WantResult=*/false);3738}3739break;3740}3741case Intrinsic::umul_with_overflow: {3742CC = AArch64CC::NE;3743Register LHSReg = getRegForValue(LHS);3744if (!LHSReg)3745return false;37463747Register RHSReg = getRegForValue(RHS);3748if (!RHSReg)3749return false;37503751if (VT == MVT::i32) {3752MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);3753// tst xreg, #0xffffffff000000003754BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,3755TII.get(AArch64::ANDSXri), AArch64::XZR)3756.addReg(MulReg)3757.addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));3758MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);3759} else {3760assert(VT == MVT::i64 && "Unexpected value type.");3761// LHSReg and RHSReg cannot be killed by this Mul, since they are3762// reused in the next instruction.3763MulReg = emitMul_rr(VT, LHSReg, RHSReg);3764unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);3765emitSubs_rr(VT, 
        unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
        emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
      }
      break;
    }
    }

    if (MulReg) {
      ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
    }

    if (!ResultReg1)
      return false;

    ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
                                  AArch64::WZR, AArch64::WZR,
                                  getInvertedCondCode(CC));
    (void)ResultReg2;
    assert((ResultReg1 + 1) == ResultReg2 &&
           "Nonconsecutive result registers.");
    updateValueMap(II, ResultReg1, 2);
    return true;
  }
  case Intrinsic::aarch64_crc32b:
  case Intrinsic::aarch64_crc32h:
  case Intrinsic::aarch64_crc32w:
  case Intrinsic::aarch64_crc32x:
  case Intrinsic::aarch64_crc32cb:
  case Intrinsic::aarch64_crc32ch:
  case Intrinsic::aarch64_crc32cw:
  case Intrinsic::aarch64_crc32cx: {
    if (!Subtarget->hasCRC())
      return false;

    unsigned Opc;
    switch (II->getIntrinsicID()) {
    default:
      llvm_unreachable("Unexpected intrinsic!");
    case Intrinsic::aarch64_crc32b:
      Opc = AArch64::CRC32Brr;
      break;
    case Intrinsic::aarch64_crc32h:
      Opc = AArch64::CRC32Hrr;
      break;
    case Intrinsic::aarch64_crc32w:
      Opc = AArch64::CRC32Wrr;
      break;
    case Intrinsic::aarch64_crc32x:
      Opc = AArch64::CRC32Xrr;
      break;
    case Intrinsic::aarch64_crc32cb:
      Opc = AArch64::CRC32CBrr;
      break;
    case Intrinsic::aarch64_crc32ch:
      Opc = AArch64::CRC32CHrr;
      break;
    case Intrinsic::aarch64_crc32cw:
      Opc = AArch64::CRC32CWrr;
      break;
    case Intrinsic::aarch64_crc32cx:
      Opc = AArch64::CRC32CXrr;
      break;
    }

    Register LHSReg = getRegForValue(II->getArgOperand(0));
    Register RHSReg = getRegForValue(II->getArgOperand(1));
    if (!LHSReg || !RHSReg)
      return false;

    Register ResultReg =
        fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
    updateValueMap(II, ResultReg);
    return true;
  }
  }
  return false;
}

bool AArch64FastISel::selectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (F.isVarArg())
    return false;

  if (TLI.supportSwiftError() &&
      F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
    return false;

  if (TLI.supportSplitCSR(FuncInfo.MF))
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  if (Ret->getNumOperands() > 0) {
    CallingConv::ID CC = F.getCallingConv();
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];
    const Value *RV = Ret->getOperand(0);

    // Don't bother handling odd stuff for now.
    if ((VA.getLocInfo() != CCValAssign::Full) &&
        (VA.getLocInfo() != CCValAssign::BCvt))
      return false;

    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    Register Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    Register DestReg = VA.getLocReg();
    // Avoid a cross-class copy. This is very unlikely.
    if (!MRI.getRegClass(SrcReg)->contains(DestReg))
      return false;

    EVT RVEVT = TLI.getValueType(DL, RV->getType());
    if (!RVEVT.isSimple())
      return false;

    // Vectors (of > 1 lane) in big endian need tricky handling.
    if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
        !Subtarget->isLittleEndian())
      return false;

    MVT RVVT = RVEVT.getSimpleVT();
    if (RVVT == MVT::f128)
      return false;

    MVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      bool IsZExt = Outs[0].Flags.isZExt();
      SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
      if (SrcReg == 0)
        return false;
    }

    // "Callee" (i.e. value producer) zero extends pointers at function
    // boundary.
    if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
      SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);

    // Make the copy.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                                    TII.get(AArch64::RET_ReallyLR));
  for (unsigned RetReg : RetRegs)
    MIB.addReg(RetReg, RegState::Implicit);
  return true;
}

bool AArch64FastISel::selectTrunc(const Instruction *I) {
  Type *DestTy = I->getType();
  Value *Op = I->getOperand(0);
  Type *SrcTy = Op->getType();

  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
  if (!SrcEVT.isSimple())
    return false;
  if (!DestEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();

  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i8)
    return false;
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
      DestVT != MVT::i1)
    return false;

  Register SrcReg = getRegForValue(Op);
  if (!SrcReg)
    return false;

  // If we're truncating from i64 to a smaller non-legal type then generate an
  // AND. Otherwise, we know the high bits are undefined and a truncate only
  // generates a COPY. We cannot mark the source register also as the result
  // register, because this can incorrectly transfer the kill flag onto the
  // source register.
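  // For example, truncating an i64 value to i8 becomes an EXTRACT_SUBREG of
  // the low 32 bits followed by an AND with 0xff.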
  unsigned ResultReg;
  if (SrcVT == MVT::i64) {
    uint64_t Mask = 0;
    switch (DestVT.SimpleTy) {
    default:
      // Trunc i64 to i32 is handled by the target-independent fast-isel.
      return false;
    case MVT::i1:
      Mask = 0x1;
      break;
    case MVT::i8:
      Mask = 0xff;
      break;
    case MVT::i16:
      Mask = 0xffff;
      break;
    }
    // Issue an extract_subreg to get the lower 32-bits.
    Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
                                                AArch64::sub_32);
    // Create the AND instruction which performs the actual truncation.
    ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
    assert(ResultReg && "Unexpected AND instruction emission failure.");
  } else {
    ResultReg = createResultReg(&AArch64::GPR32RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(SrcReg);
  }

  updateValueMap(I, ResultReg);
  return true;
}

unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
  assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
          DestVT == MVT::i64) &&
         "Unexpected value type.");
  // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;

  if (IsZExt) {
    unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
    assert(ResultReg && "Unexpected AND instruction emission failure.");
    if (DestVT == MVT::i64) {
      // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
      // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
      Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(AArch64::SUBREG_TO_REG), Reg64)
          .addImm(0)
          .addReg(ResultReg)
          .addImm(AArch64::sub_32);
      ResultReg = Reg64;
    }
    return ResultReg;
  } else {
    if (DestVT == MVT::i64) {
      // FIXME: We're SExt i1 to i64.
      return 0;
    }
    return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
                            0, 0);
  }
}

unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
  unsigned Opc, ZReg;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    RetVT = MVT::i32;
    Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
  case MVT::i64:
    Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
  }
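  // The multiply is emitted as MADD with the zero register as the accumulator
  // (MUL Rd, Rn, Rm is an alias of MADD Rd, Rn, Rm, ZR).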
  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
}

unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
  if (RetVT != MVT::i64)
    return 0;

  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
                          Op0, Op1, AArch64::XZR);
}

unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
  if (RetVT != MVT::i64)
    return 0;

  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
                          Op0, Op1, AArch64::XZR);
}

unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
                                     unsigned Op1Reg) {
  unsigned Opc = 0;
  bool NeedTrunc = false;
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
  case MVT::i32: Opc = AArch64::LSLVWr; break;
  case MVT::i64: Opc = AArch64::LSLVXr; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  if (NeedTrunc)
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);

  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
  if (NeedTrunc)
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  return ResultReg;
}

unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      Register ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0);
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<32+s-r,32-r> = Wn<s:0> when r > s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 4
  // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
  // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
  // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
  // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 8
  // Wd<32+7-24,32-24> = Wn<7:0>
  // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
  // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = shl i16 %1, 12
  // Wd<32+3-20,32-20> = Wn<3:0>
  // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
  // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext

  unsigned ImmR = RegSize - Shift;
  // Limit the width to the length of the source type.
  unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
  }
  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}

unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
                                     unsigned Op1Reg) {
  unsigned Opc = 0;
  bool NeedTrunc = false;
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
  case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
  case MVT::i32: Opc = AArch64::LSRVWr; break;
  case MVT::i64: Opc = AArch64::LSRVXr; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
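  // For i8/i16 the shift is performed in a 32-bit register: clear the bits
  // above the narrow type in both operands first so the right shift pulls in
  // zeros, then AND the result back down to the narrow width.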
  if (NeedTrunc) {
    Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
  }
  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
  if (NeedTrunc)
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  return ResultReg;
}

unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      Register ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0);
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = lshr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  if (Shift >= SrcBits && IsZExt)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  // It is not possible to fold a sign-extend into the LShr instruction. In this
  // case emit a sign-extend.
  if (!IsZExt) {
    Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
    if (!Op0)
      return 0;
    SrcVT = RetVT;
    SrcBits = SrcVT.getSizeInBits();
    IsZExt = true;
  }

  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
  }
  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}

unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
                                     unsigned Op1Reg) {
  unsigned Opc = 0;
  bool NeedTrunc = false;
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
  case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
  case MVT::i32: Opc = AArch64::ASRVWr; break;
  case MVT::i64: Opc = AArch64::ASRVXr; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  if (NeedTrunc) {
    Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
  }
  Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
  if (NeedTrunc)
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  return ResultReg;
}

unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     uint64_t Shift, bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      Register ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0);
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
  // {S|U}BFM Wd, Wn, #r, #s
  // Wd<s-r:0> = Wn<s:r> when r <= s

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 4
  // Wd<7-4:0> = Wn<7:4>
  // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
  // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 8
  // Wd<7-7,0> = Wn<7:7>
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
  // %2 = ashr i16 %1, 12
  // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
  // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
  // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext

  if (Shift >= SrcBits && IsZExt)
    return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);

  unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
  unsigned ImmS = SrcBits - 1;
  static const unsigned OpcTable[2][2] = {
    {AArch64::SBFMWri, AArch64::SBFMXri},
    {AArch64::UBFMWri, AArch64::UBFMXri}
  };
  unsigned Opc = OpcTable[IsZExt][Is64Bit];
  if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), TmpReg)
        .addImm(0)
        .addReg(Op0)
        .addImm(AArch64::sub_32);
    Op0 = TmpReg;
  }
  return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
}

unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
                                     bool IsZExt) {
  assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");

  // FastISel does not have plumbing to deal with extensions where the SrcVT or
  // DestVT are odd things, so test to make sure that they are both types we can
  // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
  // bail out to SelectionDAG.
  if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
       (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
      ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
       (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
    return 0;

  unsigned Opc;
  unsigned Imm = 0;

  switch (SrcVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    return emiti1Ext(SrcReg, DestVT, IsZExt);
  case MVT::i8:
    if (DestVT == MVT::i64)
      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 7;
    break;
  case MVT::i16:
    if (DestVT == MVT::i64)
      Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    else
      Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
    Imm = 15;
    break;
  case MVT::i32:
    assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
    Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
    Imm = 31;
    break;
  }
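  // The bitfield move emitted below uses immr = 0 and imms = Imm, which
  // zero-/sign-extends bits [Imm:0]; these are the uxtb/uxth and
  // sxtb/sxth/sxtw aliases.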
  // Handle i8 and i16 as i32.
  if (DestVT == MVT::i8 || DestVT == MVT::i16)
    DestVT = MVT::i32;
  else if (DestVT == MVT::i64) {
    Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Src64)
        .addImm(0)
        .addReg(SrcReg)
        .addImm(AArch64::sub_32);
    SrcReg = Src64;
  }

  const TargetRegisterClass *RC =
      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
}

static bool isZExtLoad(const MachineInstr *LI) {
  switch (LI->getOpcode()) {
  default:
    return false;
  case AArch64::LDURBBi:
  case AArch64::LDURHHi:
  case AArch64::LDURWi:
  case AArch64::LDRBBui:
  case AArch64::LDRHHui:
  case AArch64::LDRWui:
  case AArch64::LDRBBroX:
  case AArch64::LDRHHroX:
  case AArch64::LDRWroX:
  case AArch64::LDRBBroW:
  case AArch64::LDRHHroW:
  case AArch64::LDRWroW:
    return true;
  }
}

static bool isSExtLoad(const MachineInstr *LI) {
  switch (LI->getOpcode()) {
  default:
    return false;
  case AArch64::LDURSBWi:
  case AArch64::LDURSHWi:
  case AArch64::LDURSBXi:
  case AArch64::LDURSHXi:
  case AArch64::LDURSWi:
  case AArch64::LDRSBWui:
  case AArch64::LDRSHWui:
  case AArch64::LDRSBXui:
  case AArch64::LDRSHXui:
  case AArch64::LDRSWui:
  case AArch64::LDRSBWroX:
  case AArch64::LDRSHWroX:
  case AArch64::LDRSBXroX:
  case AArch64::LDRSHXroX:
  case AArch64::LDRSWroX:
  case AArch64::LDRSBWroW:
  case AArch64::LDRSHWroW:
  case AArch64::LDRSBXroW:
  case AArch64::LDRSHXroW:
  case AArch64::LDRSWroW:
    return true;
  }
}

bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
                                         MVT SrcVT) {
  const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
  if (!LI || !LI->hasOneUse())
    return false;

  // Check if the load instruction has already been selected.
  Register Reg = lookUpRegForValue(LI);
  if (!Reg)
    return false;

  MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
  if (!MI)
    return false;

  // Check if the correct load instruction has been emitted - SelectionDAG might
  // have emitted a zero-extending load, but we need a sign-extending load.
  bool IsZExt = isa<ZExtInst>(I);
  const auto *LoadMI = MI;
  if (LoadMI->getOpcode() == TargetOpcode::COPY &&
      LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
    Register LoadReg = MI->getOperand(1).getReg();
    LoadMI = MRI.getUniqueVRegDef(LoadReg);
    assert(LoadMI && "Expected valid instruction");
  }
  if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
    return false;

  // Nothing to be done.
  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
    updateValueMap(I, Reg);
    return true;
  }

  if (IsZExt) {
    Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(Reg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    Reg = Reg64;
  } else {
    assert((MI->getOpcode() == TargetOpcode::COPY &&
            MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
           "Expected copy instruction");
    Reg = MI->getOperand(1).getReg();
    MachineBasicBlock::iterator I(MI);
    removeDeadCode(I, std::next(I));
  }
  updateValueMap(I, Reg);
  return true;
}

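// Lower a zext/sext. If the value was produced by a load that already
// performed the extension, or comes from an argument marked zeroext/signext,
// it is reused directly instead of emitting a redundant extend.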
bool AArch64FastISel::selectIntExt(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  MVT RetVT;
  MVT SrcVT;
  if (!isTypeSupported(I->getType(), RetVT))
    return false;

  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
    return false;

  // Try to optimize already sign-/zero-extended values from load instructions.
  if (optimizeIntExtLoad(I, RetVT, SrcVT))
    return true;

  Register SrcReg = getRegForValue(I->getOperand(0));
  if (!SrcReg)
    return false;

  // Try to optimize already sign-/zero-extended values from function arguments.
  bool IsZExt = isa<ZExtInst>(I);
  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
      if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
        Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
            .addImm(0)
            .addReg(SrcReg)
            .addImm(AArch64::sub_32);
        SrcReg = ResultReg;
      }

      updateValueMap(I, SrcReg);
      return true;
    }
  }

  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
  if (!DestEVT.isSimple())
    return false;

  MVT DestVT = DestEVT.getSimpleVT();
  if (DestVT != MVT::i64 && DestVT != MVT::i32)
    return false;

  unsigned DivOpc;
  bool Is64bit = (DestVT == MVT::i64);
  switch (ISDOpcode) {
  default:
    return false;
  case ISD::SREM:
    DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
    break;
  case ISD::UREM:
    DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
    break;
  }
  unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
  Register Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

  Register Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;

  const TargetRegisterClass *RC =
      (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
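  // For example, a 32-bit srem becomes:  sdiv wQ, wN, wD;  msub wR, wQ, wD, wN.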
  Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
  assert(QuotReg && "Unexpected DIV instruction emission failure.");
  // The remainder is computed as numerator - (quotient * denominator) using the
  // MSUB instruction.
  Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectMul(const Instruction *I) {
  MVT VT;
  if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
    return false;

  if (VT.isVector())
    return selectBinaryOp(I, ISD::MUL);

  const Value *Src0 = I->getOperand(0);
  const Value *Src1 = I->getOperand(1);
  if (const auto *C = dyn_cast<ConstantInt>(Src0))
    if (C->getValue().isPowerOf2())
      std::swap(Src0, Src1);

  // Try to simplify to a shift instruction.
  if (const auto *C = dyn_cast<ConstantInt>(Src1))
    if (C->getValue().isPowerOf2()) {
      uint64_t ShiftVal = C->getValue().logBase2();
      MVT SrcVT = VT;
      bool IsZExt = true;
      if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
        if (!isIntExtFree(ZExt)) {
          MVT VT;
          if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
            SrcVT = VT;
            IsZExt = true;
            Src0 = ZExt->getOperand(0);
          }
        }
      } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
        if (!isIntExtFree(SExt)) {
          MVT VT;
          if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
            SrcVT = VT;
            IsZExt = false;
            Src0 = SExt->getOperand(0);
          }
        }
      }

      Register Src0Reg = getRegForValue(Src0);
      if (!Src0Reg)
        return false;

      unsigned ResultReg =
          emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);

      if (ResultReg) {
        updateValueMap(I, ResultReg);
        return true;
      }
    }

  Register Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

  Register Src1Reg = getRegForValue(I->getOperand(1));
  if (!Src1Reg)
    return false;

  unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectShift(const Instruction *I) {
  MVT RetVT;
  if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
    return false;

  if (RetVT.isVector())
    return selectOperator(I, I->getOpcode());

  if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ResultReg = 0;
    uint64_t ShiftVal = C->getZExtValue();
    MVT SrcVT = RetVT;
    bool IsZExt = I->getOpcode() != Instruction::AShr;
    const Value *Op0 = I->getOperand(0);
    if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
      if (!isIntExtFree(ZExt)) {
        MVT TmpVT;
        if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = true;
          Op0 = ZExt->getOperand(0);
        }
      }
    } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
      if (!isIntExtFree(SExt)) {
        MVT TmpVT;
        if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
          SrcVT = TmpVT;
          IsZExt = false;
          Op0 = SExt->getOperand(0);
        }
      }
    }

    Register Op0Reg = getRegForValue(Op0);
    if (!Op0Reg)
      return false;

    switch (I->getOpcode()) {
    default: llvm_unreachable("Unexpected instruction.");
    case Instruction::Shl:
      ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
      break;
    case Instruction::AShr:
      ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
      break;
    case Instruction::LShr:
      ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
      break;
    }
    if (!ResultReg)
      return false;

    updateValueMap(I, ResultReg);
    return true;
  }

  Register Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;

  Register Op1Reg = getRegForValue(I->getOperand(1));
  if (!Op1Reg)
    return false;

  unsigned ResultReg = 0;
  switch (I->getOpcode()) {
  default: llvm_unreachable("Unexpected instruction.");
  case Instruction::Shl:
    ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
    break;
  case Instruction::AShr:
    ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
    break;
  case Instruction::LShr:
    ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
    break;
  }

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectBitCast(const Instruction *I) {
  MVT RetVT, SrcVT;

  if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
    return false;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  unsigned Opc;
  if (RetVT == MVT::f32 && SrcVT == MVT::i32)
    Opc = AArch64::FMOVWSr;
  else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
    Opc = AArch64::FMOVXDr;
  else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
    Opc = AArch64::FMOVSWr;
  else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
    Opc = AArch64::FMOVDXr;
  else
    return false;

  const TargetRegisterClass *RC = nullptr;
  switch (RetVT.SimpleTy) {
  default: llvm_unreachable("Unexpected value type.");
  case MVT::i32: RC = &AArch64::GPR32RegClass; break;
  case MVT::i64: RC = &AArch64::GPR64RegClass; break;
  case MVT::f32: RC = &AArch64::FPR32RegClass; break;
  case MVT::f64: RC = &AArch64::FPR64RegClass; break;
  }
  Register Op0Reg = getRegForValue(I->getOperand(0));
  if (!Op0Reg)
    return false;

  Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectFRem(const Instruction *I) {
  MVT RetVT;
  if (!isTypeLegal(I->getType(), RetVT))
    return false;

  RTLIB::Libcall LC;
  switch (RetVT.SimpleTy) {
  default:
    return false;
  case MVT::f32:
    LC = RTLIB::REM_F32;
    break;
  case MVT::f64:
    LC = RTLIB::REM_F64;
    break;
  }

  ArgListTy Args;
  Args.reserve(I->getNumOperands());

  // Populate the argument list.
  for (auto &Arg : I->operands()) {
    ArgListEntry Entry;
    Entry.Val = Arg;
    Entry.Ty = Arg->getType();
    Args.push_back(Entry);
  }

  CallLoweringInfo CLI;
  MCContext &Ctx = MF->getContext();
  CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
                TLI.getLibcallName(LC), std::move(Args));
  if (!lowerCallTo(CLI))
    return false;
  updateValueMap(I, CLI.ResultReg);
  return true;
}

bool AArch64FastISel::selectSDiv(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  if (!isa<ConstantInt>(I->getOperand(1)))
    return selectBinaryOp(I, ISD::SDIV);

  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
      !(C.isPowerOf2() || C.isNegatedPowerOf2()))
    return selectBinaryOp(I, ISD::SDIV);

  unsigned Lg2 = C.countr_zero();
  Register Src0Reg = getRegForValue(I->getOperand(0));
  if (!Src0Reg)
    return false;

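  // An exact sdiv cannot have a remainder, so no rounding adjustment is
  // needed; a single arithmetic shift is enough.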
  if (cast<BinaryOperator>(I)->isExact()) {
    unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
    if (!ResultReg)
      return false;
    updateValueMap(I, ResultReg);
    return true;
  }

  int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
  unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
  if (!AddReg)
    return false;

  // (Src0 < 0) ? Pow2 - 1 : 0;
  if (!emitICmp_ri(VT, Src0Reg, 0))
    return false;

  unsigned SelectOpc;
  const TargetRegisterClass *RC;
  if (VT == MVT::i64) {
    SelectOpc = AArch64::CSELXr;
    RC = &AArch64::GPR64RegClass;
  } else {
    SelectOpc = AArch64::CSELWr;
    RC = &AArch64::GPR32RegClass;
  }
  Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
                                        AArch64CC::LT);
  if (!SelectReg)
    return false;

  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
  // negate the result.
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  unsigned ResultReg;
  if (C.isNegative())
    ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
                              AArch64_AM::ASR, Lg2);
  else
    ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);

  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

/// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
/// have to duplicate it for AArch64, because otherwise we would fail during the
/// sign-extend emission.
unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
  Register IdxN = getRegForValue(Idx);
  if (IdxN == 0)
    // Unhandled operand. Halt "fast" selection and bail.
    return 0;

  // If the index is smaller or larger than intptr_t, truncate or extend it.
  MVT PtrVT = TLI.getPointerTy(DL);
  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
  if (IdxVT.bitsLT(PtrVT)) {
    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
  } else if (IdxVT.bitsGT(PtrVT))
    llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
  return IdxN;
}

/// This is mostly a copy of the existing FastISel GEP code, but we have to
/// duplicate it for AArch64, because otherwise we would bail out even for
/// simple cases. This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficiently.
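/// For example, a GEP with a single variable i64 index into an i32 array is
/// lowered here to a MUL of the index by the element size followed by an ADD,
/// while constant indices are folded into one immediate ADD of the accumulated
/// offset.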
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
  if (Subtarget->isTargetILP32())
    return false;

  Register N = getRegForValue(I->getOperand(0));
  if (!N)
    return false;

  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
  // into a single N = N + TotalOffset.
  uint64_t TotalOffs = 0;
  MVT VT = TLI.getPointerTy(DL);
  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
       GTI != E; ++GTI) {
    const Value *Idx = GTI.getOperand();
    if (auto *StTy = GTI.getStructTypeOrNull()) {
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
      // N = N + Offset
      if (Field)
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
    } else {
      // If this is a constant subscript, handle it quickly.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero())
          continue;
        // N = N + Offset
        TotalOffs += GTI.getSequentialElementStride(DL) *
                     cast<ConstantInt>(CI)->getSExtValue();
        continue;
      }
      if (TotalOffs) {
        N = emitAdd_ri_(VT, N, TotalOffs);
        if (!N)
          return false;
        TotalOffs = 0;
      }

      // N = N + Idx * ElementSize;
      uint64_t ElementSize = GTI.getSequentialElementStride(DL);
      unsigned IdxN = getRegForGEPIndex(Idx);
      if (!IdxN)
        return false;

      if (ElementSize != 1) {
        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
        if (!C)
          return false;
        IdxN = emitMul_rr(VT, IdxN, C);
        if (!IdxN)
          return false;
      }
      N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
      if (!N)
        return false;
    }
  }
  if (TotalOffs) {
    N = emitAdd_ri_(VT, N, TotalOffs);
    if (!N)
      return false;
  }
  updateValueMap(I, N);
  return true;
}

bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
  assert(TM.getOptLevel() == CodeGenOptLevel::None &&
         "cmpxchg survived AtomicExpand at optlevel > -O0");

  auto *RetPairTy = cast<StructType>(I->getType());
  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
         "cmpxchg has a non-i1 status result");

  MVT VT;
  if (!isTypeLegal(RetTy, VT))
    return false;

  const TargetRegisterClass *ResRC;
  unsigned Opc, CmpOpc;
  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
  // extractvalue selection doesn't support that.
  if (VT == MVT::i32) {
    Opc = AArch64::CMP_SWAP_32;
    CmpOpc = AArch64::SUBSWrs;
    ResRC = &AArch64::GPR32RegClass;
  } else if (VT == MVT::i64) {
    Opc = AArch64::CMP_SWAP_64;
    CmpOpc = AArch64::SUBSXrs;
    ResRC = &AArch64::GPR64RegClass;
  } else {
    return false;
  }

  const MCInstrDesc &II = TII.get(Opc);

  const Register AddrReg = constrainOperandRegClass(
      II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
  const Register DesiredReg = constrainOperandRegClass(
      II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
  const Register NewReg = constrainOperandRegClass(
      II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);

  const Register ResultReg1 = createResultReg(ResRC);
  const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
  const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);

  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
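  // The CMP_SWAP_* pseudo yields the loaded value in ResultReg1; the SUBS and
  // CSINC emitted after it compare that value against the expected one and
  // materialize the i1 'success' result of the cmpxchg.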
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
      .addDef(ResultReg1)
      .addDef(ScratchReg)
      .addUse(AddrReg)
      .addUse(DesiredReg)
      .addUse(NewReg);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
      .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
      .addUse(ResultReg1)
      .addUse(DesiredReg)
      .addImm(0);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
      .addDef(ResultReg2)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::NE);

  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
  updateValueMap(I, ResultReg1, 2);
  return true;
}

bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
  if (TLI.fallBackToDAGISel(*I))
    return false;
  switch (I->getOpcode()) {
  default:
    break;
  case Instruction::Add:
  case Instruction::Sub:
    return selectAddSub(I);
  case Instruction::Mul:
    return selectMul(I);
  case Instruction::SDiv:
    return selectSDiv(I);
  case Instruction::SRem:
    if (!selectBinaryOp(I, ISD::SREM))
      return selectRem(I, ISD::SREM);
    return true;
  case Instruction::URem:
    if (!selectBinaryOp(I, ISD::UREM))
      return selectRem(I, ISD::UREM);
    return true;
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return selectShift(I);
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    return selectLogicalOp(I);
  case Instruction::Br:
    return selectBranch(I);
  case Instruction::IndirectBr:
    return selectIndirectBr(I);
  case Instruction::BitCast:
    if (!FastISel::selectBitCast(I))
      return selectBitCast(I);
    return true;
  case Instruction::FPToSI:
    if (!selectCast(I, ISD::FP_TO_SINT))
      return selectFPToInt(I, /*Signed=*/true);
    return true;
  case Instruction::FPToUI:
    return selectFPToInt(I, /*Signed=*/false);
  case Instruction::ZExt:
  case Instruction::SExt:
    return selectIntExt(I);
  case Instruction::Trunc:
    if (!selectCast(I, ISD::TRUNCATE))
      return selectTrunc(I);
    return true;
  case Instruction::FPExt:
    return selectFPExt(I);
  case Instruction::FPTrunc:
    return selectFPTrunc(I);
  case Instruction::SIToFP:
    if (!selectCast(I, ISD::SINT_TO_FP))
      return selectIntToFP(I, /*Signed=*/true);
    return true;
  case Instruction::UIToFP:
    return selectIntToFP(I, /*Signed=*/false);
  case Instruction::Load:
    return selectLoad(I);
  case Instruction::Store:
    return selectStore(I);
  case Instruction::FCmp:
  case Instruction::ICmp:
    return selectCmp(I);
  case Instruction::Select:
    return selectSelect(I);
  case Instruction::Ret:
    return selectRet(I);
  case Instruction::FRem:
    return selectFRem(I);
  case Instruction::GetElementPtr:
    return selectGetElementPtr(I);
  case Instruction::AtomicCmpXchg:
    return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
  }

  // fall-back to target-independent instruction selection.
  return selectOperator(I, I->getOpcode());
}

FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) {

  SMEAttrs CallerAttrs(*FuncInfo.Fn);
  if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
      CallerAttrs.hasStreamingInterfaceOrBody() ||
      CallerAttrs.hasStreamingCompatibleInterface())
    return nullptr;
  return new AArch64FastISel(FuncInfo, LibInfo);
}