Path: blob/main/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-isel"
#define PASS_NAME "AArch64 Instruction Selection"

//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

public:
  AArch64DAGToDAGISel() = delete;

  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOptLevel OptLevel)
      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  template <signed Low, signed High, signed Scale>
  bool SelectRDVLImm(SDValue N, SDValue &Imm);

  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }
  template <unsigned Size, unsigned Max>
  bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
    // Test if there is an appropriate addressing mode and check if the
    // immediate fits.
    bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
    if (Found) {
      if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
        int64_t C = CI->getSExtValue();
        if (C <= Max)
          return true;
      }
    }

    // Otherwise, base only, materialize address in register.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
    return true;
  }

  template <int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template <int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  bool SelectExtractHigh(SDValue N, SDValue &Res) {
    if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
      N = N->getOperand(0);
    if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
        !isa<ConstantSDNode>(N->getOperand(1)))
      return false;
    EVT VT = N->getValueType(0);
    EVT LVT = N->getOperand(0).getValueType();
    unsigned Index = N->getConstantOperandVal(1);
    if (!VT.is64BitVector() || !LVT.is128BitVector() ||
        Index != VT.getVectorNumElements())
      return false;
    Res = N->getOperand(0);
    return true;
  }

  bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
    if (N.getOpcode() != AArch64ISD::VLSHR)
      return false;
    SDValue Op = N->getOperand(0);
    EVT VT = Op.getValueType();
    unsigned ShtAmt = N->getConstantOperandVal(1);
    if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
      return false;

    APInt Imm;
    if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
      Imm = APInt(VT.getScalarSizeInBits(),
                  Op.getOperand(1).getConstantOperandVal(0)
                      << Op.getOperand(1).getConstantOperandVal(1));
    else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
             isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
      Imm = APInt(VT.getScalarSizeInBits(),
                  Op.getOperand(1).getConstantOperandVal(0));
    else
      return false;

    if (Imm != 1ULL << (ShtAmt - 1))
      return false;

    Res1 = Op.getOperand(0);
    Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
    return true;
  }
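
  // Worked example for SelectRoundingVLShr above: on v8i16 with ShtAmt == 4
  // it matches (AArch64ISD::VLSHR (add X, (AArch64ISD::DUP 8)), 4), since
  // 8 == 1 << (4 - 1) is exactly the rounding bias for a shift by 4, making
  // the whole expression a rounding shift right of X by 4.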

  bool SelectDupZeroOrUndef(SDValue N) {
    switch (N->getOpcode()) {
    case ISD::UNDEF:
      return true;
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (isNullConstant(Opnd0))
        return true;
      if (isNullFPConstant(Opnd0))
        return true;
      break;
    }
    default:
      break;
    }

    return false;
  }

  bool SelectDupZero(SDValue N) {
    switch (N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (isNullConstant(Opnd0))
        return true;
      if (isNullFPConstant(Opnd0))
        return true;
      break;
    }
    }

    return false;
  }

  bool SelectDupNegativeZero(SDValue N) {
    switch (N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
      return Const && Const->isZero() && Const->isNegative();
    }
    }

    return false;
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubImm(N, VT, Imm, Shift);
  }

  template <MVT::SimpleValueType VT, bool Negate>
  bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVECpyDupImm(N, VT, Imm, Shift);
  }

  template <MVT::SimpleValueType VT, bool Invert = false>
  bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
    return SelectSVELogicalImm(N, VT, Imm, Invert);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
    return SelectSVEArithImm(N, VT, Imm);
  }

  template <unsigned Low, unsigned High, bool AllowSaturation = false>
  bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
    return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
  }

  bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
    if (N->getOpcode() != ISD::SPLAT_VECTOR)
      return false;

    EVT EltVT = N->getValueType(0).getVectorElementType();
    return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
                             /* High */ EltVT.getFixedSizeInBits(),
                             /* AllowSaturation */ true, Imm);
  }

  // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
  template <signed Min, signed Max, signed Scale, bool Shift>
  bool SelectCntImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
    if (Shift)
      MulImm = 1LL << MulImm;

    if ((MulImm % std::abs(Scale)) != 0)
      return false;

    MulImm /= Scale;
    if ((MulImm >= Min) && (MulImm <= Max)) {
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }
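
  // Worked example for SelectCntImm above: with Min = 1, Max = 16,
  // Scale = 16 and Shift = false, a constant of 32 yields Imm = 2
  // (32 % 16 == 0 and 32 / 16 == 2 lies in [1, 16]). With Shift = true the
  // incoming constant is taken as a log2 value, so 5 stands for 32.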

  template <signed Max, signed Scale>
  bool SelectEXTImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();

    if (MulImm >= 0 && MulImm <= Max) {
      MulImm *= Scale;
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }

  template <unsigned BaseReg, unsigned Max>
  bool ImmToReg(SDValue N, SDValue &Imm) {
    if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
      uint64_t C = CI->getZExtValue();

      if (C > Max)
        return false;

      Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
      return true;
    }
    return false;
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that is
  /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);
  // Form a sequence of SVE registers for instructions using list of vectors,
  // e.g. structured loads and stores (ldN, stN).
  SDValue createZTuple(ArrayRef<SDValue> Vecs);

  // Similar to above, except the register must start at a multiple of the
  // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
  SDValue createZMulTuple(ArrayRef<SDValue> Regs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  bool tryIndexedLoad(SDNode *N);

  void SelectPtrauthAuth(SDNode *N);
  void SelectPtrauthResign(SDNode *N);

  bool trySelectStackSlotTagP(SDNode *N);
  void SelectTagP(SDNode *N);

  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                  unsigned SubRegIdx);
  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                      unsigned SubRegIdx);
  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
                            unsigned Opc_rr, unsigned Opc_ri,
                            bool IsIntr = false);
  void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
                                       unsigned Scale, unsigned Opc_ri,
                                       unsigned Opc_rr);
  void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
                                       bool IsZmMulti, unsigned Opcode,
                                       bool HasPred = false);
  void SelectPExtPair(SDNode *N, unsigned Opc);
  void SelectWhilePair(SDNode *N, unsigned Opc);
  void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
                                 bool IsTupleInput, unsigned Opc);
  void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);

  template <unsigned MaxIdx, unsigned Scale>
  void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
                             unsigned Op);
  void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
                              unsigned Op, unsigned MaxIdx, unsigned Scale,
                              unsigned BaseReg = 0);
  bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
  /// SVE Reg+Imm addressing mode.
  template <int64_t Min, int64_t Max>
  bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
                                SDValue &OffImm);
  /// SVE Reg+Reg address mode.
  template <unsigned Scale>
  bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
    return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
  }

  void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
                             uint32_t MaxImm);

  template <unsigned MaxIdx, unsigned Scale>
  bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
    return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
  }

  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
                             unsigned Opc_rr, unsigned Opc_ri);
  std::tuple<unsigned, SDValue, SDValue>
  findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
                           const SDValue &OldBase, const SDValue &OldOffset,
                           unsigned Scale);

  bool tryBitfieldExtractOp(SDNode *N);
  bool tryBitfieldExtractOpFromSExt(SDNode *N);
  bool tryBitfieldInsertOp(SDNode *N);
  bool tryBitfieldInsertInZeroOp(SDNode *N);
  bool tryShiftAmountMod(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool trySelectCastFixedLengthToScalableVector(SDNode *N);
  bool trySelectCastScalableToFixedLengthVector(SDNode *N);

  bool trySelectXAR(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
  }
  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
                                     unsigned Size, SDValue &Base,
                                     SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
  bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  template <unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);

  template <unsigned RegWidth>
  bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
                                     unsigned Width);

  bool SelectCMP_SWAP(SDNode *N);

  bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
  bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
                              bool Negate);
  bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
  bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
  bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
  bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
                         bool AllowSaturation, SDValue &Imm);

  bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
  bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
                               SDValue &Offset);
  bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
                          SDValue &Offset, unsigned Scale = 1);

  bool SelectAllActivePredicate(SDValue N);
  bool SelectAnyPredicate(SDValue N);
};

class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
public:
  static char ID;
  explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
                                     CodeGenOptLevel OptLevel)
      : SelectionDAGISelLegacy(
            ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
};
} // end anonymous namespace

char AArch64DAGToDAGISelLegacy::ID = 0;

INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)

/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so Imm will receive the 32-bit value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}

// isIntImmediate - This method tests to see if N is a constant operand.
// If so Imm will receive the value.
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  return isIntImmediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
                                  uint64_t &Imm) {
  return N->getOpcode() == Opc &&
         isIntImmediate(N->getOperand(1).getNode(), Imm);
}

// isIntImmediateEq - This method tests to see if N is a constant operand that
// is equivalent to 'ImmExpected'.
#ifndef NDEBUG
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
  uint64_t Imm;
  if (!isIntImmediate(N.getNode(), Imm))
    return false;
  return Imm == ImmExpected;
}
#endif

bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::ConstraintCode::m:
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::Q:
    // We need to make sure that this one operand does not end up in XZR, thus
    // require the address to be in a PointerRegClass register.
    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
    const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
    SDLoc dl(Op);
    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
    SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);
    OutOps.push_back(NewOp);
    return false;
  }
  return true;
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
                                           SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  uint64_t Immed = N.getNode()->getAsZExtVal();
  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return false;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  SDLoc dl(N);
  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
  return true;
}
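
// For example, 0x123 is selected as (Val = 0x123, LSL #0) and 0x456000 as
// (Val = 0x456, LSL #12), while 0x456001 is rejected because its low 12 bits
// are not zero. SelectNegArithImmed below handles the negated forms, so an
// add of -5 can be selected as a sub of 5.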

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
                                              SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  // The immediate operand must be a 24-bit zero-extended immediate.
  uint64_t Immed = N.getNode()->getAsZExtVal();

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return false;

  if (N.getValueType() == MVT::i32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return false;

  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
                          Shift);
}

/// getShiftTypeForNode - Translate a shift node to the corresponding
/// ShiftType value.
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  switch (N.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case ISD::SHL:
    return AArch64_AM::LSL;
  case ISD::SRL:
    return AArch64_AM::LSR;
  case ISD::SRA:
    return AArch64_AM::ASR;
  case ISD::ROTR:
    return AArch64_AM::ROR;
  }
}

/// Determine whether it is worth it to fold SHL into the addressing
/// mode.
static bool isWorthFoldingSHL(SDValue V) {
  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
  // It is worth folding logical shift of up to three places.
  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!CSD)
    return false;
  unsigned ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 3)
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = V.getNode();
  for (SDNode *UI : Node->uses())
    if (!isa<MemSDNode>(*UI))
      for (SDNode *UII : UI->uses())
        if (!isa<MemSDNode>(*UII))
          return false;
  return true;
}

/// Determine whether it is worth folding V into an extended register
/// addressing mode.
bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (CurDAG->shouldOptForSize() || V.hasOneUse())
    return true;

  // If a subtarget has a slow shift, folding a shift into multiple loads
  // costs additional micro-ops.
  if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
    return false;

  // Check whether we're going to emit the address arithmetic anyway because
  // it's used by a non-address operation.
  if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
    return true;
  if (V.getOpcode() == ISD::ADD) {
    const SDValue LHS = V.getOperand(0);
    const SDValue RHS = V.getOperand(1);
    if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
      return true;
    if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
      return true;
  }

  // It hurts otherwise, since the value will be reused.
  return false;
}

/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
/// to select more shifted-register forms.
bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
                                                       SDValue &Shift) {
  EVT VT = N.getValueType();
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  if (N->getOpcode() != ISD::AND || !N->hasOneUse())
    return false;
  SDValue LHS = N.getOperand(0);
  if (!LHS->hasOneUse())
    return false;

  unsigned LHSOpcode = LHS->getOpcode();
  if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
    return false;

  ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
  if (!ShiftAmtNode)
    return false;

  uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHSC)
    return false;

  APInt AndMask = RHSC->getAPIntValue();
  unsigned LowZBits, MaskLen;
  if (!AndMask.isShiftedMask(LowZBits, MaskLen))
    return false;

  unsigned BitWidth = N.getValueSizeInBits();
  SDLoc DL(LHS);
  uint64_t NewShiftC;
  unsigned NewShiftOp;
  if (LHSOpcode == ISD::SHL) {
    // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
    // BitWidth != LowZBits + MaskLen doesn't match the pattern
    if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
      return false;

    NewShiftC = LowZBits - ShiftAmtC;
    NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
  } else {
    if (LowZBits == 0)
      return false;

    // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
    NewShiftC = LowZBits + ShiftAmtC;
    if (NewShiftC >= BitWidth)
      return false;

    // SRA needs all high bits
    if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
      return false;

    // SRL high bits can be 0 or 1
    if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
      return false;

    if (LHSOpcode == ISD::SRL)
      NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
    else
      NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
  }

  assert(NewShiftC < BitWidth && "Invalid shift amount");
  SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
  SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
  Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
                                       NewShiftAmt, BitWidthMinus1),
                0);
  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
  Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
  return true;
}
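
// For example, on i32 the pattern (and (srl x, 4), 0x0ffffff0) is selected
// as (shl (UBFMWri x, 8, 31), 4): LowZBits == 4 and MaskLen == 24, so the
// UBFM performs a plain LSR by 8 and the shifted-register operand re-applies
// LSL #4.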

/// getExtendTypeForNode - Translate an extend node to the corresponding
/// ExtendType value.
static AArch64_AM::ShiftExtendType
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
  if (N.getOpcode() == ISD::SIGN_EXTEND ||
      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT SrcVT;
    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    else
      SrcVT = N.getOperand(0).getValueType();

    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::SXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::SXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::SXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
             N.getOpcode() == ISD::ANY_EXTEND) {
    EVT SrcVT = N.getOperand(0).getValueType();
    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::UXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::UXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::UXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::AND) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return AArch64_AM::InvalidShiftExtend;
    uint64_t AndMask = CSD->getZExtValue();

    switch (AndMask) {
    default:
      return AArch64_AM::InvalidShiftExtend;
    case 0xFF:
      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
    case 0xFFFF:
      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
    case 0xFFFFFFFF:
      return AArch64_AM::UXTW;
    }
  }

  return AArch64_AM::InvalidShiftExtend;
}

/// Determine whether it is worth folding V into an extended register of an
/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
/// instruction, and the shift should be treated as worth folding even if it
/// has multiple uses.
bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (CurDAG->shouldOptForSize() || V.hasOneUse())
    return true;

  // If a subtarget has a fastpath LSL we can fold a logical shift into
  // the add/sub and save a cycle.
  if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
      V.getConstantOperandVal(1) <= 4 &&
      getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
    return true;

  // It hurts otherwise, since the value will be reused.
  return false;
}

/// SelectShiftedRegister - Select a "shifted register" operand. If the value
/// is not shifted, set the Shift operand to default of "LSL 0". The logical
/// instructions allow the shifted register to be rotated, but the arithmetic
/// instructions do not. The AllowROR parameter specifies whether ROR is
/// supported.
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
                                                SDValue &Reg, SDValue &Shift) {
  if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
    return true;

  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return false;
  if (!AllowROR && ShType == AArch64_AM::ROR)
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    unsigned BitSize = N.getValueSizeInBits();
    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);

    Reg = N.getOperand(0);
    Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
    return isWorthFoldingALU(N, true);
  }

  return false;
}
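
// For example, for (add x0, (shl x1, 3)) the second operand is selected here
// as x1 with shift "LSL #3", giving "add x0, x1, lsl #3"; the logical
// instructions additionally pass AllowROR = true, so (xor x0, (rotr x1, 7))
// can fold to "eor x0, x1, ror #7".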

/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32, but the incoming DAG might be acting on a
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
/// this is the case.
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
  if (N.getValueType() == MVT::i32)
    return N;

  SDLoc dl(N);
  return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
}

// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
template <signed Low, signed High, signed Scale>
bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
  if ((MulImm % std::abs(Scale)) == 0) {
    int64_t RDVLImm = MulImm / Scale;
    if ((RDVLImm >= Low) && (RDVLImm <= High)) {
      Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

/// SelectArithExtendedRegister - Select an "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N.getOperand(0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0);

    // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
    // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
    auto isDef32 = [](SDValue N) {
      unsigned Opc = N.getOpcode();
      return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
             Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
             Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
             Opc != ISD::FREEZE;
    };
    if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
        isDef32(Reg))
      return false;
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from. Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFoldingALU(N);
}

/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
/// operand is referenced by instructions that have an SP operand.
bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
                                                  SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() != ISD::SHL)
    return false;

  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD)
    return false;
  ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 4)
    return false;

  Ext = AArch64_AM::UXTX;
  Reg = N.getOperand(0);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFoldingALU(N);
}

/// If there's a use of this ADDlow that's not itself a load/store then we'll
/// need to create a real ADD instruction from it anyway and there's no point in
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
/// leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
  for (auto *Use : N->uses()) {
    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
        Use->getOpcode() != ISD::ATOMIC_LOAD &&
        Use->getOpcode() != ISD::ATOMIC_STORE)
      return false;

    // ldar and stlr have much more restrictive addressing modes (just a
    // register).
    if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
      return false;
  }

  return true;
}

/// Check if the immediate offset is valid as a scaled immediate.
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
                                     unsigned Size) {
  if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
      Offset < (Range << Log2_32(Size)))
    return true;
  return false;
}
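
// For example, with Size == 8 and Range == 0x1000, offset 32760 (4095 * 8)
// is valid, while 32768 (out of range) and 28 (not a multiple of 8) are not.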
The "Size" argument is the size in bytes of the memory1044/// reference, which determines the scale.1045bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,1046unsigned BW, unsigned Size,1047SDValue &Base,1048SDValue &OffImm) {1049SDLoc dl(N);1050const DataLayout &DL = CurDAG->getDataLayout();1051const TargetLowering *TLI = getTargetLowering();1052if (N.getOpcode() == ISD::FrameIndex) {1053int FI = cast<FrameIndexSDNode>(N)->getIndex();1054Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));1055OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);1056return true;1057}10581059// As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed1060// selected here doesn't support labels/immediates, only base+offset.1061if (CurDAG->isBaseWithConstantOffset(N)) {1062if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {1063if (IsSignedImm) {1064int64_t RHSC = RHS->getSExtValue();1065unsigned Scale = Log2_32(Size);1066int64_t Range = 0x1LL << (BW - 1);10671068if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&1069RHSC < (Range << Scale)) {1070Base = N.getOperand(0);1071if (Base.getOpcode() == ISD::FrameIndex) {1072int FI = cast<FrameIndexSDNode>(Base)->getIndex();1073Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));1074}1075OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);1076return true;1077}1078} else {1079// unsigned Immediate1080uint64_t RHSC = RHS->getZExtValue();1081unsigned Scale = Log2_32(Size);1082uint64_t Range = 0x1ULL << BW;10831084if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {1085Base = N.getOperand(0);1086if (Base.getOpcode() == ISD::FrameIndex) {1087int FI = cast<FrameIndexSDNode>(Base)->getIndex();1088Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));1089}1090OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);1091return true;1092}1093}1094}1095}1096// Base only. The address will be materialized into a register before1097// the memory is accessed.1098// add x0, Xbase, #offset1099// stp x1, x2, [x0]1100Base = N;1101OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);1102return true;1103}11041105/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit1106/// immediate" address. 
The "Size" argument is the size in bytes of the memory1107/// reference, which determines the scale.1108bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,1109SDValue &Base, SDValue &OffImm) {1110SDLoc dl(N);1111const DataLayout &DL = CurDAG->getDataLayout();1112const TargetLowering *TLI = getTargetLowering();1113if (N.getOpcode() == ISD::FrameIndex) {1114int FI = cast<FrameIndexSDNode>(N)->getIndex();1115Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));1116OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);1117return true;1118}11191120if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {1121GlobalAddressSDNode *GAN =1122dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());1123Base = N.getOperand(0);1124OffImm = N.getOperand(1);1125if (!GAN)1126return true;11271128if (GAN->getOffset() % Size == 0 &&1129GAN->getGlobal()->getPointerAlignment(DL) >= Size)1130return true;1131}11321133if (CurDAG->isBaseWithConstantOffset(N)) {1134if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {1135int64_t RHSC = (int64_t)RHS->getZExtValue();1136unsigned Scale = Log2_32(Size);1137if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {1138Base = N.getOperand(0);1139if (Base.getOpcode() == ISD::FrameIndex) {1140int FI = cast<FrameIndexSDNode>(Base)->getIndex();1141Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));1142}1143OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);1144return true;1145}1146}1147}11481149// Before falling back to our general case, check if the unscaled1150// instructions can handle this. If so, that's preferable.1151if (SelectAddrModeUnscaled(N, Size, Base, OffImm))1152return false;11531154// Base only. The address will be materialized into a register before1155// the memory is accessed.1156// add x0, Xbase, #offset1157// ldr x0, [x0]1158Base = N;1159OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);1160return true;1161}11621163/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit1164/// immediate" address. This should only match when there is an offset that1165/// is not valid for a scaled immediate addressing mode. 
The "Size" argument1166/// is the size in bytes of the memory reference, which is needed here to know1167/// what is valid for a scaled immediate.1168bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,1169SDValue &Base,1170SDValue &OffImm) {1171if (!CurDAG->isBaseWithConstantOffset(N))1172return false;1173if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {1174int64_t RHSC = RHS->getSExtValue();1175if (RHSC >= -256 && RHSC < 256) {1176Base = N.getOperand(0);1177if (Base.getOpcode() == ISD::FrameIndex) {1178int FI = cast<FrameIndexSDNode>(Base)->getIndex();1179const TargetLowering *TLI = getTargetLowering();1180Base = CurDAG->getTargetFrameIndex(1181FI, TLI->getPointerTy(CurDAG->getDataLayout()));1182}1183OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);1184return true;1185}1186}1187return false;1188}11891190static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {1191SDLoc dl(N);1192SDValue ImpDef = SDValue(1193CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);1194return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,1195N);1196}11971198/// Check if the given SHL node (\p N), can be used to form an1199/// extended register for an addressing mode.1200bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,1201bool WantExtend, SDValue &Offset,1202SDValue &SignExtend) {1203assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");1204ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));1205if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())1206return false;12071208SDLoc dl(N);1209if (WantExtend) {1210AArch64_AM::ShiftExtendType Ext =1211getExtendTypeForNode(N.getOperand(0), true);1212if (Ext == AArch64_AM::InvalidShiftExtend)1213return false;12141215Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));1216SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,1217MVT::i32);1218} else {1219Offset = N.getOperand(0);1220SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);1221}12221223unsigned LegalShiftVal = Log2_32(Size);1224unsigned ShiftVal = CSD->getZExtValue();12251226if (ShiftVal != 0 && ShiftVal != LegalShiftVal)1227return false;12281229return isWorthFoldingAddr(N, Size);1230}12311232bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,1233SDValue &Base, SDValue &Offset,1234SDValue &SignExtend,1235SDValue &DoShift) {1236if (N.getOpcode() != ISD::ADD)1237return false;1238SDValue LHS = N.getOperand(0);1239SDValue RHS = N.getOperand(1);1240SDLoc dl(N);12411242// We don't want to match immediate adds here, because they are better lowered1243// to the register-immediate addressing modes.1244if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))1245return false;12461247// Check if this particular node is reused in any non-memory related1248// operation. 
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFoldingAddr(LHS, Size))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFoldingAddr(RHS, Size))
      return true;
  }

  return false;
}

// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
// encoded by a single MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Check if it can be encoded in an "ADD LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}
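
// For example, isPreferredADD(0xfff) and isPreferredADD(0x45f000) return
// true (encodable in an ADD and an "ADD LSL #12" respectively), while
// isPreferredADD(0x230000) returns false because a single
// "MOVZ #0x23, LSL #16" is cheaper.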

bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc DL(N);

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Watch out if RHS is a wide immediate: it cannot be selected into the
  // [BaseReg+Imm] addressing mode, and it may not be encodable in an
  // ADD/SUB either. Instead it will use a [BaseReg + 0] address mode and
  // generate instructions like:
  //     MOV  X0, WideImmediate
  //     ADD  X1, BaseReg, X0
  //     LDR  X2, [X1, 0]
  // For such a situation, using the [BaseReg, XReg] addressing mode saves one
  // ADD/SUB:
  //     MOV  X0, WideImmediate
  //     LDR  X2, [BaseReg, X0]
  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
    // Skip if the immediate can be selected by the load/store addressing
    // mode. Also skip if it can be encoded by a single ADD (SUB is also
    // checked by using -ImmOff).
    if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return false;

    SDValue Ops[] = { RHS };
    SDNode *MOVI =
        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    SDValue MOVIV = SDValue(MOVI, 0);
    // This ADD of two X registers will be selected into [Reg+Reg] mode.
    N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Match any non-shifted, non-extend, non-immediate add expression.
  Base = LHS;
  Offset = RHS;
  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
  // Reg1 + Reg2 is free: no check needed.
  return true;
}

SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
                                         AArch64::ZPR3RegClassID,
                                         AArch64::ZPR4RegClassID};
  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
                                     AArch64::zsub2, AArch64::zsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
  assert(Regs.size() == 2 || Regs.size() == 4);

  // The createTuple interface requires 3 RegClassIDs for each possible
  // tuple type even though we only have them for ZPR2 and ZPR4.
  static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
                                         AArch64::ZPR4Mul4RegClassID};
  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
                                     AArch64::zsub2, AArch64::zsub3};
  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         const unsigned RegClassIDs[],
                                         const unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's just
  // a vector.
  if (Regs.size() == 1)
    return Regs[0];

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  SDLoc DL(Regs[0]);

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
  Ops.push_back(
      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
  }

  SDNode *N =
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}

void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
                                      bool isExt) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
                               N->op_begin() + Vec0Off + NumVecs);
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  if (isExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
}

static std::tuple<SDValue, SDValue>
extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
  SDLoc DL(Disc);
  SDValue AddrDisc;
  SDValue ConstDisc;

  // If this is a blend, remember the constant and address discriminators.
  // Otherwise, it's either a constant discriminator, or a non-blended
  // address discriminator.
  if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
      Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
    AddrDisc = Disc->getOperand(1);
    ConstDisc = Disc->getOperand(2);
  } else {
    ConstDisc = Disc;
  }

  // If the constant discriminator (either the blend RHS, or the entire
  // discriminator value) isn't a 16-bit constant, bail out, and let the
  // discriminator be computed separately.
  auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
  if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
    return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);

  // If there's no address discriminator, use XZR directly.
  if (!AddrDisc)
    AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);

  return std::make_tuple(
      DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
      AddrDisc);
}
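
// For example, a discriminator (llvm.ptrauth.blend AddrDisc, 1234) splits
// into (1234, AddrDisc); a bare constant 42 yields (42, XZR); any other
// value Disc yields (0, Disc), leaving the discriminator to be computed
// separately.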

void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
  SDLoc DL(N);
  // IntrinsicID is operand #0
  SDValue Val = N->getOperand(1);
  SDValue AUTKey = N->getOperand(2);
  SDValue AUTDisc = N->getOperand(3);

  unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
  AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);

  SDValue AUTAddrDisc, AUTConstDisc;
  std::tie(AUTConstDisc, AUTAddrDisc) =
      extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);

  SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                         AArch64::X16, Val, SDValue());
  SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};

  SDNode *AUT = CurDAG->getMachineNode(AArch64::AUT, DL, MVT::i64, Ops);
  ReplaceNode(N, AUT);
  return;
}

void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
  SDLoc DL(N);
  // IntrinsicID is operand #0
  SDValue Val = N->getOperand(1);
  SDValue AUTKey = N->getOperand(2);
  SDValue AUTDisc = N->getOperand(3);
  SDValue PACKey = N->getOperand(4);
  SDValue PACDisc = N->getOperand(5);

  unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
  unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();

  AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
  PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);

  SDValue AUTAddrDisc, AUTConstDisc;
  std::tie(AUTConstDisc, AUTAddrDisc) =
      extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);

  SDValue PACAddrDisc, PACConstDisc;
  std::tie(PACConstDisc, PACAddrDisc) =
      extractPtrauthBlendDiscriminators(PACDisc, CurDAG);

  SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                         AArch64::X16, Val, SDValue());

  SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
                   PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};

  SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
  ReplaceNode(N, AUTPAC);
  return;
}

bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())
    return false;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::bf16) {
bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())
    return false;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::bf16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 || VT.is64BitVector()) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else
    return false;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();
  SDLoc dl(N);
  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
  SDValue Ops[] = {Base, Offset, Chain};
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
                                       MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});

  // Either way, we're replacing the node, so tell the caller that.
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    LoadedVal =
        SDValue(CurDAG->getMachineNode(
                    AArch64::SUBREG_TO_REG, dl, MVT::i64,
                    CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
                    SubReg),
                0);
  }

  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                                     unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(2), // Mem operand
                   Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));

  // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
  // because it's too simple to have needed special treatment during lowering.
  if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
    MachineMemOperand *MemOp = MemIntr->getMemOperand();
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
  }

  CurDAG->RemoveDeadNode(N);
}
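// The post-incremented variants below return { writeback, vector list,
// chain }: the vector list comes back as a single Untyped super-register,
// and each result vector is peeled off with an extract_subreg, mirroring
// SelectLoad above.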
void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
                                         unsigned Opc, unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(1), // Mem operand
                   N->getOperand(2), // Incremental
                   Chain};

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1)
    ReplaceUses(SDValue(N, 0), SuperReg);
  else
    for (unsigned i = 0; i < NumVecs; ++i)
      ReplaceUses(SDValue(N, i),
                  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT,
                                                 SuperReg));

  // Update the chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Optimize \param OldBase and \param OldOffset selecting the best addressing
/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
/// new Base and an SDValue representing the new offset.
std::tuple<unsigned, SDValue, SDValue>
AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
                                              unsigned Opc_ri,
                                              const SDValue &OldBase,
                                              const SDValue &OldOffset,
                                              unsigned Scale) {
  SDValue NewBase = OldBase;
  SDValue NewOffset = OldOffset;
  // Detect a possible Reg+Imm addressing mode.
  const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
      N, OldBase, NewBase, NewOffset);

  // Detect a possible reg+reg addressing mode, but only if we haven't already
  // detected a Reg+Imm one.
  const bool IsRegReg =
      !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);

  // Select the instruction.
  return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
}
enum class SelectTypeKind {
  Int1 = 0,
  Int = 1,
  FP = 2,
  AnyType = 3,
};

/// This function selects an opcode from a list of opcodes, which are
/// expected to be the opcodes for { 8-bit, 16-bit, 32-bit, 64-bit }
/// element types, in this order.
template <SelectTypeKind Kind>
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
  // Only match scalable vector VTs
  if (!VT.isScalableVector())
    return 0;

  EVT EltVT = VT.getVectorElementType();
  unsigned Key = VT.getVectorMinNumElements();
  switch (Kind) {
  case SelectTypeKind::AnyType:
    break;
  case SelectTypeKind::Int:
    if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
        EltVT != MVT::i64)
      return 0;
    break;
  case SelectTypeKind::Int1:
    if (EltVT != MVT::i1)
      return 0;
    break;
  case SelectTypeKind::FP:
    if (EltVT == MVT::bf16)
      Key = 16;
    else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
             EltVT != MVT::f64)
      return 0;
    break;
  }

  unsigned Offset;
  switch (Key) {
  case 16: // 8-bit or bf16
    Offset = 0;
    break;
  case 8: // 16-bit
    Offset = 1;
    break;
  case 4: // 32-bit
    Offset = 2;
    break;
  case 2: // 64-bit
    Offset = 3;
    break;
  default:
    return 0;
  }

  return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
}

// This function is almost identical to SelectWhilePair, but has an
// extra check on the range of the immediate operand.
// TODO: Merge these two functions together at some point?
void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
  // Immediate can be either 0 or 1.
  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
    if (Imm->getZExtValue() > 1)
      return;

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
  SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
  SDValue SuperReg = SDValue(WhilePair, 0);

  for (unsigned I = 0; I < 2; ++I)
    ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
                                   AArch64::psub0 + I, DL, VT, SuperReg));

  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};

  SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
  SDValue SuperReg = SDValue(WhilePair, 0);

  for (unsigned I = 0; I < 2; ++I)
    ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
                                   AArch64::psub0 + I, DL, VT, SuperReg));

  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
                                             unsigned Opcode) {
  EVT VT = N->getValueType(0);
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
  SDValue Ops = createZTuple(Regs);
  SDLoc DL(N);
  SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
  SDValue SuperReg = SDValue(Intrinsic, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  CurDAG->RemoveDeadNode(N);
}
void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
                                                          unsigned NumVecs,
                                                          bool IsZmMulti,
                                                          unsigned Opcode,
                                                          bool HasPred) {
  assert(Opcode != 0 && "Unexpected opcode");

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  unsigned FirstVecIdx = HasPred ? 2 : 1;

  auto GetMultiVecOperand = [=](unsigned StartIdx) {
    SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx,
                                 N->op_begin() + StartIdx + NumVecs);
    return createZMulTuple(Regs);
  };

  SDValue Zdn = GetMultiVecOperand(FirstVecIdx);

  SDValue Zm;
  if (IsZmMulti)
    Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
  else
    Zm = N->getOperand(NumVecs + FirstVecIdx);

  SDNode *Intrinsic;
  if (HasPred)
    Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
                                       N->getOperand(1), Zdn, Zm);
  else
    Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
  SDValue SuperReg = SDValue(Intrinsic, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
                                               unsigned Scale, unsigned Opc_ri,
                                               unsigned Opc_rr, bool IsIntr) {
  assert(Scale < 5 && "Invalid scaling value.");
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  // Optimize addressing mode.
  SDValue Base, Offset;
  unsigned Opc;
  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
      N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
      CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);

  SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
                   Base,                          // Memory operand
                   Offset, Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
                                                          unsigned NumVecs,
                                                          unsigned Scale,
                                                          unsigned Opc_ri,
                                                          unsigned Opc_rr) {
  assert(Scale < 4 && "Invalid scaling value.");
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue PNg = N->getOperand(2);
  SDValue Base = N->getOperand(3);
  SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
  unsigned Opc;
  std::tie(Opc, Base, Offset) =
      findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);

  SDValue Ops[] = {PNg,  // Predicate-as-counter
                   Base, // Memory operand
                   Offset, Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
                                            unsigned Opcode) {
  if (N->getValueType(0) != MVT::nxv4f32)
    return;
  SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
}
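// Select a multi-vector LUTI table lookup: operand 2 must be matched to the
// ZT0 register (via ImmToReg), and the table-index immediate (operand 4)
// must not exceed MaxImm; otherwise the node is left untouched.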
void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
                                                unsigned NumOutVecs,
                                                unsigned Opc, uint32_t MaxImm) {
  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
    if (Imm->getZExtValue() > MaxImm)
      return;

  SDValue ZtValue;
  if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
    return;
  SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);

  SDNode *Instruction =
      CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
  SDValue SuperReg = SDValue(Instruction, 0);

  for (unsigned I = 0; I < NumOutVecs; ++I)
    ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
                                      AArch64::zsub0 + I, DL, VT, SuperReg));

  // Copy chain
  unsigned ChainIdx = NumOutVecs;
  ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
  CurDAG->RemoveDeadNode(Node);
}

void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
                                      unsigned Op) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
  SDValue Zd = createZMulTuple(Regs);
  SDValue Zn = N->getOperand(1 + NumVecs);
  SDValue Zm = N->getOperand(2 + NumVecs);

  SDValue Ops[] = {Zd, Zn, Zm};

  SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
  SDValue SuperReg = SDValue(Intrinsic, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  CurDAG->RemoveDeadNode(N);
}

bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
  switch (BaseReg) {
  default:
    return false;
  case AArch64::ZA:
  case AArch64::ZAB0:
    if (TileNum == 0)
      break;
    return false;
  case AArch64::ZAH0:
    if (TileNum <= 1)
      break;
    return false;
  case AArch64::ZAS0:
    if (TileNum <= 3)
      break;
    return false;
  case AArch64::ZAD0:
    if (TileNum <= 7)
      break;
    return false;
  }

  BaseReg += TileNum;
  return true;
}
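// For example, SelectSMETile(BaseReg = AArch64::ZAS0, TileNum = 2) rewrites
// BaseReg to AArch64::ZAS2, while TileNum = 4 is rejected because ZA.S only
// has four tiles (ZAS0..ZAS3). This relies on the per-size tile registers
// being numbered consecutively.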
template <unsigned MaxIdx, unsigned Scale>
void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
                                                unsigned BaseReg, unsigned Op) {
  unsigned TileNum = 0;
  if (BaseReg != AArch64::ZA)
    TileNum = N->getConstantOperandVal(2);

  if (!SelectSMETile(BaseReg, TileNum))
    return;

  SDValue SliceBase, Base, Offset;
  if (BaseReg == AArch64::ZA)
    SliceBase = N->getOperand(2);
  else
    SliceBase = N->getOperand(3);

  if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
    return;

  SDLoc DL(N);
  SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
  SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
  SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);

  EVT VT = N->getValueType(0);
  for (unsigned I = 0; I < NumVecs; ++I)
    ReplaceUses(SDValue(N, I),
                CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
                                               SDValue(Mov, 0)));
  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
                                                 unsigned Op, unsigned MaxIdx,
                                                 unsigned Scale,
                                                 unsigned BaseReg) {
  // The slice can be in different positions:
  //   array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
  //   tile to vector:  llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
  SDValue SliceBase = N->getOperand(2);
  if (BaseReg != AArch64::ZA)
    SliceBase = N->getOperand(3);

  SDValue Base, Offset;
  if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
    return;
  // The correct ZA tile number is computed when the machine instruction is
  // emitted (see EmitZAInstr); the DAG cannot select a ZA tile as an output
  // register with ZReg.
  SDLoc DL(N);
  SmallVector<SDValue, 6> Ops;
  if (BaseReg != AArch64::ZA)
    Ops.push_back(N->getOperand(2));
  Ops.push_back(Base);
  Ops.push_back(Offset);
  Ops.push_back(N->getOperand(0)); // Chain
  SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);

  EVT VT = N->getValueType(0);
  for (unsigned I = 0; I < NumVecs; ++I)
    ReplaceUses(SDValue(N, I),
                CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
                                               SDValue(Mov, 0)));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
                                                    unsigned NumOutVecs,
                                                    bool IsTupleInput,
                                                    unsigned Opc) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  unsigned NumInVecs = N->getNumOperands() - 1;

  SmallVector<SDValue, 6> Ops;
  if (IsTupleInput) {
    assert((NumInVecs == 2 || NumInVecs == 4) &&
           "Don't know how to handle multi-register input!");
    SmallVector<SDValue, 4> Regs(N->op_begin() + 1,
                                 N->op_begin() + 1 + NumInVecs);
    Ops.push_back(createZMulTuple(Regs));
  } else {
    // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
    for (unsigned I = 0; I < NumInVecs; I++)
      Ops.push_back(N->getOperand(1 + I));
  }

  SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
  SDValue SuperReg = SDValue(Res, 0);

  for (unsigned I = 0; I < NumOutVecs; I++)
    ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + I, DL, VT, SuperReg));
  CurDAG->RemoveDeadNode(N);
}
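// Vector-structure stores mirror the structure loads above: the source
// vectors are glued into a single D- or Q-form tuple with REG_SEQUENCE so
// the register allocator assigns consecutive registers, and that tuple feeds
// a single store machine node.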
void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
                                      unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}

void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
                                                unsigned Scale, unsigned Opc_rr,
                                                unsigned Opc_ri) {
  SDLoc dl(N);

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  SDValue RegSeq = createZTuple(Regs);

  // Optimize addressing mode.
  unsigned Opc;
  SDValue Offset, Base;
  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
      N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
      CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);

  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
                   Base,                               // address
                   Offset,                             // offset
                   N->getOperand(0)};                  // chain
  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);

  ReplaceNode(N, St);
}

bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
                                                      SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();

  // Try to match it for the frame address
  if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
    int FI = FINode->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  return false;
}

void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
                                          unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  const EVT ResTys[] = {MVT::i64,    // Type of the write back register
                        MVT::Other}; // Type for the Chain

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SDValue Ops[] = {RegSeq,
                   N->getOperand(NumVecs + 1), // base register
                   N->getOperand(NumVecs + 2), // Incremental
                   N->getOperand(0)};          // Chain
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  ReplaceNode(N, St);
}
namespace {
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
  SelectionDAG &DAG;

public:
  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}

  SDValue operator()(SDValue V64Reg) {
    EVT VT = V64Reg.getValueType();
    unsigned NarrowSize = VT.getVectorNumElements();
    MVT EltTy = VT.getVectorElementType().getSimpleVT();
    MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
    SDLoc DL(V64Reg);

    SDValue Undef =
        SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
    return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
  }
};
} // namespace

/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
  EVT VT = V128Reg.getValueType();
  unsigned WideSize = VT.getVectorNumElements();
  MVT EltTy = VT.getVectorElementType().getSimpleVT();
  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);

  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
                                    V128Reg);
}
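// Lane-indexed structure loads and stores only operate on Q-form register
// tuples, so 64-bit input vectors are first widened with WidenVector, the
// Q-form tuple is built, and 64-bit results are narrowed back with
// NarrowVector before replacing the original values.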
void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
                                         unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(), WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);

  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
  static const unsigned QSubs[] = {AArch64::qsub0, AArch64::qsub1,
                                   AArch64::qsub2, AArch64::qsub3};
  for (unsigned i = 0; i < NumVecs; ++i) {
    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
    if (Narrow)
      NV = NarrowVector(NV, *CurDAG);
    ReplaceUses(SDValue(N, i), NV);
  }

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
                                             unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(), WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        RegSeq->getValueType(0), MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);

  SDValue Ops[] = {RegSeq,
                   CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), // Lane Number
                   N->getOperand(NumVecs + 2),                      // Base register
                   N->getOperand(NumVecs + 3),                      // Incremental
                   N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of the write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of the vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0),
                Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
  } else {
    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
    static const unsigned QSubs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};
    for (unsigned i = 0; i < NumVecs; ++i) {
      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
                                                  SuperReg);
      if (Narrow)
        NV = NarrowVector(NV, *CurDAG);
      ReplaceUses(SDValue(N, i), NV);
    }
  }

  // Update the Chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
                                          unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(), WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}

void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
                                              unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(), WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 2), // Base Register
                   N->getOperand(NumVecs + 3), // Incremental
                   N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
                                       unsigned &Opc, SDValue &Opd0,
                                       unsigned &LSB, unsigned &MSB,
                                       unsigned NumberOfIgnoredLowBits,
                                       bool BiggerPattern) {
  assert(N->getOpcode() == ISD::AND &&
         "N must be a AND operation to call this function");

  EVT VT = N->getValueType(0);

  // We could test the type of VT and return false when it does not match, but
  // since that check is done prior to this call in the current context, we
  // turn it into an assert to avoid redundant code.
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  // FIXME: simplify-demanded-bits in DAGCombine will probably have
  // changed the AND node to a 32-bit mask operation. We'll have to
  // undo that as part of the transform here if we want to catch all
  // the opportunities.
  // Currently the NumberOfIgnoredLowBits argument helps to recover
  // from these situations when matching bigger pattern (bitfield insert).

  // For unsigned extracts, check for a shift right and mask
  uint64_t AndImm = 0;
  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
    return false;

  const SDNode *Op0 = N->getOperand(0).getNode();

  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
  // simplified. Try to undo that.
  AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);

  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
  if (AndImm & (AndImm + 1))
    return false;

  bool ClampMSB = false;
  uint64_t SrlImm = 0;
  // Handle the SRL + ANY_EXTEND case.
  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
      isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
    // Extend the incoming operand of the SRL to 64-bit.
    Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
    // Make sure to clamp the MSB so that we preserve the semantics of the
    // original operations.
    ClampMSB = true;
  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
             isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
                                   SrlImm)) {
    // If the shift result was truncated, we can still combine them.
    Opd0 = Op0->getOperand(0).getOperand(0);

    // Use the type of SRL node.
    VT = Opd0->getValueType(0);
  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
    Opd0 = Op0->getOperand(0);
    ClampMSB = (VT == MVT::i32);
  } else if (BiggerPattern) {
    // Let's pretend a 0 shift right has been performed.
    // The resulting code will be at least as good as the original one
    // plus it may expose more opportunities for bitfield insert pattern.
    // FIXME: Currently we limit this to the bigger pattern, because
    // some optimizations expect AND and not UBFM.
    Opd0 = N->getOperand(0);
  } else
    return false;

  // Bail out on large immediates. This happens when no proper
  // combining/constant folding was performed.
  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
    LLVM_DEBUG(
        (dbgs() << N
                << ": Found large shift immediate, this should not happen\n"));
    return false;
  }

  LSB = SrlImm;
  MSB = SrlImm +
        (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
                        : llvm::countr_one<uint64_t>(AndImm)) -
        1;
  if (ClampMSB)
    // Since we're moving the extend before the right shift operation, we need
    // to clamp the MSB to make sure we don't shift in undefined bits instead
    // of the zeros which would get shifted in with the original right shift
    // operation.
    MSB = MSB > 31 ? 31 : MSB;

  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
  return true;
}
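// For example, on i32, "(and (srl x, 3), 0x1f)" yields Opc = UBFMWri with
// LSB (immr) = 3 and MSB (imms) = 3 + 5 - 1 = 7, i.e. "ubfx w0, w1, #3, #5".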
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
                                             SDValue &Opd0, unsigned &Immr,
                                             unsigned &Imms) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);

  EVT VT = N->getValueType(0);
  unsigned BitWidth = VT.getSizeInBits();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  SDValue Op = N->getOperand(0);
  if (Op->getOpcode() == ISD::TRUNCATE) {
    Op = Op->getOperand(0);
    VT = Op->getValueType(0);
    BitWidth = VT.getSizeInBits();
  }

  uint64_t ShiftImm;
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
      !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
    return false;

  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
  if (ShiftImm + Width > BitWidth)
    return false;

  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
  Opd0 = Op.getOperand(0);
  Immr = ShiftImm;
  Imms = ShiftImm + Width - 1;
  return true;
}

static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
                                          SDValue &Opd0, unsigned &LSB,
                                          unsigned &MSB) {
  // We are looking for the following pattern, which basically extracts several
  // contiguous bits from the source value and places them at the LSB of the
  // destination value; all other bits of the destination value are set to
  // zero:
  //
  //   Value2 = AND Value, MaskImm
  //   SRL Value2, ShiftImm
  //
  // with MaskImm >> ShiftImm to search for the bit width.
  //
  // This gets selected into a single UBFM:
  //
  //   UBFM Value, ShiftImm, Log2_64(MaskImm)
  //

  if (N->getOpcode() != ISD::SRL)
    return false;

  uint64_t AndMask = 0;
  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
    return false;

  Opd0 = N->getOperand(0).getOperand(0);

  uint64_t SrlImm = 0;
  if (!isIntImmediate(N->getOperand(1), SrlImm))
    return false;

  // Check whether we really have several bits extract here.
  if (!isMask_64(AndMask >> SrlImm))
    return false;

  Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
  LSB = SrlImm;
  MSB = llvm::Log2_64(AndMask);
  return true;
}
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
                                       unsigned &Immr, unsigned &Imms,
                                       bool BiggerPattern) {
  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
         "N must be a SHR/SRA operation to call this function");

  EVT VT = N->getValueType(0);

  // We could test the type of VT and return false when it does not match, but
  // since that check is done prior to this call in the current context, we
  // turn it into an assert to avoid redundant code.
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  // Check for AND + SRL doing several bits extract.
  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
    return true;

  // We're looking for a shift of a shift.
  uint64_t ShlImm = 0;
  uint64_t TruncBits = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
    Opd0 = N->getOperand(0).getOperand(0);
  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
             N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
    // We are looking for a shift of truncate. Truncate from i64 to i32 can be
    // considered as setting the high 32 bits to zero. Our strategy here is to
    // always generate a 64-bit UBFM. This consistency will help the CSE pass
    // later find more redundancy.
    Opd0 = N->getOperand(0).getOperand(0);
    TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
    VT = Opd0.getValueType();
    assert(VT == MVT::i64 && "the promoted type should be i64");
  } else if (BiggerPattern) {
    // Let's pretend a 0 shift left has been performed.
    // FIXME: Currently we limit this to the bigger pattern case,
    // because some optimizations expect AND and not UBFM
    Opd0 = N->getOperand(0);
  } else
    return false;

  // Missing combines/constant folding may have left us with strange
  // constants.
  if (ShlImm >= VT.getSizeInBits()) {
    LLVM_DEBUG(
        (dbgs() << N
                << ": Found large shift immediate, this should not happen\n"));
    return false;
  }

  uint64_t SrlImm = 0;
  if (!isIntImmediate(N->getOperand(1), SrlImm))
    return false;

  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
         "bad amount in shift node!");
  int immr = SrlImm - ShlImm;
  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
  // SRA requires a signed extraction
  if (VT == MVT::i32)
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
  else
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
  return true;
}

bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND);

  EVT VT = N->getValueType(0);
  EVT NarrowVT = N->getOperand(0)->getValueType(0);
  if (VT != MVT::i64 || NarrowVT != MVT::i32)
    return false;

  uint64_t ShiftImm;
  SDValue Op = N->getOperand(0);
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
    return false;

  SDLoc dl(N);
  // Extend the incoming operand of the shift to 64-bits.
  SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
  unsigned Immr = ShiftImm;
  unsigned Imms = NarrowVT.getSizeInBits() - 1;
  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
                   CurDAG->getTargetConstant(Imms, dl, VT)};
  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
  return true;
}

static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
                                SDValue &Opd0, unsigned &Immr, unsigned &Imms,
                                unsigned NumberOfIgnoredLowBits = 0,
                                bool BiggerPattern = false) {
  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
    return false;

  switch (N->getOpcode()) {
  default:
    if (!N->isMachineOpcode())
      return false;
    break;
  case ISD::AND:
    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
                                      NumberOfIgnoredLowBits, BiggerPattern);
  case ISD::SRL:
  case ISD::SRA:
    return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);

  case ISD::SIGN_EXTEND_INREG:
    return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
  }

  unsigned NOpc = N->getMachineOpcode();
  switch (NOpc) {
  default:
    return false;
  case AArch64::SBFMWri:
  case AArch64::UBFMWri:
  case AArch64::SBFMXri:
  case AArch64::UBFMXri:
    Opc = NOpc;
    Opd0 = N->getOperand(0);
    Immr = N->getConstantOperandVal(1);
    Imms = N->getConstantOperandVal(2);
    return true;
  }
  // Unreachable
  return false;
}
bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
  unsigned Opc, Immr, Imms;
  SDValue Opd0;
  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
    return false;

  EVT VT = N->getValueType(0);
  SDLoc dl(N);

  // If the bit extract operation is 64bit but the original type is 32bit, we
  // need to add one EXTRACT_SUBREG.
  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
    SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
                       CurDAG->getTargetConstant(Imms, dl, MVT::i64)};

    SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
    SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
                                                   MVT::i32, SDValue(BFM, 0));
    ReplaceNode(N, Inner.getNode());
    return true;
  }

  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
                   CurDAG->getTargetConstant(Imms, dl, VT)};
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}

/// Does DstMask form a complementary pair with the mask provided by
/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
/// this asks whether DstMask zeroes precisely those bits that will be set by
/// the other half.
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
                              unsigned NumberOfIgnoredHighBits, EVT VT) {
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "i32 or i64 mask type expected!");
  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;

  APInt SignificantDstMask = APInt(BitWidth, DstMask);
  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);

  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
         (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
}

// Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped and never used
// before it has been dropped.
// E.g., looking for the useful bits of x:
// 1. y = x & 0x7
// 2. z = y >> 2
// After #1, the useful bits of x are 0x7; they live on through y.
// After #2, the useful bits of x are 0x4.
// However, if x is used in an unpredictable instruction, then all its bits
// are useful.
// E.g.
// 1. y = x & 0x7
// 2. z = y >> 2
// 3. str x, [@x]
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);

static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
                                              unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
  getUsefulBits(Op, UsefulBits, Depth + 1);
}

static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
                                             uint64_t Imm, uint64_t MSB,
                                             unsigned Depth) {
  // inherit the bitwidth value
  APInt OpUsefulBits(UsefulBits);
  OpUsefulBits = 1;

  if (MSB >= Imm) {
    OpUsefulBits <<= MSB - Imm + 1;
    --OpUsefulBits;
    // The interesting part will be in the lower part of the result
    getUsefulBits(Op, OpUsefulBits, Depth + 1);
    // The interesting part was starting at Imm in the argument
    OpUsefulBits <<= Imm;
  } else {
    OpUsefulBits <<= MSB + 1;
    --OpUsefulBits;
    // The interesting part will be shifted in the result
    OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
    getUsefulBits(Op, OpUsefulBits, Depth + 1);
    // The interesting part was at zero in the argument
    OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
  }

  UsefulBits &= OpUsefulBits;
}

static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
                                  unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
  uint64_t MSB =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();

  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
}

static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
                                              unsigned Depth) {
  uint64_t ShiftTypeAndValue =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  APInt Mask(UsefulBits);
  Mask.clearAllBits();
  Mask.flipAllBits();

  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
    // Shift Left
    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
    Mask <<= ShiftAmt;
    getUsefulBits(Op, Mask, Depth + 1);
    Mask.lshrInPlace(ShiftAmt);
  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
    // Shift Right
    // We do not handle AArch64_AM::ASR, because the sign will change the
    // number of useful bits
    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
    Mask.lshrInPlace(ShiftAmt);
    getUsefulBits(Op, Mask, Depth + 1);
    Mask <<= ShiftAmt;
  } else
    return;

  UsefulBits &= Mask;
}
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
                                 unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  uint64_t MSB =
      cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();

  APInt OpUsefulBits(UsefulBits);
  OpUsefulBits = 1;

  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
  ResultUsefulBits.flipAllBits();
  APInt Mask(UsefulBits.getBitWidth(), 0);

  getUsefulBits(Op, ResultUsefulBits, Depth + 1);

  if (MSB >= Imm) {
    // The instruction is a BFXIL.
    uint64_t Width = MSB - Imm + 1;
    uint64_t LSB = Imm;

    OpUsefulBits <<= Width;
    --OpUsefulBits;

    if (Op.getOperand(1) == Orig) {
      // Copy the low bits from the result to bits starting from LSB.
      Mask = ResultUsefulBits & OpUsefulBits;
      Mask <<= LSB;
    }

    if (Op.getOperand(0) == Orig)
      // Bits starting from LSB in the input contribute to the result.
      Mask |= (ResultUsefulBits & ~OpUsefulBits);
  } else {
    // The instruction is a BFI.
    uint64_t Width = MSB + 1;
    uint64_t LSB = UsefulBits.getBitWidth() - Imm;

    OpUsefulBits <<= Width;
    --OpUsefulBits;
    OpUsefulBits <<= LSB;

    if (Op.getOperand(1) == Orig) {
      // Copy the bits from the result to the zero bits.
      Mask = ResultUsefulBits & OpUsefulBits;
      Mask.lshrInPlace(LSB);
    }

    if (Op.getOperand(0) == Orig)
      Mask |= (ResultUsefulBits & ~OpUsefulBits);
  }

  UsefulBits &= Mask;
}

static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
                                SDValue Orig, unsigned Depth) {

  // Users of this node should have already been instruction selected
  // FIXME: Can we turn that into an assert?
  if (!UserNode->isMachineOpcode())
    return;

  switch (UserNode->getMachineOpcode()) {
  default:
    return;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
  case AArch64::ANDWri:
  case AArch64::ANDXri:
    // We increment Depth only when we call the getUsefulBits
    return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
                                             Depth);
  case AArch64::UBFMWri:
  case AArch64::UBFMXri:
    return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);

  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
      getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
                                        Depth);
    return;
  case AArch64::BFMWri:
  case AArch64::BFMXri:
    return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);

  case AArch64::STRBBui:
  case AArch64::STURBBi:
    if (UserNode->getOperand(0) != Orig)
      return;
    UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
    return;

  case AArch64::STRHHui:
  case AArch64::STURHHi:
    if (UserNode->getOperand(0) != Orig)
      return;
    UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
    return;
  }
}

static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return;
  // Initialize UsefulBits
  if (!Depth) {
    unsigned Bitwidth = Op.getScalarValueSizeInBits();
    // At the beginning, assume every produced bit is useful
    UsefulBits = APInt(Bitwidth, 0);
    UsefulBits.flipAllBits();
  }
  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);

  for (SDNode *Node : Op.getNode()->uses()) {
    // A use cannot produce useful bits
    APInt UsefulBitsForUse = APInt(UsefulBits);
    getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
    UsersUsefulBits |= UsefulBitsForUse;
  }
  // UsefulBits contains the produced bits that are meaningful for the
  // current definition, thus a user cannot make a bit meaningful at
  // this point
  UsefulBits &= UsersUsefulBits;
}

/// Create a machine node performing a notional SHL of Op by ShlAmount. If
/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
/// 0, return Op unchanged.
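// For example, on i32, getLeftShift(CurDAG, Op, 4) produces
// "UBFMWri Op, 28, 27" (an "lsl w, w, #4"), while getLeftShift(CurDAG, Op, -4)
// produces "UBFMWri Op, 4, 31" (an "lsr w, w, #4").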
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
  if (ShlAmount == 0)
    return Op;

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned BitWidth = VT.getSizeInBits();
  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;

  SDNode *ShiftNode;
  if (ShlAmount > 0) {
    // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
    ShiftNode = CurDAG->getMachineNode(
        UBFMOpc, dl, VT, Op,
        CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
        CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
  } else {
    // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
    assert(ShlAmount < 0 && "expected right shift");
    int ShrAmount = -ShlAmount;
    ShiftNode = CurDAG->getMachineNode(
        UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
        CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
  }

  return SDValue(ShiftNode, 0);
}

// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
                                           bool BiggerPattern,
                                           const uint64_t NonZeroBits,
                                           SDValue &Src, int &DstLSB,
                                           int &Width);

// For bit-field-positioning pattern "(shl VAL, N)".
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
                                           bool BiggerPattern,
                                           const uint64_t NonZeroBits,
                                           SDValue &Src, int &DstLSB,
                                           int &Width);

/// Does this tree qualify as an attempt to move a bitfield into position,
/// essentially "(and (shl VAL, N), Mask)" or "(shl VAL, N)".
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
                                    bool BiggerPattern, SDValue &Src,
                                    int &DstLSB, int &Width) {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  (void)BitWidth;
  assert(BitWidth == 32 || BitWidth == 64);

  KnownBits Known = CurDAG->computeKnownBits(Op);

  // Non-zero in the sense that they're not provably zero, which is the key
  // point if we want to use this value
  const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
  if (!isShiftedMask_64(NonZeroBits))
    return false;

  switch (Op.getOpcode()) {
  default:
    break;
  case ISD::AND:
    return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
                                          NonZeroBits, Src, DstLSB, Width);
  case ISD::SHL:
    return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
                                          NonZeroBits, Src, DstLSB, Width);
  }

  return false;
}
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
                                           bool BiggerPattern,
                                           const uint64_t NonZeroBits,
                                           SDValue &Src, int &DstLSB,
                                           int &Width) {
  assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");

  EVT VT = Op.getValueType();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Caller guarantees VT is one of i32 or i64");
  (void)VT;

  uint64_t AndImm;
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
    return false;

  // If (~AndImm & NonZeroBits) is not zero at POS, we know that
  //   1) (AndImm & (1 << POS)) == 0
  //   2) the result of AND is not zero at POS bit (according to NonZeroBits)
  //
  // 1) and 2) don't agree, so something must be wrong (e.g., in
  // 'SelectionDAG::computeKnownBits')
  assert((~AndImm & NonZeroBits) == 0 &&
         "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");

  SDValue AndOp0 = Op.getOperand(0);

  uint64_t ShlImm;
  SDValue ShlOp0;
  if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
    // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
    ShlOp0 = AndOp0.getOperand(0);
  } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
             isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
                                   ShlImm)) {
    // For pattern "and(any_extend(shl(val, N)), shifted-mask)"

    // ShlVal == shl(val, N), which is a left shift on a smaller type.
    SDValue ShlVal = AndOp0.getOperand(0);

    // Since this is after type legalization and ShlVal is extended to MVT::i64,
    // expect VT to be MVT::i32.
    assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");

    // Widen 'val' to MVT::i64 as the source of bit field positioning.
    ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
  } else
    return false;

  // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
  // then we'll end up generating AndOp0+UBFIZ instead of just keeping
  // AndOp0+AND.
  if (!BiggerPattern && !AndOp0.hasOneUse())
    return false;

  DstLSB = llvm::countr_zero(NonZeroBits);
  Width = llvm::countr_one(NonZeroBits >> DstLSB);

  // Bail out on large Width. This happens when no proper combining / constant
  // folding was performed.
  if (Width >= (int)VT.getSizeInBits()) {
    // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t,
    // and Width == 64 indicates a missed dag-combine from "(and val, AllOnes)"
    // to "val".
    // If VT is i32, what Width >= 32 means:
    // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
    //   demands at least 'Width' bits (after dag-combiner). This together with
    //   the `any_extend` Op (undefined higher bits) indicates a missed
    //   combination when lowering the 'and' IR instruction to a machine IR
    //   instruction.
    LLVM_DEBUG(
        dbgs()
        << "Found large Width in bit-field-positioning -- this indicates no "
           "proper combining / constant folding was performed\n");
    return false;
  }

  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
  // amount. BiggerPattern is true when this pattern is being matched for BFI,
  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
  // which case it is not profitable to insert an extra shift.
  if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
    return false;

  Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
  return true;
}
// For node "(shl (and val, mask), N)", returns true if the node is equivalent
// to UBFIZ.
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
                                              SDValue &Src, int &DstLSB,
                                              int &Width) {
  // Caller should have verified that N is a left shift with constant shift
  // amount; the asserts check that.
  assert(Op.getOpcode() == ISD::SHL &&
         "Op.getNode() should be a SHL node to call this function");
  assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
         "Op.getNode() should shift ShlImm to call this function");

  uint64_t AndImm = 0;
  SDValue Op0 = Op.getOperand(0);
  if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
    return false;

  const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
  if (isMask_64(ShiftedAndImm)) {
    // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
    // should end with Mask, and could be prefixed with random bits if those
    // bits are shifted out.
    //
    // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
    // the AND result corresponding to those bits are shifted out, so it's fine
    // to not extract them.
    Width = llvm::countr_one(ShiftedAndImm);
    DstLSB = ShlImm;
    Src = Op0.getOperand(0);
    return true;
  }
  return false;
}

static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
                                           bool BiggerPattern,
                                           const uint64_t NonZeroBits,
                                           SDValue &Src, int &DstLSB,
                                           int &Width) {
  assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");

  EVT VT = Op.getValueType();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Caller guarantees that type is i32 or i64");
  (void)VT;

  uint64_t ShlImm;
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
    return false;

  if (!BiggerPattern && !Op.hasOneUse())
    return false;

  if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
    return true;

  DstLSB = llvm::countr_zero(NonZeroBits);
  Width = llvm::countr_one(NonZeroBits >> DstLSB);

  if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
    return false;

  Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
  return true;
}

static bool isShiftedMask(uint64_t Mask, EVT VT) {
  assert(VT == MVT::i32 || VT == MVT::i64);
  if (VT == MVT::i32)
    return isShiftedMask_32(Mask);
  return isShiftedMask_64(Mask);
}

// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
// inserted only sets known zero bits.
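// For example, for i32 "(or (and x, 0xffffff00), 0x5a)": 0x5a is not a valid
// logical immediate, the known-zero mask 0xff is a shifted mask, and the
// inserted bits land only in known-zero positions, so this selects
//   mov   w8, #0x5a
//   bfxil w0, w8, #0, #8
// i.e. a BFMWri with ImmR = 0 and ImmS = 7 (LSB = 0, Width = 8).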
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  unsigned BitWidth = VT.getSizeInBits();

  uint64_t OrImm;
  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
    return false;

  // Skip this transformation if the immediate can be encoded directly in the
  // ORR. Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most
  // likely performance neutral.
  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
    return false;

  uint64_t MaskImm;
  SDValue And = N->getOperand(0);
  // Must be a single use AND with an immediate operand.
  if (!And.hasOneUse() ||
      !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
    return false;

  // Compute the known zero bits for the AND, as this allows us to catch more
  // general cases than just looking for an AND with an immediate.
  KnownBits Known = CurDAG->computeKnownBits(And);

  // Non-zero in the sense that they're not provably zero, which is the key
  // point if we want to use this value.
  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();

  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
    return false;

  // The bits being inserted must only set those bits that are known to be
  // zero.
  if ((OrImm & NotKnownZero) != 0) {
    // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
    // currently handle this case.
    return false;
  }

  // BFI/BFXIL dst, src, #lsb, #width.
  int LSB = llvm::countr_one(NotKnownZero);
  int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();

  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
  unsigned ImmR = (BitWidth - LSB) % BitWidth;
  unsigned ImmS = Width - 1;

  // If we're creating a BFI instruction, avoid cases where we need more
  // instructions to materialize the BFI constant as compared to the original
  // ORR. A BFXIL will use the same constant as the original ORR, so the code
  // should be no worse in this case.
  bool IsBFI = LSB != 0;
  uint64_t BFIImm = OrImm >> LSB;
  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
    // We have a BFI instruction and we know the constant can't be materialized
    // with a ORR-immediate with the zero register.
    unsigned OrChunks = 0, BFIChunks = 0;
    for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
      if (((OrImm >> Shift) & 0xFFFF) != 0)
        ++OrChunks;
      if (((BFIImm >> Shift) & 0xFFFF) != 0)
        ++BFIChunks;
    }
    if (BFIChunks > OrChunks)
      return false;
  }

  // Materialize the constant to be inserted.
  SDLoc DL(N);
  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
  SDNode *MOVI = CurDAG->getMachineNode(
      MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));

  // Create the BFI/BFXIL instruction.
  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
                   CurDAG->getTargetConstant(ImmR, DL, VT),
                   CurDAG->getTargetConstant(ImmS, DL, VT)};
  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
                                           SDValue &ShiftedOperand,
                                           uint64_t &EncodedShiftImm) {
  // Avoid folding Dst into ORR-with-shift if Dst has uses other than the ORR.
  if (!Dst.hasOneUse())
    return false;

  EVT VT = Dst.getValueType();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Caller should guarantee that VT is one of i32 or i64");
  const unsigned SizeInBits = VT.getSizeInBits();

  SDLoc DL(Dst.getNode());
  uint64_t AndImm, ShlImm;
  if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
      isShiftedMask_64(AndImm)) {
    // Avoid transforming 'DstOp0' if it has uses other than the AND node.
    SDValue DstOp0 = Dst.getOperand(0);
    if (!DstOp0.hasOneUse())
      return false;

    // An example to illustrate the transformation
    // From:
    //   lsr x8, x1, #1
    //   and x8, x8, #0x3f80
    //   bfxil x8, x1, #0, #7
    // To:
    //   and x8, x1, #0x7f
    //   ubfx x9, x1, #8, #7
    //   orr x8, x8, x9, lsl #7
    //
    // The number of instructions remains the same, but ORR is faster than
    // BFXIL on many AArch64 processors (or as good as BFXIL if not faster).
    // Besides, the dependency chain is improved after the transformation.
    uint64_t SrlImm;
    if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
      uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
      if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
        unsigned MaskWidth =
            llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
        unsigned UBFMOpc =
            (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
        SDNode *UBFMNode = CurDAG->getMachineNode(
            UBFMOpc, DL, VT, DstOp0.getOperand(0),
            CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
                                      VT),
            CurDAG->getTargetConstant(
                SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
        ShiftedOperand = SDValue(UBFMNode, 0);
        EncodedShiftImm = AArch64_AM::getShifterImm(
            AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
        return true;
      }
    }
    return false;
  }

  if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
    ShiftedOperand = Dst.getOperand(0);
    EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
    return true;
  }

  uint64_t SrlImm;
  if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
    ShiftedOperand = Dst.getOperand(0);
    EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
    return true;
  }
  return false;
}
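// Editor's check of the UBFX bounds in the example above (illustrative): with
// AndImm = 0x3f80 and SrlImm = 1, the mask has 7 trailing zeros and width 7,
// so the UBFM extracts bits [8, 14], i.e. ubfx #8, #7.
static_assert((0x3f80u >> 7) == 0x7fu, "example mask is 7 ones shifted by 7");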
// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
// the operands and select it to AArch64::ORR with shifted registers if
// that's more efficient. Returns true iff selection to AArch64::ORR happens.
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
                            SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
                            const bool BiggerPattern) {
  EVT VT = N->getValueType(0);
  assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
  assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
          (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
         "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Expect result type to be i32 or i64 since N is combinable to BFM");
  SDLoc DL(N);

  // Bail out if BFM simplifies away one node in BFM Dst.
  if (OrOpd1 != Dst)
    return false;

  const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
  // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
  // nodes from Rn (or inserts an additional shift node) if BiggerPattern is
  // true.
  if (BiggerPattern) {
    uint64_t SrcAndImm;
    if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
        isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
      // OrOpd0 = AND Src, #Mask
      // So BFM simplifies away one AND node from Src and doesn't simplify away
      // nodes from Dst. If ORR with a left-shifted operand also simplifies
      // away one node (from Rd), ORR is better since it has higher throughput
      // and smaller latency than BFM on many AArch64 processors (and for the
      // rest ORR is at least as good as BFM).
      SDValue ShiftedOperand;
      uint64_t EncodedShiftImm;
      if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
                                         EncodedShiftImm)) {
        SDValue Ops[] = {OrOpd0, ShiftedOperand,
                         CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
        CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
        return true;
      }
    }
    return false;
  }

  assert((!BiggerPattern) && "BiggerPattern should be handled above");

  uint64_t ShlImm;
  if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
    if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
      SDValue Ops[] = {
          Dst, Src,
          CurDAG->getTargetConstant(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
      return true;
    }

    // Select the following pattern to a left-shifted operand rather than BFI.
    //   %val1 = op ..
    //   %val2 = shl %val1, #imm
    //   %res = or %val1, %val2
    //
    // If N is selected to be BFI, we know that
    // 1) OrOpd0 would be the operand from which to extract bits (i.e., folded
    //    into BFI)
    // 2) OrOpd1 would be the destination operand (i.e., preserved)
    //
    // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
    if (OrOpd0.getOperand(0) == OrOpd1) {
      SDValue Ops[] = {
          OrOpd1, OrOpd1,
          CurDAG->getTargetConstant(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
      return true;
    }
  }

  uint64_t SrlImm;
  if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
    // Select the following pattern to a right-shifted operand rather than
    // BFXIL.
    //   %val1 = op ..
    //   %val2 = lshr %val1, #imm
    //   %res = or %val1, %val2
    //
    // If N is selected to be BFXIL, we know that
    // 1) OrOpd0 would be the operand from which to extract bits (i.e., folded
    //    into BFXIL)
    // 2) OrOpd1 would be the destination operand (i.e., preserved)
    //
    // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
    if (OrOpd0.getOperand(0) == OrOpd1) {
      SDValue Ops[] = {
          OrOpd1, OrOpd1,
          CurDAG->getTargetConstant(
              AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
      return true;
    }
  }

  return false;
}
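// Editor's illustration (hypothetical IR): with %val2 = shl i64 %val1, 7 and
// %res = or i64 %val1, %val2, the OR above is selected directly to
//   orr x0, x1, x1, lsl #7
// rather than to a BFI.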
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
                                      SelectionDAG *CurDAG) {
  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  unsigned BitWidth = VT.getSizeInBits();

  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
  // have the expected shape. Try to undo that.

  unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
  unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();

  // Given an OR operation, check if we have the following pattern:
  //   ubfm c, b, imm, imm2 (or something that does the same job, see
  //                         isBitfieldExtractOp)
  //   d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
  //                   countTrailingZeros(mask2) == imm2 - imm + 1
  //   f = d | c
  // If yes, replace the OR instruction with:
  //   f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm and MSB = imm2

  // OR is commutative, so check all combinations of operand order and values
  // of BiggerPattern, i.e.
  //   Opd0, Opd1, BiggerPattern=false
  //   Opd1, Opd0, BiggerPattern=false
  //   Opd0, Opd1, BiggerPattern=true
  //   Opd1, Opd0, BiggerPattern=true
  // Several of these combinations may match, so check with BiggerPattern=false
  // first since that will produce better results by matching more instructions
  // and/or inserting fewer extra instructions.
  for (int I = 0; I < 4; ++I) {

    SDValue Dst, Src;
    unsigned ImmR, ImmS;
    bool BiggerPattern = I / 2;
    SDValue OrOpd0Val = N->getOperand(I % 2);
    SDNode *OrOpd0 = OrOpd0Val.getNode();
    SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
    SDNode *OrOpd1 = OrOpd1Val.getNode();

    unsigned BFXOpc;
    int DstLSB, Width;
    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
                            NumberOfIgnoredLowBits, BiggerPattern)) {
      // Check that the returned opcode is compatible with the pattern,
      // i.e., same type and zero extended (U and not S).
      if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
          (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
        continue;

      // Compute the width of the bitfield insertion.
      DstLSB = 0;
      Width = ImmS - ImmR + 1;
      // FIXME: This constraint is to catch bitfield insertion; we may want
      // to widen the pattern if we want to grab the general bitfield move
      // case.
      if (Width <= 0)
        continue;

      // If the mask on the insertee is correct, we have a BFXIL operation. We
      // can share the ImmR and ImmS values from the already-computed UBFM.
    } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
                                       BiggerPattern,
                                       Src, DstLSB, Width)) {
      ImmR = (BitWidth - DstLSB) % BitWidth;
      ImmS = Width - 1;
    } else
      continue;

    // Check the second part of the pattern.
    EVT VT = OrOpd1Val.getValueType();
    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");

    // Compute the known zero bits for the candidate first operand. This
    // allows us to catch more general cases than just looking for an AND
    // with an immediate. Indeed, simplify-demanded-bits may have removed
    // the AND instruction because it proves it was useless.
    KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);

    // Check if there is enough room for the second operand to appear
    // in the first one.
    APInt BitsToBeInserted =
        APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);

    if ((BitsToBeInserted & ~Known.Zero) != 0)
      continue;

    // Set the first operand.
    uint64_t Imm;
    if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
        isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
      // In that case, we can eliminate the AND.
      Dst = OrOpd1->getOperand(0);
    else
      // Maybe the AND has been removed by simplify-demanded-bits
      // or is useful because it discards more bits.
      Dst = OrOpd1Val;

    // Before selecting the ISD::OR node to AArch64::BFM, see if an
    // AArch64::ORR with a shifted operand is more efficient.
    if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
                        BiggerPattern))
      return true;

    // Both parts match.
    SDLoc DL(N);
    SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
                     CurDAG->getTargetConstant(ImmS, DL, VT)};
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
    return true;
  }

  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
  // mask (e.g., 0x000ffff0).
  uint64_t Mask0Imm, Mask1Imm;
  SDValue And0 = N->getOperand(0);
  SDValue And1 = N->getOperand(1);
  if (And0.hasOneUse() && And1.hasOneUse() &&
      isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
      isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
      APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
      (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {

    // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
    // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
    // bits to be inserted.
    if (isShiftedMask(Mask0Imm, VT)) {
      std::swap(And0, And1);
      std::swap(Mask0Imm, Mask1Imm);
    }

    SDValue Src = And1->getOperand(0);
    SDValue Dst = And0->getOperand(0);
    unsigned LSB = llvm::countr_zero(Mask1Imm);
    int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();

    // The BFXIL inserts the low-order bits from a source register, so right
    // shift the needed bits into place.
    SDLoc DL(N);
    unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
    uint64_t LsrImm = LSB;
    if (Src->hasOneUse() &&
        isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
        (LsrImm + LSB) < BitWidth) {
      Src = Src->getOperand(0);
      LsrImm += LSB;
    }

    SDNode *LSR = CurDAG->getMachineNode(
        ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
        CurDAG->getTargetConstant(BitWidth - 1, DL, VT));

    // BFXIL is an alias of BFM, so translate to BFM operands.
    unsigned ImmR = (BitWidth - LSB) % BitWidth;
    unsigned ImmS = Width - 1;

    // Create the BFXIL instruction.
    SDValue Ops[] = {Dst, SDValue(LSR, 0),
                     CurDAG->getTargetConstant(ImmR, DL, VT),
                     CurDAG->getTargetConstant(ImmS, DL, VT)};
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
    return true;
  }

  return false;
}
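// Editor's illustration of the two-AND case (hypothetical masks): with
// Mask1Imm = 0x000ffff0 (a shifted mask) and Mask0Imm = ~Mask1Imm = 0xfff0000f
// on i32, LSB = 4 and Width = 32 - popcount(Mask0Imm) = 16.
static_assert(0xfff0000fu == ~0x000ffff0u,
              "the example masks are complementary");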
bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
  if (N->getOpcode() != ISD::OR)
    return false;

  APInt NUsefulBits;
  getUsefulBits(SDValue(N, 0), NUsefulBits);

  // If no bits are useful, just return UNDEF.
  if (!NUsefulBits) {
    CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
    return true;
  }

  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
    return true;

  return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
}

/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
/// equivalent of a left shift by a constant amount followed by an and masking
/// out a contiguous set of bits.
bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
  if (N->getOpcode() != ISD::AND)
    return false;

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  SDValue Op0;
  int DstLSB, Width;
  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
                               Op0, DstLSB, Width))
    return false;

  // ImmR is the rotate right amount.
  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
  // ImmS is the most significant bit of the source to be moved.
  unsigned ImmS = Width - 1;

  SDLoc DL(N);
  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
                   CurDAG->getTargetConstant(ImmS, DL, VT)};
  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}

/// tryShiftAmountMod - Take advantage of the built-in mod of the shift amount
/// in variable shift/rotate instructions.
bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  EVT VT = N->getValueType(0);

  unsigned Opc;
  switch (N->getOpcode()) {
  case ISD::ROTR:
    Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
    break;
  case ISD::SHL:
    Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
    break;
  case ISD::SRL:
    Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
    break;
  case ISD::SRA:
    Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
    break;
  default:
    return false;
  }

  uint64_t Size;
  uint64_t Bits;
  if (VT == MVT::i32) {
    Bits = 5;
    Size = 32;
  } else if (VT == MVT::i64) {
    Bits = 6;
    Size = 64;
  } else
    return false;

  SDValue ShiftAmt = N->getOperand(1);
  SDLoc DL(N);
  SDValue NewShiftAmt;

  // Skip over an extend of the shift amount.
  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
      ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
    ShiftAmt = ShiftAmt->getOperand(0);

  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
    SDValue Add0 = ShiftAmt->getOperand(0);
    SDValue Add1 = ShiftAmt->getOperand(1);
    uint64_t Add0Imm;
    uint64_t Add1Imm;
    if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
      // If we are shifting by X+/-N where N == 0 mod Size, then just shift by
      // X to avoid the ADD/SUB.
      NewShiftAmt = Add0;
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
               (Add0Imm % Size == 0)) {
      // If we are shifting by N-X where N == 0 mod Size, then just shift by
      // -X to generate a NEG instead of a SUB from a constant.
      unsigned NegOpc;
      unsigned ZeroReg;
      EVT SubVT = ShiftAmt->getValueType(0);
      if (SubVT == MVT::i32) {
        NegOpc = AArch64::SUBWrr;
        ZeroReg = AArch64::WZR;
      } else {
        assert(SubVT == MVT::i64);
        NegOpc = AArch64::SUBXrr;
        ZeroReg = AArch64::XZR;
      }
      SDValue Zero =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
      MachineSDNode *Neg =
          CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
      NewShiftAmt = SDValue(Neg, 0);
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
      // If we are shifting by N-X where N == -1 mod Size, then just shift by
      // ~X to generate a NOT instead of a SUB from a constant.
      unsigned NotOpc;
      unsigned ZeroReg;
      EVT SubVT = ShiftAmt->getValueType(0);
      if (SubVT == MVT::i32) {
        NotOpc = AArch64::ORNWrr;
        ZeroReg = AArch64::WZR;
      } else {
        assert(SubVT == MVT::i64);
        NotOpc = AArch64::ORNXrr;
        ZeroReg = AArch64::XZR;
      }
      SDValue Zero =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
      MachineSDNode *Not =
          CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
      NewShiftAmt = SDValue(Not, 0);
    } else
      return false;
  } else {
    // If the shift amount is masked with an AND, check that the mask covers
    // the bits that are implicitly ANDed off by the above opcodes and, if so,
    // skip the AND.
    uint64_t MaskImm;
    if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
        !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
      return false;

    if ((unsigned)llvm::countr_one(MaskImm) < Bits)
      return false;

    NewShiftAmt = ShiftAmt->getOperand(0);
  }

  // Narrow/widen the shift amount to match the size of the shift operation.
  if (VT == MVT::i32)
    NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
    MachineSDNode *Ext = CurDAG->getMachineNode(
        AArch64::SUBREG_TO_REG, DL, VT,
        CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
    NewShiftAmt = SDValue(Ext, 0);
  }

  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}
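// Editor's illustration: the variable shifts only consume the low Bits of the
// amount, so for i64 an explicit (and amt, 63) is redundant; e.g. an amount of
// 67 behaves as 67 & 63 = 3.
static_assert((67u & 63u) == 3u, "i64 shift amounts are taken mod 64");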
true;3850}38513852static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,3853SDValue &FixedPos,3854unsigned RegWidth,3855bool isReciprocal) {3856APFloat FVal(0.0);3857if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))3858FVal = CN->getValueAPF();3859else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {3860// Some otherwise illegal constants are allowed in this case.3861if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||3862!isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))3863return false;38643865ConstantPoolSDNode *CN =3866dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));3867FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();3868} else3869return false;38703871// An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits3872// is between 1 and 32 for a destination w-register, or 1 and 64 for an3873// x-register.3874//3875// By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we3876// want THIS_NODE to be 2^fbits. This is much easier to deal with using3877// integers.3878bool IsExact;38793880if (isReciprocal)3881if (!FVal.getExactInverse(&FVal))3882return false;38833884// fbits is between 1 and 64 in the worst-case, which means the fmul3885// could have 2^64 as an actual operand. Need 65 bits of precision.3886APSInt IntVal(65, true);3887FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);38883889// N.b. isPowerOf2 also checks for > 0.3890if (!IsExact || !IntVal.isPowerOf2())3891return false;3892unsigned FBits = IntVal.logBase2();38933894// Checks above should have guaranteed that we haven't lost information in3895// finding FBits, but it must still be in range.3896if (FBits == 0 || FBits > RegWidth) return false;38973898FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);3899return true;3900}39013902bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,3903unsigned RegWidth) {3904return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,3905false);3906}39073908bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,3909SDValue &FixedPos,3910unsigned RegWidth) {3911return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,3912true);3913}39143915// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields3916// of the string and obtains the integer values from them and combines these3917// into a single value to be used in the MRS/MSR instruction.3918static int getIntOperandFromRegisterString(StringRef RegString) {3919SmallVector<StringRef, 5> Fields;3920RegString.split(Fields, ':');39213922if (Fields.size() == 1)3923return -1;39243925assert(Fields.size() == 53926&& "Invalid number of fields in read register string");39273928SmallVector<int, 5> Ops;3929bool AllIntFields = true;39303931for (StringRef Field : Fields) {3932unsigned IntField;3933AllIntFields &= !Field.getAsInteger(10, IntField);3934Ops.push_back(IntField);3935}39363937assert(AllIntFields &&3938"Unexpected non-integer value in special register string.");3939(void)AllIntFields;39403941// Need to combine the integer fields of the string into a single value3942// based on the bit encoding of MRS/MSR instruction.3943return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |3944(Ops[3] << 3) | (Ops[4]);3945}39463947// Lower the read_register intrinsic to an MRS instruction node if the special3948// register string argument is either of the form detailed in the ALCE (the3949// form described in getIntOperandsFromRegsterString) or is a 
// Lower the read_register intrinsic to an MRS instruction node if the special
// register string argument is either of the form detailed in the ACLE (the
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MRS SysReg mapper.
bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;

  unsigned Opcode64Bit = AArch64::MRS;
  int Imm = getIntOperandFromRegisterString(RegString->getString());
  if (Imm == -1) {
    // No match. Use the sysreg mapper to map the remaining possible strings
    // to the value for the register to be used for the instruction operand.
    const auto *TheReg =
        AArch64SysReg::lookupSysRegByName(RegString->getString());
    if (TheReg && TheReg->Readable &&
        TheReg->haveFeatures(Subtarget->getFeatureBits()))
      Imm = TheReg->Encoding;
    else
      Imm = AArch64SysReg::parseGenericRegister(RegString->getString());

    if (Imm == -1) {
      // Still no match; see if this is "pc" or give up.
      if (!ReadIs128Bit && RegString->getString() == "pc") {
        Opcode64Bit = AArch64::ADR;
        Imm = 0;
      } else {
        return false;
      }
    }
  }

  SDValue InChain = N->getOperand(0);
  SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
  if (!ReadIs128Bit) {
    CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
                         {SysRegImm, InChain});
  } else {
    SDNode *MRRS = CurDAG->getMachineNode(
        AArch64::MRRS, DL,
        {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
        {SysRegImm, InChain});

    // Sysregs are not endian. The even register always contains the low half
    // of the register.
    SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
                                                SDValue(MRRS, 0));
    SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
                                                SDValue(MRRS, 0));
    SDValue OutChain = SDValue(MRRS, 1);

    ReplaceUses(SDValue(N, 0), Lo);
    ReplaceUses(SDValue(N, 1), Hi);
    ReplaceUses(SDValue(N, 2), OutChain);
  }
  return true;
}

// Lower the write_register intrinsic to an MSR instruction node if the
// special register string argument is either of the form detailed in the ACLE
// (the form described in getIntOperandFromRegisterString) or is a named
// register known by the MSR SysReg mapper.
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;

  if (!WriteIs128Bit) {
    // Check if the register was one of those allowed as the pstatefield value
    // in the MSR (immediate) instruction. To accept the values allowed in the
    // pstatefield for the MSR (immediate) instruction, we also require that an
    // immediate value has been provided as an argument; we know this is the
    // case, as it has been ensured by semantic checking.
    auto trySelectPState = [&](auto PMapper, unsigned State) {
      if (PMapper) {
        assert(isa<ConstantSDNode>(N->getOperand(2)) &&
               "Expected a constant integer expression.");
        unsigned Reg = PMapper->Encoding;
        uint64_t Immed = N->getConstantOperandVal(2);
        CurDAG->SelectNodeTo(
            N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
            CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
        return true;
      }
      return false;
    };

    if (trySelectPState(
            AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
            AArch64::MSRpstateImm4))
      return true;
    if (trySelectPState(
            AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
            AArch64::MSRpstateImm1))
      return true;
  }

  int Imm = getIntOperandFromRegisterString(RegString->getString());
  if (Imm == -1) {
    // Use the sysreg mapper to attempt to map the remaining possible strings
    // to the value for the register to be used for the MSR (register)
    // instruction operand.
    auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
    if (TheReg && TheReg->Writeable &&
        TheReg->haveFeatures(Subtarget->getFeatureBits()))
      Imm = TheReg->Encoding;
    else
      Imm = AArch64SysReg::parseGenericRegister(RegString->getString());

    if (Imm == -1)
      return false;
  }

  SDValue InChain = N->getOperand(0);
  if (!WriteIs128Bit) {
    CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
                         CurDAG->getTargetConstant(Imm, DL, MVT::i32),
                         N->getOperand(2), InChain);
  } else {
    // No endian swap. The lower half always goes into the even subreg, and
    // the higher half always into the odd subreg.
    SDNode *Pair = CurDAG->getMachineNode(
        TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
        {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
                                   MVT::i32),
         N->getOperand(2),
         CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
         N->getOperand(3),
         CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});

    CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
                         CurDAG->getTargetConstant(Imm, DL, MVT::i32),
                         SDValue(Pair, 0), InChain);
  }

  return true;
}
/// We've got special pseudo-instructions for these
bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();

  // Leave IR for LSE if subtarget supports it.
  if (Subtarget->hasLSE())
    return false;

  if (MemTy == MVT::i8)
    Opcode = AArch64::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = AArch64::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = AArch64::CMP_SWAP_32;
  else if (MemTy == MVT::i64)
    Opcode = AArch64::CMP_SWAP_64;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, SDLoc(N), CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);

  return true;
}

bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
                                             SDValue &Shift) {
  if (!isa<ConstantSDNode>(N))
    return false;

  SDLoc DL(N);
  uint64_t Val = cast<ConstantSDNode>(N)
                     ->getAPIntValue()
                     .trunc(VT.getFixedSizeInBits())
                     .getZExtValue();

  switch (VT.SimpleTy) {
  case MVT::i8:
    // All immediates are supported.
    Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
    return true;
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    // Support 8-bit unsigned immediates.
    if (Val <= 255) {
      Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
      return true;
    }
    // Support 16-bit unsigned immediates that are a multiple of 256.
    if (Val <= 65280 && Val % 256 == 0) {
      Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
      return true;
    }
    break;
  default:
    break;
  }

  return false;
}
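// Editor's illustration: Val = 4608 (0x1200) is too big for the 8-bit form but
// is a multiple of 256, so it is encoded as Imm = 18 with Shift = 8.
static_assert(4608u % 256u == 0u && (4608u >> 8) == 18u,
              "shifted-immediate split for the example value");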
bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
                                                 SDValue &Imm, SDValue &Shift,
                                                 bool Negate) {
  if (!isa<ConstantSDNode>(N))
    return false;

  SDLoc DL(N);
  int64_t Val = cast<ConstantSDNode>(N)
                    ->getAPIntValue()
                    .trunc(VT.getFixedSizeInBits())
                    .getSExtValue();

  if (Negate)
    Val = -Val;

  // Signed saturating instructions treat their immediate operand as unsigned,
  // whereas the related intrinsics define their operands to be signed. This
  // means we can only use the immediate form when the operand is non-negative.
  if (Val < 0)
    return false;

  switch (VT.SimpleTy) {
  case MVT::i8:
    // All positive immediates are supported.
    Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
    return true;
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    // Support 8-bit positive immediates.
    if (Val <= 255) {
      Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
      return true;
    }
    // Support 16-bit positive immediates that are a multiple of 256.
    if (Val <= 65280 && Val % 256 == 0) {
      Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
      return true;
    }
    break;
  default:
    break;
  }

  return false;
}

bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
                                             SDValue &Shift) {
  if (!isa<ConstantSDNode>(N))
    return false;

  SDLoc DL(N);
  int64_t Val = cast<ConstantSDNode>(N)
                    ->getAPIntValue()
                    .trunc(VT.getFixedSizeInBits())
                    .getSExtValue();

  switch (VT.SimpleTy) {
  case MVT::i8:
    // All immediates are supported.
    Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
    return true;
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    // Support 8-bit signed immediates.
    if (Val >= -128 && Val <= 127) {
      Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
      return true;
    }
    // Support 16-bit signed immediates that are a multiple of 256.
    if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
      Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
      return true;
    }
    break;
  default:
    break;
  }

  return false;
}

bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
    int64_t ImmVal = CNode->getSExtValue();
    SDLoc DL(N);
    if (ImmVal >= -128 && ImmVal < 128) {
      Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
      return true;
    }
  }
  return false;
}

bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
    uint64_t ImmVal = CNode->getZExtValue();

    switch (VT.SimpleTy) {
    case MVT::i8:
      ImmVal &= 0xFF;
      break;
    case MVT::i16:
      ImmVal &= 0xFFFF;
      break;
    case MVT::i32:
      ImmVal &= 0xFFFFFFFF;
      break;
    case MVT::i64:
      break;
    default:
      llvm_unreachable("Unexpected type");
    }

    if (ImmVal < 256) {
      Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
      return true;
    }
  }
  return false;
}
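// Editor's illustration for the signed form: Val = -3072 = -12 * 256 fits the
// shifted encoding, with (Val >> 8) & 0xFF = 0xF4 as the 8-bit payload.
static_assert((-3072 / 256) == -12 && ((-12) & 0xFF) == 0xF4,
              "shifted-immediate split for the signed example");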
bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
                                              bool Invert) {
  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
    uint64_t ImmVal = CNode->getZExtValue();
    SDLoc DL(N);

    if (Invert)
      ImmVal = ~ImmVal;

    // Replicate the immediate across the full 64 bits, depending on the
    // element size.
    switch (VT.SimpleTy) {
    case MVT::i8:
      ImmVal &= 0xFF;
      ImmVal |= ImmVal << 8;
      ImmVal |= ImmVal << 16;
      ImmVal |= ImmVal << 32;
      break;
    case MVT::i16:
      ImmVal &= 0xFFFF;
      ImmVal |= ImmVal << 16;
      ImmVal |= ImmVal << 32;
      break;
    case MVT::i32:
      ImmVal &= 0xFFFFFFFF;
      ImmVal |= ImmVal << 32;
      break;
    case MVT::i64:
      break;
    default:
      llvm_unreachable("Unexpected type");
    }

    uint64_t encoding;
    if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
      Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
      return true;
    }
  }
  return false;
}
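// Editor's illustration of the replication step: a 16-bit immediate 0x1234
// becomes 0x1234123412341234 before logical-immediate encoding.
static_assert(((0x1234ULL | (0x1234ULL << 16)) |
               ((0x1234ULL | (0x1234ULL << 16)) << 32)) ==
                  0x1234123412341234ULL,
              "element replication for the i16 example");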
// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
// Rather than attempt to normalise everything, we can sometimes saturate the
// shift amount during selection. This function also allows for consistent isel
// patterns by ensuring the resulting "Imm" node is of the i32 type required by
// the instructions.
bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
                                            uint64_t High, bool AllowSaturation,
                                            SDValue &Imm) {
  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
    uint64_t ImmVal = CN->getZExtValue();

    // Reject shift amounts that are too small.
    if (ImmVal < Low)
      return false;

    // Reject or saturate shift amounts that are too big.
    if (ImmVal > High) {
      if (!AllowSaturation)
        return false;
      ImmVal = High;
    }

    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
  // tagp(FrameIndex, IRGstack, tag_offset):
  // since the offset between FrameIndex and IRGstack is a compile-time
  // constant, this can be lowered to a single ADDG instruction.
  if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
    return false;
  }

  SDValue IRG_SP = N->getOperand(2);
  if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
      IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
    return false;
  }

  const TargetLowering *TLI = getTargetLowering();
  SDLoc DL(N);
  int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
  SDValue FiOp = CurDAG->getTargetFrameIndex(
      FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  int TagOffset = N->getConstantOperandVal(3);

  SDNode *Out = CurDAG->getMachineNode(
      AArch64::TAGPstack, DL, MVT::i64,
      {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
  ReplaceNode(N, Out);
  return true;
}

void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
         "llvm.aarch64.tagp third argument must be an immediate");
  if (trySelectStackSlotTagP(N))
    return;
  // FIXME: the above applies in any case when the offset between Op1 and Op2
  // is a compile-time constant, not just for stack allocations.

  // General case for unrelated pointers in Op1 and Op2.
  SDLoc DL(N);
  int TagOffset = N->getConstantOperandVal(3);
  SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
                                      {N->getOperand(1), N->getOperand(2)});
  SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
                                      {SDValue(N1, 0), N->getOperand(2)});
  SDNode *N3 = CurDAG->getMachineNode(
      AArch64::ADDG, DL, MVT::i64,
      {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
  ReplaceNode(N, N3);
}

bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
  assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");

  // Bail when not a "cast" like insert_subvector.
  if (N->getConstantOperandVal(2) != 0)
    return false;
  if (!N->getOperand(0).isUndef())
    return false;

  // Bail when normal isel should do the job.
  EVT VT = N->getValueType(0);
  EVT InVT = N->getOperand(1).getValueType();
  if (VT.isFixedLengthVector() || InVT.isScalableVector())
    return false;
  if (InVT.getSizeInBits() <= 128)
    return false;

  // NOTE: We can only get here when doing fixed length SVE code generation.
  // We do manual selection because the types involved are not linked to real
  // registers (despite being legal) and must be coerced into SVE registers.

  assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
         "Expected to insert into a packed scalable vector!");

  SDLoc DL(N);
  auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
                                        N->getOperand(1), RC));
  return true;
}

bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
  assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");

  // Bail when not a "cast" like extract_subvector.
  if (N->getConstantOperandVal(1) != 0)
    return false;

  // Bail when normal isel can do the job.
  EVT VT = N->getValueType(0);
  EVT InVT = N->getOperand(0).getValueType();
  if (VT.isScalableVector() || InVT.isFixedLengthVector())
    return false;
  if (VT.getSizeInBits() <= 128)
    return false;

  // NOTE: We can only get here when doing fixed length SVE code generation.
  // We do manual selection because the types involved are not linked to real
  // registers (despite being legal) and must be coerced into SVE registers.

  assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
         "Expected to extract from a packed scalable vector!");

  SDLoc DL(N);
  auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
                                        N->getOperand(0), RC));
  return true;
}
bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
  assert(N->getOpcode() == ISD::OR && "Expected OR instruction");

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
  // A rotate by a constant is a funnel shift in IR, which is expanded to
  // an OR with shifted operands.
  // We do the following transform:
  //   OR N0, N1 -> xar (x, y, imm)
  // Where:
  //   N1 = SRL_PRED true, V, splat(imm)  --> rotr amount
  //   N0 = SHL_PRED true, V, splat(bits-imm)
  //   V = (xor x, y)
  if (VT.isScalableVector() &&
      (Subtarget->hasSVE2() ||
       (Subtarget->hasSME() && Subtarget->isStreaming()))) {
    if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
        N1.getOpcode() != AArch64ISD::SRL_PRED)
      std::swap(N0, N1);
    if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
        N1.getOpcode() != AArch64ISD::SRL_PRED)
      return false;

    auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
    if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
        !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
      return false;

    SDValue XOR = N0.getOperand(1);
    if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
      return false;

    APInt ShlAmt, ShrAmt;
    if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
        !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
      return false;

    if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
      return false;

    SDLoc DL(N);
    SDValue Imm =
        CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);

    SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
    if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
            VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
                 AArch64::XAR_ZZZI_D})) {
      CurDAG->SelectNodeTo(N, Opc, VT, Ops);
      return true;
    }
    return false;
  }

  if (!Subtarget->hasSHA3())
    return false;

  if (N0->getOpcode() != AArch64ISD::VSHL ||
      N1->getOpcode() != AArch64ISD::VLSHR)
    return false;

  if (N0->getOperand(0) != N1->getOperand(0) ||
      N1->getOperand(0)->getOpcode() != ISD::XOR)
    return false;

  SDValue XOR = N0.getOperand(0);
  SDValue R1 = XOR.getOperand(0);
  SDValue R2 = XOR.getOperand(1);

  unsigned HsAmt = N0.getConstantOperandVal(1);
  unsigned ShAmt = N1.getConstantOperandVal(1);

  SDLoc DL = SDLoc(N0.getOperand(1));
  SDValue Imm = CurDAG->getTargetConstant(
      ShAmt, DL, N0.getOperand(1).getValueType(), false);

  if (ShAmt + HsAmt != 64)
    return false;

  SDValue Ops[] = {R1, R2, Imm};
  CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);

  return true;
}
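// Editor's illustration: rotr(xor(x, y), 19) on a 64-bit lane appears as
// (or (shl v, 45), (srl v, 19)); the two shift amounts must cover the lane
// width for the XAR selection above to fire.
static_assert(45u + 19u == 64u, "funnel-shift amounts in the XAR example");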
void AArch64DAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we already have selected!
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
    Node->setNodeId(-1);
    return;
  }

  // A few custom selection cases.
  EVT VT = Node->getValueType(0);

  switch (Node->getOpcode()) {
  default:
    break;

  case ISD::ATOMIC_CMP_SWAP:
    if (SelectCMP_SWAP(Node))
      return;
    break;

  case ISD::READ_REGISTER:
  case AArch64ISD::MRRS:
    if (tryReadRegister(Node))
      return;
    break;

  case ISD::WRITE_REGISTER:
  case AArch64ISD::MSRR:
    if (tryWriteRegister(Node))
      return;
    break;

  case ISD::LOAD: {
    // Try to select as an indexed load. Fall through to normal processing
    // if we can't.
    if (tryIndexedLoad(Node))
      return;
    break;
  }

  case ISD::SRL:
  case ISD::AND:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (tryBitfieldExtractOp(Node))
      return;
    if (tryBitfieldInsertInZeroOp(Node))
      return;
    [[fallthrough]];
  case ISD::ROTR:
  case ISD::SHL:
    if (tryShiftAmountMod(Node))
      return;
    break;

  case ISD::SIGN_EXTEND:
    if (tryBitfieldExtractOpFromSExt(Node))
      return;
    break;

  case ISD::OR:
    if (tryBitfieldInsertOp(Node))
      return;
    if (trySelectXAR(Node))
      return;
    break;

  case ISD::EXTRACT_SUBVECTOR: {
    if (trySelectCastScalableToFixedLengthVector(Node))
      return;
    break;
  }

  case ISD::INSERT_SUBVECTOR: {
    if (trySelectCastFixedLengthToScalableVector(Node))
      return;
    break;
  }

  case ISD::Constant: {
    // Materialize zero constants as copies from WZR/XZR. This allows
    // the coalescer to propagate these into other instructions.
    ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
    if (ConstNode->isZero()) {
      if (VT == MVT::i32) {
        SDValue New = CurDAG->getCopyFromReg(
            CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
        ReplaceNode(Node, New.getNode());
        return;
      } else if (VT == MVT::i64) {
        SDValue New = CurDAG->getCopyFromReg(
            CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
        ReplaceNode(Node, New.getNode());
        return;
      }
    }
    break;
  }

  case ISD::FrameIndex: {
    // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
    unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
    const TargetLowering *TLI = getTargetLowering();
    SDValue TFI = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    SDLoc DL(Node);
    SDValue Ops[] = {TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
                     CurDAG->getTargetConstant(Shifter, DL, MVT::i32)};
    CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
    return;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = Node->getConstantOperandVal(1);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::aarch64_gcsss: {
      SDLoc DL(Node);
      SDValue Chain = Node->getOperand(0);
      SDValue Val = Node->getOperand(2);
      SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
      SDNode *SS1 =
          CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
      SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
                                           MVT::Other, Zero, SDValue(SS1, 0));
      ReplaceNode(Node, SS2);
      return;
    }
    case Intrinsic::aarch64_ldaxp:
    case Intrinsic::aarch64_ldxp: {
      unsigned Op =
          IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
      SDValue MemAddr = Node->getOperand(2);
      SDLoc DL(Node);
      SDValue Chain = Node->getOperand(0);

      SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
                                          MVT::Other, MemAddr, Chain);

      // Transfer memoperands.
      MachineMemOperand *MemOp =
          cast<MemIntrinsicSDNode>(Node)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
      ReplaceNode(Node, Ld);
      return;
    }
    case Intrinsic::aarch64_stlxp:
    case Intrinsic::aarch64_stxp: {
      unsigned Op =
          IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
      SDLoc DL(Node);
      SDValue Chain = Node->getOperand(0);
      SDValue ValLo = Node->getOperand(2);
      SDValue ValHi = Node->getOperand(3);
      SDValue MemAddr = Node->getOperand(4);

      // Place arguments in the right order.
      SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};

      SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp =
          cast<MemIntrinsicSDNode>(Node)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

      ReplaceNode(Node, St);
      return;
    }
    case Intrinsic::aarch64_neon_ld1x2:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld1x3:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld1x4:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld2:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld3:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld4:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld2r:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld3r:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld4r:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld2lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(Node, 2, AArch64::LD2i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectLoadLane(Node, 2, AArch64::LD2i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(Node, 2, AArch64::LD2i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
    case Intrinsic::aarch64_neon_ld2lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(Node, 2, AArch64::LD2i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectLoadLane(Node, 2, AArch64::LD2i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(Node, 2, AArch64::LD2i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectLoadLane(Node, 2, AArch64::LD2i64);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld3lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(Node, 3, AArch64::LD3i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectLoadLane(Node, 3, AArch64::LD3i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(Node, 3, AArch64::LD3i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectLoadLane(Node, 3, AArch64::LD3i64);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld4lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(Node, 4, AArch64::LD4i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectLoadLane(Node, 4, AArch64::LD4i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(Node, 4, AArch64::LD4i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectLoadLane(Node, 4, AArch64::LD4i64);
        return;
      }
      break;
    case Intrinsic::aarch64_ld64b:
      SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
      return;
    case Intrinsic::aarch64_sve_ld2q_sret: {
      SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
      return;
    }
    case Intrinsic::aarch64_sve_ld3q_sret: {
      SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
      return;
    }
    case Intrinsic::aarch64_sve_ld4q_sret: {
      SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
      return;
    }
    case Intrinsic::aarch64_sve_ld2_sret: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B, true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H, true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W, true);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D, true);
        return;
      }
      break;
    }
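    // Predicate-as-counter multi-vector loads: prefer the SME2 pseudos when
    // SME2 is available, otherwise use the SVE2p1 instructions; with neither
    // feature, fall through to default selection.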
    case Intrinsic::aarch64_sve_ld1_pn_x2: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 2, 0, AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 2, 1, AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 2, 2, AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 2, 3, AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld1_pn_x4: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 4, 0, AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 4, 1, AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 4, 2, AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 4, 3, AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 0,
                                          AArch64::LDNT1B_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1B_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 2, 0, AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 1,
                                          AArch64::LDNT1H_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1H_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 2, 1, AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 2,
                                          AArch64::LDNT1W_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1W_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 2, 2, AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 3,
                                          AArch64::LDNT1D_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1D_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 2, 3, AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 0,
                                          AArch64::LDNT1B_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1B_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 4, 0, AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 1,
                                          AArch64::LDNT1H_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1H_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 4, 1, AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 2,
                                          AArch64::LDNT1W_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1W_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 4, 2, AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 3,
                                          AArch64::LDNT1D_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1D_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 4, 3, AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z);
        else
          break;
        return;
      }
      break;
    }
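    // LD3/LD4 with struct return mirror the LD2 case above; the third
    // argument is the log2 element-size scale used by the reg+imm form.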
    case Intrinsic::aarch64_sve_ld3_sret: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B, true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H, true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W, true);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D, true);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld4_sret: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B, true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H, true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W, true);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D, true);
        return;
      }
      break;
    }
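    // SME ZA tile reads (MOVA). The SelectMultiVectorMove template arguments
    // give the maximum tile-slice offset immediate and its multiplier for the
    // element size being read.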
    case Intrinsic::aarch64_sme_read_hor_vg2: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
                                     AArch64::MOVA_2ZMXI_H_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
                                    AArch64::MOVA_2ZMXI_H_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
                                    AArch64::MOVA_2ZMXI_H_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
                                    AArch64::MOVA_2ZMXI_H_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_ver_vg2: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
                                     AArch64::MOVA_2ZMXI_V_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
                                    AArch64::MOVA_2ZMXI_V_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
                                    AArch64::MOVA_2ZMXI_V_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
                                    AArch64::MOVA_2ZMXI_V_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_hor_vg4: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
                                     AArch64::MOVA_4ZMXI_H_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
                                    AArch64::MOVA_4ZMXI_H_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
                                    AArch64::MOVA_4ZMXI_H_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
                                    AArch64::MOVA_4ZMXI_H_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_ver_vg4: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
                                     AArch64::MOVA_4ZMXI_V_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
                                    AArch64::MOVA_4ZMXI_V_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
                                    AArch64::MOVA_4ZMXI_V_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
                                    AArch64::MOVA_4ZMXI_V_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_vg1x2: {
      SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
                                  AArch64::MOVA_VG2_2ZMXI);
      return;
    }
    case Intrinsic::aarch64_sme_read_vg1x4: {
      SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
                                  AArch64::MOVA_VG4_4ZMXI);
      return;
    }
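    // readz forms: identical dispatch, but through the zeroing MOVAZ pseudos.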
    case Intrinsic::aarch64_sme_readz_horiz_x2: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_readz_vert_x2: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_readz_horiz_x4: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_readz_vert_x4: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_readz_x2: {
      SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
                             AArch64::ZA);
      return;
    }
    case Intrinsic::aarch64_sme_readz_x4: {
      SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
                             AArch64::ZA);
      return;
    }
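    // The Swift async context is kept at FP-8; materialize that address
    // directly and record that the frame address is taken.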
    case Intrinsic::swift_async_context_addr: {
      SDLoc DL(Node);
      SDValue Chain = Node->getOperand(0);
      SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
      SDValue Res = SDValue(
          CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
                                 CurDAG->getTargetConstant(8, DL, MVT::i32),
                                 CurDAG->getTargetConstant(0, DL, MVT::i32)),
          0);
      ReplaceUses(SDValue(Node, 0), Res);
      ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
      CurDAG->RemoveDeadNode(Node);

      auto &MF = CurDAG->getMachineFunction();
      MF.getFrameInfo().setFrameAddressIsTaken(true);
      MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
      return;
    }
    case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
      if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
               AArch64::LUTI2_4ZTZI_S}))
        // Second Immediate must be <= 3:
        SelectMultiVectorLuti(Node, 4, Opc, 3);
      return;
    }
    case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
      if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
        // Second Immediate must be <= 1:
        SelectMultiVectorLuti(Node, 4, Opc, 1);
      return;
    }
    case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
      if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
               AArch64::LUTI2_2ZTZI_S}))
        // Second Immediate must be <= 7:
        SelectMultiVectorLuti(Node, 2, Opc, 7);
      return;
    }
    case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
      if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
               AArch64::LUTI4_2ZTZI_S}))
        // Second Immediate must be <= 3:
        SelectMultiVectorLuti(Node, 2, Opc, 3);
      return;
    }
    }
  } break;
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = Node->getConstantOperandVal(0);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::aarch64_tagp:
      SelectTagP(Node);
      return;

    case Intrinsic::ptrauth_auth:
      SelectPtrauthAuth(Node);
      return;

    case Intrinsic::ptrauth_resign:
      SelectPtrauthResign(Node);
      return;

    case Intrinsic::aarch64_neon_tbl2:
      SelectTable(Node, 2,
                  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
                  false);
      return;
    case Intrinsic::aarch64_neon_tbl3:
      SelectTable(Node, 3,
                  VT == MVT::v8i8 ? AArch64::TBLv8i8Three
                                  : AArch64::TBLv16i8Three,
                  false);
      return;
    case Intrinsic::aarch64_neon_tbl4:
      SelectTable(Node, 4,
                  VT == MVT::v8i8 ? AArch64::TBLv8i8Four
                                  : AArch64::TBLv16i8Four,
                  false);
      return;
    case Intrinsic::aarch64_neon_tbx2:
      SelectTable(Node, 2,
                  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
                  true);
      return;
    case Intrinsic::aarch64_neon_tbx3:
      SelectTable(Node, 3,
                  VT == MVT::v8i8 ? AArch64::TBXv8i8Three
                                  : AArch64::TBXv16i8Three,
                  true);
      return;
    case Intrinsic::aarch64_neon_tbx4:
      SelectTable(Node, 4,
                  VT == MVT::v8i8 ? AArch64::TBXv8i8Four
                                  : AArch64::TBXv16i8Four,
                  true);
      return;
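    // SME2/SVE2p1 multi-vector arithmetic: SelectOpcodeFromVT picks the
    // B/H/S/D opcode from the result element type (a 0 entry marks an
    // unsupported width), and the bool selects between the vector-by-single
    // (false) and vector-by-vector (true) destructive forms.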
    case Intrinsic::aarch64_sve_srshl_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
               AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_srshl_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
               AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_urshl_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
               AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_urshl_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
               AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_srshl_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
               AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_srshl_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
               AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_urshl_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
               AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_urshl_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
               AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
               AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
               AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_sqdmulh_vgx2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
               AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_sqdmulh_vgx4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
               AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
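    // Predicate-pair WHILE comparisons: one opcode per element size, with
    // SelectWhilePair expanding the two predicate results.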
    case Intrinsic::aarch64_sve_whilege_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
              Node->getValueType(0),
              {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
               AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
        SelectWhilePair(Node, Op);
      return;
    case Intrinsic::aarch64_sve_whilegt_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
              Node->getValueType(0),
              {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
               AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
        SelectWhilePair(Node, Op);
      return;
    case Intrinsic::aarch64_sve_whilehi_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
              Node->getValueType(0),
              {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
               AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
        SelectWhilePair(Node, Op);
      return;
    case Intrinsic::aarch64_sve_whilehs_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
              Node->getValueType(0),
              {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
               AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
        SelectWhilePair(Node, Op);
      return;
    case Intrinsic::aarch64_sve_whilele_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
              Node->getValueType(0),
              {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
               AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
        SelectWhilePair(Node, Op);
      return;
    case Intrinsic::aarch64_sve_whilelo_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
              Node->getValueType(0),
              {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
               AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
        SelectWhilePair(Node, Op);
      return;
    case Intrinsic::aarch64_sve_whilels_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
              Node->getValueType(0),
              {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
               AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
        SelectWhilePair(Node, Op);
      return;
    case Intrinsic::aarch64_sve_whilelt_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
              Node->getValueType(0),
              {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
               AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
        SelectWhilePair(Node, Op);
      return;
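    // Min/max follow the same pattern. For the FP variants the opcode table
    // is indexed as {bf16, f16, f32, f64}, so nxv8bf16 maps onto the
    // BFMAX/BFMIN forms.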
    case Intrinsic::aarch64_sve_smax_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
               AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_umax_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
               AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmax_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
               AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_smax_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
               AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_umax_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
               AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmax_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
               AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_smin_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
               AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_umin_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
               AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmin_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
               AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_smin_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
               AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_umin_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
               AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmin_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
               AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_smax_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
               AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_umax_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
               AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmax_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
               AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_smax_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
               AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_umax_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
               AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmax_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
               AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_smin_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
               AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_umin_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
               AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmin_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
               AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_smin_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
               AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_umin_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
               AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmin_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
               AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmaxnm_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
               AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmaxnm_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
               AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_fminnm_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
               AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_fminnm_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
               AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmaxnm_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
               AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmaxnm_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
               AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_fminnm_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
               AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_fminnm_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
               AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
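    // Multi-vector converts only exist in 32-bit-to-32-bit (StoS) form, so
    // no type dispatch is needed here.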
    case Intrinsic::aarch64_sve_fcvtzs_x2:
      SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
      return;
    case Intrinsic::aarch64_sve_scvtf_x2:
      SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
      return;
    case Intrinsic::aarch64_sve_fcvtzu_x2:
      SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
      return;
    case Intrinsic::aarch64_sve_ucvtf_x2:
      SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
      return;
    case Intrinsic::aarch64_sve_fcvtzs_x4:
      SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
      return;
    case Intrinsic::aarch64_sve_scvtf_x4:
      SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
      return;
    case Intrinsic::aarch64_sve_fcvtzu_x4:
      SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
      return;
    case Intrinsic::aarch64_sve_ucvtf_x4:
      SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
      return;
    case Intrinsic::aarch64_sve_fcvt_widen_x2:
      SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
      return;
    case Intrinsic::aarch64_sve_fcvtl_widen_x2:
      SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
      return;
    case Intrinsic::aarch64_sve_sclamp_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
               AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
        SelectClamp(Node, 2, Op);
      return;
    case Intrinsic::aarch64_sve_uclamp_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
               AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
        SelectClamp(Node, 2, Op);
      return;
    case Intrinsic::aarch64_sve_fclamp_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
               AArch64::FCLAMP_VG2_2Z2Z_D}))
        SelectClamp(Node, 2, Op);
      return;
    case Intrinsic::aarch64_sve_bfclamp_single_x2:
      SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
      return;
    case Intrinsic::aarch64_sve_sclamp_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
               AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
        SelectClamp(Node, 4, Op);
      return;
    case Intrinsic::aarch64_sve_uclamp_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
               AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
        SelectClamp(Node, 4, Op);
      return;
    case Intrinsic::aarch64_sve_fclamp_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
               AArch64::FCLAMP_VG4_4Z4Z_D}))
        SelectClamp(Node, 4, Op);
      return;
    case Intrinsic::aarch64_sve_bfclamp_single_x4:
      SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
      return;
    case Intrinsic::aarch64_sve_add_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
               AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_add_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
               AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
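    // ZIP/UZP: the x2 forms take independent operands (IsTupleInput=false)
    // while the x4 forms consume a register tuple; the Q variants operate on
    // 128-bit quadwords and need no element-type dispatch.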
    case Intrinsic::aarch64_sve_zip_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
               AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
        SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
      return;
    case Intrinsic::aarch64_sve_zipq_x2:
      SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
                                AArch64::ZIP_VG2_2ZZZ_Q);
      return;
    case Intrinsic::aarch64_sve_zip_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
               AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
        SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
      return;
    case Intrinsic::aarch64_sve_zipq_x4:
      SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
                                AArch64::ZIP_VG4_4Z4Z_Q);
      return;
    case Intrinsic::aarch64_sve_uzp_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
               AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
        SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
      return;
    case Intrinsic::aarch64_sve_uzpq_x2:
      SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
                                AArch64::UZP_VG2_2ZZZ_Q);
      return;
    case Intrinsic::aarch64_sve_uzp_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
               AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
        SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
      return;
    case Intrinsic::aarch64_sve_uzpq_x4:
      SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
                                AArch64::UZP_VG4_4Z4Z_Q);
      return;
    case Intrinsic::aarch64_sve_sel_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
               AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
      return;
    case Intrinsic::aarch64_sve_sel_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
               AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
      return;
    case Intrinsic::aarch64_sve_frinta_x2:
      SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
      return;
    case Intrinsic::aarch64_sve_frinta_x4:
      SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
      return;
    case Intrinsic::aarch64_sve_frintm_x2:
      SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
      return;
    case Intrinsic::aarch64_sve_frintm_x4:
      SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
      return;
    case Intrinsic::aarch64_sve_frintn_x2:
      SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
      return;
    case Intrinsic::aarch64_sve_frintn_x4:
      SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
      return;
    case Intrinsic::aarch64_sve_frintp_x2:
      SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
      return;
    case Intrinsic::aarch64_sve_frintp_x4:
      SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
      return;
    case Intrinsic::aarch64_sve_sunpk_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
               AArch64::SUNPK_VG2_2ZZ_D}))
        SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
      return;
    case Intrinsic::aarch64_sve_uunpk_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
               AArch64::UUNPK_VG2_2ZZ_D}))
        SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
      return;
    case Intrinsic::aarch64_sve_sunpk_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
               AArch64::SUNPK_VG4_4Z2Z_D}))
        SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
      return;
    case Intrinsic::aarch64_sve_uunpk_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
               AArch64::UUNPK_VG4_4Z2Z_D}))
        SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
      return;
    case Intrinsic::aarch64_sve_pext_x2: {
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
               AArch64::PEXT_2PCI_D}))
        SelectPExtPair(Node, Op);
      return;
    }
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    unsigned IntNo = Node->getConstantOperandVal(1);
    if (Node->getNumOperands() >= 3)
      VT = Node->getOperand(2)->getValueType(0);
    switch (IntNo) {
    default:
      break;
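    // NEON structured stores mirror the load selection above. Note that the
    // v1i64/v1f64 forms of ST2-ST4 degrade to multi-register ST1, since a
    // single-element structure store is just consecutive registers.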
    case Intrinsic::aarch64_neon_st1x2: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 2, AArch64::ST1Twov8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 2, AArch64::ST1Twov16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectStore(Node, 2, AArch64::ST1Twov4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectStore(Node, 2, AArch64::ST1Twov8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 2, AArch64::ST1Twov2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 2, AArch64::ST1Twov4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 2, AArch64::ST1Twov2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 2, AArch64::ST1Twov1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st1x3: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 3, AArch64::ST1Threev8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 3, AArch64::ST1Threev16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectStore(Node, 3, AArch64::ST1Threev4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectStore(Node, 3, AArch64::ST1Threev8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 3, AArch64::ST1Threev2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 3, AArch64::ST1Threev4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 3, AArch64::ST1Threev2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 3, AArch64::ST1Threev1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st1x4: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 4, AArch64::ST1Fourv8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 4, AArch64::ST1Fourv16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectStore(Node, 4, AArch64::ST1Fourv4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectStore(Node, 4, AArch64::ST1Fourv8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 4, AArch64::ST1Fourv2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 4, AArch64::ST1Fourv4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 4, AArch64::ST1Fourv2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 4, AArch64::ST1Fourv1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st2: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 2, AArch64::ST2Twov8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 2, AArch64::ST2Twov16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectStore(Node, 2, AArch64::ST2Twov4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectStore(Node, 2, AArch64::ST2Twov8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 2, AArch64::ST2Twov2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 2, AArch64::ST2Twov4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 2, AArch64::ST2Twov2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 2, AArch64::ST1Twov1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st3: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 3, AArch64::ST3Threev8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 3, AArch64::ST3Threev16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectStore(Node, 3, AArch64::ST3Threev4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectStore(Node, 3, AArch64::ST3Threev8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 3, AArch64::ST3Threev2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 3, AArch64::ST3Threev4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 3, AArch64::ST3Threev2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 3, AArch64::ST1Threev1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st4: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 4, AArch64::ST4Fourv8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 4, AArch64::ST4Fourv16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectStore(Node, 4, AArch64::ST4Fourv4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectStore(Node, 4, AArch64::ST4Fourv8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 4, AArch64::ST4Fourv2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 4, AArch64::ST4Fourv4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 4, AArch64::ST4Fourv2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 4, AArch64::ST1Fourv1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st2lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectStoreLane(Node, 2, AArch64::ST2i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectStoreLane(Node, 2, AArch64::ST2i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectStoreLane(Node, 2, AArch64::ST2i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectStoreLane(Node, 2, AArch64::ST2i64);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st3lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectStoreLane(Node, 3, AArch64::ST3i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectStoreLane(Node, 3, AArch64::ST3i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectStoreLane(Node, 3, AArch64::ST3i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectStoreLane(Node, 3, AArch64::ST3i64);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st4lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectStoreLane(Node, 4, AArch64::ST4i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectStoreLane(Node, 4, AArch64::ST4i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectStoreLane(Node, 4, AArch64::ST4i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectStoreLane(Node, 4, AArch64::ST4i64);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_st2q: {
      SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
      return;
    }
    case Intrinsic::aarch64_sve_st3q: {
      SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
      return;
    }
    case Intrinsic::aarch64_sve_st4q: {
      SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
      return;
    }
    case Intrinsic::aarch64_sve_st2: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_st3: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_st4: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
        return;
      }
      break;
    }
    }
    break;
  }
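  // Post-increment (write-back) structured loads: the _POST variants also
  // update the base register. As with the stores above, v1i64/v1f64 fall
  // back to the multi-register LD1 forms.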
  case AArch64ISD::LD2post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD3post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD4post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x2post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x3post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x4post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
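  // Duplicating post-increment loads: the LDnR forms broadcast each loaded
  // structure element to every lane of the result vectors.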
  case AArch64ISD::LD1DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD2DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD3DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD4DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD2LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD3LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD4LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
      return;
    }
    break;
  }
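  // Note (illustrative comment, an editorial addition): the DUP cases above
  // select the replicating LDnR forms, which load one element per register
  // and broadcast it to every lane, while the LANE cases select the
  // single-element LDn lane forms. Both transfer a fixed number of bytes per
  // structure member, so selection keys only on the element width; that is
  // why d- and q-form vector types (e.g. v8i8 and v16i8) share one branch.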
  case AArch64ISD::ST2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD2_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD3_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD4_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
      return;
    }
    break;
  }
  }

  // Select the default instruction
  SelectCode(Node);
}

/// createAArch64ISelDag - This pass converts a legalized DAG into an
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOptLevel OptLevel) {
  return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
}
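// Illustrative sketch (an editorial addition, not from this file): the
// factory above is expected to be invoked from the target's pass
// configuration when the codegen pipeline is assembled. Assuming the usual
// TargetPassConfig hook on the AArch64 side, a call site would look roughly
// like:
//
//   bool AArch64PassConfig::addInstSelector() {
//     addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
//     return false;
//   }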
/// When \p PredVT is a scalable vector predicate in the form
/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
/// structured vectors (NumVec > 1), the output data type is
/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
/// EVT.
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
                                                unsigned NumVec) {
  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
    return EVT();

  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
    return EVT();

  ElementCount EC = PredVT.getVectorElementCount();
  EVT ScalarVT =
      EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);

  return MemVT;
}
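// Worked examples of the mapping above (illustrative comments only, relying
// on AArch64::SVEBitsPerBlock == 128):
//   PredVT = nxv16i1, NumVec = 1  -->  nxv16i8  (128 / 16 = 8-bit elements)
//   PredVT = nxv4i1,  NumVec = 1  -->  nxv4i32  (128 / 4 = 32-bit elements)
//   PredVT = nxv8i1,  NumVec = 3  -->  nxv24i16 (element count scaled by 3)
//   PredVT = nxv4i32 (not a predicate)  -->  invalid EVT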
/// Return the EVT of the data associated with a memory operation in \p
/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
  if (isa<MemSDNode>(Root))
    return cast<MemSDNode>(Root)->getMemoryVT();

  if (isa<MemIntrinsicSDNode>(Root))
    return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();

  const unsigned Opcode = Root->getOpcode();
  // For custom ISD nodes, we have to look at them individually to extract the
  // type of the data moved to/from memory.
  switch (Opcode) {
  case AArch64ISD::LD1_MERGE_ZERO:
  case AArch64ISD::LD1S_MERGE_ZERO:
  case AArch64ISD::LDNF1_MERGE_ZERO:
  case AArch64ISD::LDNF1S_MERGE_ZERO:
    return cast<VTSDNode>(Root->getOperand(3))->getVT();
  case AArch64ISD::ST1_PRED:
    return cast<VTSDNode>(Root->getOperand(4))->getVT();
  case AArch64ISD::SVE_LD2_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
  case AArch64ISD::SVE_LD3_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
  case AArch64ISD::SVE_LD4_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
  default:
    break;
  }

  if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
    return EVT();

  switch (Root->getConstantOperandVal(1)) {
  default:
    return EVT();
  case Intrinsic::aarch64_sme_ldr:
  case Intrinsic::aarch64_sme_str:
    return MVT::nxv16i8;
  case Intrinsic::aarch64_sve_prf:
    // We are using an SVE prefetch intrinsic. Type must be inferred from the
    // width of the predicate.
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
  case Intrinsic::aarch64_sve_ld2_sret:
  case Intrinsic::aarch64_sve_ld2q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
  case Intrinsic::aarch64_sve_st2q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld3q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
  case Intrinsic::aarch64_sve_st3q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld4q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
  case Intrinsic::aarch64_sve_st4q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
  case Intrinsic::aarch64_sve_ld1udq:
  case Intrinsic::aarch64_sve_st1dq:
    return EVT(MVT::nxv1i64);
  case Intrinsic::aarch64_sve_ld1uwq:
  case Intrinsic::aarch64_sve_st1wq:
    return EVT(MVT::nxv1i32);
  }
}

/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max,
/// where Root is the memory access using N for its address.
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
  const DataLayout &DL = CurDAG->getDataLayout();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
      return true;
    }

    return false;
  }

  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  SDValue VScale = N.getOperand(1);
  if (VScale.getOpcode() != ISD::VSCALE)
    return false;

  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();

  if ((MulImm % MemWidthBytes) != 0)
    return false;

  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  }

  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
  return true;
}
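// Worked example for the routine above (illustrative comment with assumed
// values): for MemVT = nxv4i32 the known-minimum width is 128 bits, i.e.
// 16 bytes per vector-length unit, so N = ADD(Base, VSCALE(48)) gives
// MulImm = 48 and Offset = 48 / 16 = 3. Selection succeeds when
// Min <= 3 <= Max, yielding the immediate form [Base, #3, MUL VL].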
/// Select register plus register addressing mode for SVE, with scaled
/// offset.
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
                                                  SDValue &Base,
                                                  SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Process an ADD node.
  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  // 8 bit data does not come with the SHL node, so it is treated
  // separately.
  if (Scale == 0) {
    Base = LHS;
    Offset = RHS;
    return true;
  }

  if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t ImmOff = C->getSExtValue();
    unsigned Size = 1 << Scale;

    // To use the reg+reg addressing mode, the immediate must be a multiple of
    // the vector element's byte size.
    if (ImmOff % Size)
      return false;

    SDLoc DL(N);
    Base = LHS;
    Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
    SDValue Ops[] = {Offset};
    SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    Offset = SDValue(MI, 0);
    return true;
  }

  // Check if the RHS is a shift node with a constant.
  if (RHS.getOpcode() != ISD::SHL)
    return false;

  const SDValue ShiftRHS = RHS.getOperand(1);
  if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
    if (C->getZExtValue() == Scale) {
      Base = LHS;
      Offset = RHS.getOperand(0);
      return true;
    }

  return false;
}

bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
  const AArch64TargetLowering *TLI =
      static_cast<const AArch64TargetLowering *>(getTargetLowering());

  return TLI->isAllActivePredicate(*CurDAG, N);
}

bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
  EVT VT = N.getValueType();
  return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
}

bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
                                             SDValue &Base, SDValue &Offset,
                                             unsigned Scale) {
  // Try to untangle an ADD node into a 'reg + offset'.
  if (N.getOpcode() == ISD::ADD)
    if (auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t ImmOff = C->getSExtValue();
      if (ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale) == 0) {
        Base = N.getOperand(0);
        Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
        return true;
      }
    }

  // By default, just match reg + 0.
  Base = N;
  Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
  return true;
}
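// Worked example for SelectSMETileSlice (illustrative comment with assumed
// values): with Scale = 2 and MaxSize = 14, N = ADD(Base, 6) satisfies
// 0 < 6 <= 14 and 6 % 2 == 0, so it matches as Base plus slice offset
// 6 / 2 = 3. Any other shape falls back to matching the whole node as
// "reg + 0".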