GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "Utils/AArch64SMEAttributes.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, please stick to the following naming convention:
//
//    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
//                        to source operand OP<n>.
//
//    _MERGE_ZERO         The result value is a vector with inactive lanes
//                        actively zeroed.
//
//    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
//                        to the last source operand, whose only purpose is to
//                        serve as a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
//    _PRED
//
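// For example (illustrative, using opcodes defined below): FADD_PRED is a
// predicated FADD whose inactive lanes are left undefined, while
// FABS_MERGE_PASSTHRU is a predicated FABS that carries an extra trailing
// operand supplying the value of the inactive lanes.
//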
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Pseudo for an ObjC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
  CALL_RVMARKER,

  CALL_BTI, // Function call followed by a BTI instruction.

  // Function call, authenticating the callee value first:
  // AUTH_CALL chain, callee, auth key #, int disc, addr disc, operands.
  AUTH_CALL,
  // AUTH_TC_RETURN chain, callee, fpdiff, auth key #, int disc, addr disc,
  // operands.
  AUTH_TC_RETURN,

  // Authenticated variant of CALL_RVMARKER.
  AUTH_CALL_RVMARKER,

  COALESCER_BARRIER,

  VG_SAVE,
  VG_RESTORE,

  SMSTART,
  SMSTOP,
  RESTORE_ZA,
  RESTORE_ZT,
  SAVE_ZT,

  // A call with the callee in x16, i.e. "blr x16".
  CALL_ARM64EC_TO_X64,

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // To avoid stack clash, allocation is performed by block and each block is
  // probed.
  PROBED_ALLOCA,

  // Predicated instructions where inactive lanes produce undefined results.
  ABDS_PRED,
  ABDU_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAX_PRED,
  FMAXNM_PRED,
  FMIN_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  HADDS_PRED,
  HADDU_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  RHADDS_PRED,
  RHADDU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  SRAD_MERGE_OP1,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left,right,falsecc,cc,flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,
  DUPLANE128,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,
  URSHR_I_PRED,

  // Vector narrowing shift by immediate (bottom)
  RSHRNB_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Round wide FP to narrow FP with inexact results to odd.
  FCVTXN,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Unsigned sum Long across Vector
  UADDLV,
  SADDLV,

  // Add Pairwise of two vectors
  ADDP,
  // Add Long Pairwise
  SADDLP,
  UADDLP,

  // udot/sdot instructions
  UDOT,
  SDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
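  /// For example (illustrative): NVCAST can treat the contents of a vector
  /// register produced as v2i64 as v4i32 in place, where a plain ISD::BITCAST
  /// on a big-endian target would additionally require a REV instruction to
  /// preserve bitcast (memory-layout) semantics.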
  NVCAST,

  MRS, // MRS, also sets the flags via a glue.

  SMULL,
  UMULL,

  PMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTEST_ANY,
  PTRUE,

  CTTZ_ELTS,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  REVH_MERGE_PASSTHRU,
  REVW_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,

  // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
  LS64_BUILD,
  LS64_EXTRACT,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,
  GLD1Q_MERGE_ZERO,
  GLD1Q_INDEX_MERGE_ZERO,

  // Signed gather loads
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,
  SST1Q_PRED,
  SST1Q_INDEX_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // SME
  RDSVL,
  REVD_MERGE_PASSTHRU,
  ALLOCATE_ZA_BUFFER,
  INIT_TPIDR2OBJ,

  // Asserts that a function argument (i32) is zero-extended to i8 by
  // the caller
  ASSERT_ZEXT_BOOL,

  // 128-bit system register accesses
  // lo64, hi64, chain = MRRS(chain, sysregname)
  MRRS,
  // chain = MSRR(chain, sysregname, lo64, hi64)
  MSRR,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // SME ZA loads and stores
  SME_ZA_LDR,
  SME_ZA_STR,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  LDIAPP,
  LDNP,
  STP,
  STILP,
  STNP,

  // Memory Operations
  MOPS_MEMSET,
  MOPS_MEMSET_TAGGING,
  MOPS_MEMCOPY,
  MOPS_MEMMOVE,
};

} // end namespace AArch64ISD

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
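// For example (illustrative): the current mode can be recovered from a
// captured FPCR value as (FPCR >> RoundingBitsPos) & rmMask and compared
// against the Rounding enumerators above.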

// Reserved bits should be preserved when modifying FPCR.
const uint64_t ReservedFPControlBits = 0xfffffffff80040f8;

// Registers used to pass function arguments.
ArrayRef<MCPhysReg> getGPRArgRegs();
ArrayRef<MCPhysReg> getFPRArgRegs();

/// Maximum allowed number of unprobed bytes above SP at an ABI
/// boundary.
const unsigned StackProbeMaxUnprobedStack = 1024;

/// Maximum number of iterations to unroll for a constant size probing loop.
const unsigned StackProbeMaxLoopUnroll = 4;

} // namespace AArch64

class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
      const AArch64Subtarget &STI);

  /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
  /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
  bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
      SDValue N1) const override;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
      const APInt &DemandedElts,
      const SelectionDAG &DAG,
      unsigned Depth = 0) const override;

  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
      const APInt &DemandedElts,
      const SelectionDAG &DAG,
      unsigned Depth) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
      const APInt &DemandedElts,
      TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
      Align Alignment,
      MachineMemOperand::Flags Flags,
      unsigned *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
      const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
      bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
  /// shuffle mask can be codegen'd directly.
  bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
      EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
      MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
      MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI,
      MachineBasicBlock *MBB) const;

  MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
      MachineInstr &MI,
      MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
      MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
      unsigned Opcode, bool Op0IsDef) const;
  MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitInitTPIDR2Object(MachineInstr &MI,
      MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitAllocateZABuffer(MachineInstr &MI,
      MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
      MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
      MachineFunction &MF,
      unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
      EVT NewVT) const override;

  bool shouldRemoveRedundantExtend(SDValue Op) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
      SmallVectorImpl<Use *> &Ops) const override;

  bool optimizeExtendOrTruncateConversion(
      Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
      ArrayRef<ShuffleVectorInst *> Shuffles,
      ArrayRef<unsigned> Indices,
      unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
      unsigned Factor) const override;

  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
      LoadInst *LI) const override;

  bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
      StoreInst *SI) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalAddScalableImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool isMulAddWithConstProfitable(SDValue AddNode,
      SDValue ConstNode) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
      const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
      const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
      unsigned AS,
      Instruction *I = nullptr) const override;

  int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
      int64_t MaxOffset) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
      EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  bool generateFMAsInMachineCombiner(EVT VT,
      CodeGenOptLevel OptLevel) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
  ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
      CombineLevel Level) const override;

  bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override {
    return false;
  }

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;

  /// Return true if it is profitable to fold a pair of shifts into a mask.
  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
      CombineLevel Level) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
      EVT VT) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
      Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
      unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
      bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
      AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
      AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool isOpSuitableForLSE128(const Instruction *I) const;
  bool isOpSuitableForRCPC3(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;
  bool
  shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
      const MachineFunction &MF) const override {
    // Do not merge to float value size (128 bits) if no implicit
    // float attribute is set.

    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz(Type *) const override {
    return true;
  }

  bool isCheapToSpeculateCtlz(Type *) const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    TypeSize TS = VT.getSizeInBits();
    // TODO: We should be able to use bic/bif too for SVE.
    return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
      unsigned ExpansionFactor) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
      unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, what SXT supports.
    // XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  bool shouldExpandCmpUsingSelects() const override { return true; }

  bool isComplexDeinterleavingSupported() const override;
  bool isComplexDeinterleavingOperationSupported(
      ComplexDeinterleavingOperation Operation, Type *Ty) const override;

  Value *createComplexDeinterleavingIR(
      IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
      ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
      Value *Accumulator = nullptr) const override;

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  bool supportPtrAuthBundles() const override { return true; }

  bool supportKCFIBundles() const override { return true; }

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
      MachineBasicBlock::instr_iterator &MBBI,
      const TargetInstrInfo *TII) const override;

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
      bool &UseScalable) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
      bool UseScalable) const;

  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;

  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
      bool AllowUnknown = false) const override;

  bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;

  bool shouldExpandCttzElements(EVT VT) const override;

  /// If a change in streaming mode is required on entry to/return from a
  /// function call it emits and returns the corresponding SMSTART or SMSTOP
  /// node. \p Condition should be one of the enum values from
  /// AArch64SME::ToggleCondition.
  SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
      SDValue Chain, SDValue InGlue, unsigned Condition,
      SDValue PStateSM = SDValue()) const;

  bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }

  // Normally SVE is only used for byte size vectors that do not fit within a
  // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64-bit and 128-bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

  // Follow NEON ABI rules even when using SVE for fixed length vectors.
  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
      EVT VT) const override;
  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
      CallingConv::ID CC,
      EVT VT) const override;
  unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
      CallingConv::ID CC, EVT VT,
      EVT &IntermediateVT,
      unsigned &NumIntermediates,
      MVT &RegisterVT) const override;

  /// True if stack clash protection is enabled for this function.
  bool hasInlineStackProbe(const MachineFunction &MF) const override;

#ifndef NDEBUG
  void verifyTargetSDNode(const SDNode *N) const override;
#endif

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  llvm::BumpPtrAllocator BumpAlloc;
  llvm::StringSaver Saver{BumpAlloc};

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRType(MVT VT);
  void addQRType(MVT VT);

  bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override;

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
      bool isVarArg,
      const SmallVectorImpl<ISD::InputArg> &Ins,
      const SDLoc &DL, SelectionDAG &DAG,
      SmallVectorImpl<SDValue> &InVals) const override;

  void AdjustInstrPostInstrSelection(MachineInstr &MI,
      SDNode *Node) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
      SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
      CallingConv::ID CallConv, bool isVarArg,
      const SmallVectorImpl<CCValAssign> &RVLocs,
      const SDLoc &DL, SelectionDAG &DAG,
      SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
      SDValue ThisVal, bool RequiresSMChange) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;

  bool
  isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
      MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
      SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
      bool isVarArg,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
      unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
      unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
      unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
      unsigned Flag) const;
  SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG,
      unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
      const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
      SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerPtrAuthGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerPtrAuthGlobalAddressStatically(SDValue TGA, SDLoc DL, EVT VT,
      AArch64PACKey::ID Key,
      SDValue Discriminator,
      SDValue AddrDiscriminator,
      SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
      SDValue TVal, SDValue FVal, const SDLoc &dl,
      SelectionDAG &DAG) const;
  SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
      unsigned NewOp) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_HISTOGRAM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorXRINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
      SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
      SmallVectorImpl<SDNode *> &Created) const override;
  SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
      SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
      int &ExtraSteps, bool &UseOneConst,
      bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
      int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
      const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char *RegName, LLT VT,
      const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
      const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
      StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
      std::vector<SDValue> &Ops,
      SelectionDAG &DAG) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::ConstraintCode::Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    // followed by llvm_unreachable so we'll leave them unimplemented in
    // the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  /// Handle Lowering flag assembly outputs.
  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
      const SDLoc &DL,
      const AsmOperandInfo &Constraint,
      SelectionDAG &DAG) const override;

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
      SDValue &Offset, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
      ISD::MemIndexedMode &AM,
      SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
      SDValue &Offset, ISD::MemIndexedMode &AM,
      SelectionDAG &DAG) const override;
  bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
      bool IsPre, MachineRegisterInfo &MRI) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
      SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
      SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
      SmallVectorImpl<SDValue> &Results,
      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
      const APInt &OriginalDemandedBits,
      const APInt &OriginalDemandedElts,
      KnownBits &Known,
      TargetLoweringOpt &TLO,
      unsigned Depth) const override;

  bool isTargetCanonicalConstantNode(SDValue Op) const override;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally useable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  // This function does not handle predicate bitcasts.
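  // For example (illustrative): an unpacked nxv2f32 value would first be
  // REINTERPRET_CAST to the packed nxv4f32 container and only then BITCAST to
  // a packed type with a different element type such as nxv4i32.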
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;

  // Returns the runtime value for PSTATE.SM by generating a call to
  // __arm_sme_state.
  SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
      EVT VT) const;

  bool preferScalarizeSplat(SDNode *N) const override;

  unsigned getMinimumJumpTableEntries() const override;

  bool softPromoteHalfType() const override { return true; }
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
    const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif