GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.h
//==-- AArch64ISelLowering.h - AArch64 DAG Lowering Interface ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that AArch64 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64ISELLOWERING_H

#include "AArch64.h"
#include "Utils/AArch64SMEAttributes.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Instruction.h"

namespace llvm {

namespace AArch64ISD {

// For predicated nodes where the result is a vector, the operation is
// controlled by a governing predicate and the inactive lanes are explicitly
// defined with a value, please stick to the following naming convention:
//
//    _MERGE_OP<n>        The result value is a vector with inactive lanes equal
//                        to source operand OP<n>.
//
//    _MERGE_ZERO         The result value is a vector with inactive lanes
//                        actively zeroed.
//
//    _MERGE_PASSTHRU     The result value is a vector with inactive lanes equal
//                        to the last source operand, whose only purpose is to
//                        serve as a passthru value.
//
// For other cases where no explicit action is needed to set the inactive lanes,
// or when the result is not a vector and it is needed or helpful to
// distinguish a node from similar unpredicated nodes, use:
//
//    _PRED
//
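// For example (illustrative, using opcodes defined below): FADD_PRED is a
// predicated FADD whose inactive lanes are left undefined, while
// FABS_MERGE_PASSTHRU is a predicated FABS that carries an extra trailing
// operand supplying the value of the inactive lanes.
//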
enum NodeType : unsigned {
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  WrapperLarge, // 4-instruction MOVZ/MOVK sequence for 64-bit addresses.
  CALL,         // Function call.

  // Pseudo for an ObjC call that gets emitted together with a special `mov
  // x29, x29` marker instruction.
  CALL_RVMARKER,

  CALL_BTI, // Function call followed by a BTI instruction.

  // Function call, authenticating the callee value first:
  // AUTH_CALL chain, callee, auth key #, int disc, addr disc, operands.
  AUTH_CALL,
  // AUTH_TC_RETURN chain, callee, fpdiff, auth key #, int disc, addr disc,
  // operands.
  AUTH_TC_RETURN,

  // Authenticated variant of CALL_RVMARKER.
  AUTH_CALL_RVMARKER,

  COALESCER_BARRIER,

  VG_SAVE,
  VG_RESTORE,

  SMSTART,
  SMSTOP,
  RESTORE_ZA,
  RESTORE_ZT,
  SAVE_ZT,

  // A call with the callee in x16, i.e. "blr x16".
  CALL_ARM64EC_TO_X64,

  // Produces the full sequence of instructions for getting the thread pointer
  // offset of a variable into X0, using the TLSDesc model.
  TLSDESC_CALLSEQ,
  ADRP,     // Page address of a TargetGlobalAddress operand.
  ADR,      // ADR
  ADDlow,   // Add the low 12 bits of a TargetGlobalAddress operand.
  LOADgot,  // Load from automatically generated descriptor (e.g. Global
            // Offset Table, TLS record).
  RET_GLUE, // Return with a glue operand. Operand 0 is the chain operand.
  BRCOND,   // Conditional branch instruction; "b.cond".
  CSEL,
  CSINV, // Conditional select invert.
  CSNEG, // Conditional select negate.
  CSINC, // Conditional select increment.

  // Pointer to the thread's local storage area. Materialised from TPIDR_EL0 on
  // ELF.
  THREAD_POINTER,
  ADC,
  SBC, // adc, sbc instructions

  // To avoid stack clash, allocation is performed by block and each block is
  // probed.
  PROBED_ALLOCA,

  // Predicated instructions where inactive lanes produce undefined results.
  ABDS_PRED,
  ABDU_PRED,
  FADD_PRED,
  FDIV_PRED,
  FMA_PRED,
  FMAX_PRED,
  FMAXNM_PRED,
  FMIN_PRED,
  FMINNM_PRED,
  FMUL_PRED,
  FSUB_PRED,
  HADDS_PRED,
  HADDU_PRED,
  MUL_PRED,
  MULHS_PRED,
  MULHU_PRED,
  RHADDS_PRED,
  RHADDU_PRED,
  SDIV_PRED,
  SHL_PRED,
  SMAX_PRED,
  SMIN_PRED,
  SRA_PRED,
  SRL_PRED,
  UDIV_PRED,
  UMAX_PRED,
  UMIN_PRED,

  // Unpredicated vector instructions
  BIC,

  SRAD_MERGE_OP1,

  // Predicated instructions with the result of inactive lanes provided by the
  // last operand.
  FABS_MERGE_PASSTHRU,
  FCEIL_MERGE_PASSTHRU,
  FFLOOR_MERGE_PASSTHRU,
  FNEARBYINT_MERGE_PASSTHRU,
  FNEG_MERGE_PASSTHRU,
  FRECPX_MERGE_PASSTHRU,
  FRINT_MERGE_PASSTHRU,
  FROUND_MERGE_PASSTHRU,
  FROUNDEVEN_MERGE_PASSTHRU,
  FSQRT_MERGE_PASSTHRU,
  FTRUNC_MERGE_PASSTHRU,
  FP_ROUND_MERGE_PASSTHRU,
  FP_EXTEND_MERGE_PASSTHRU,
  UINT_TO_FP_MERGE_PASSTHRU,
  SINT_TO_FP_MERGE_PASSTHRU,
  FCVTZU_MERGE_PASSTHRU,
  FCVTZS_MERGE_PASSTHRU,
  SIGN_EXTEND_INREG_MERGE_PASSTHRU,
  ZERO_EXTEND_INREG_MERGE_PASSTHRU,
  ABS_MERGE_PASSTHRU,
  NEG_MERGE_PASSTHRU,

  SETCC_MERGE_ZERO,

  // Arithmetic instructions which write flags.
  ADDS,
  SUBS,
  ADCS,
  SBCS,
  ANDS,

  // Conditional compares. Operands: left,right,falsecc,cc,flags
  CCMP,
  CCMN,
  FCCMP,

  // Floating point comparison
  FCMP,

  // Scalar-to-vector duplication
  DUP,
  DUPLANE8,
  DUPLANE16,
  DUPLANE32,
  DUPLANE64,
  DUPLANE128,

  // Vector immediate moves
  MOVI,
  MOVIshift,
  MOVIedit,
  MOVImsl,
  FMOV,
  MVNIshift,
  MVNImsl,

  // Vector immediate ops
  BICi,
  ORRi,

  // Vector bitwise select: similar to ISD::VSELECT but not all bits within an
  // element must be identical.
  BSP,

  // Vector shuffles
  ZIP1,
  ZIP2,
  UZP1,
  UZP2,
  TRN1,
  TRN2,
  REV16,
  REV32,
  REV64,
  EXT,
  SPLICE,

  // Vector shift by scalar
  VSHL,
  VLSHR,
  VASHR,

  // Vector shift by scalar (again)
  SQSHL_I,
  UQSHL_I,
  SQSHLU_I,
  SRSHR_I,
  URSHR_I,
  URSHR_I_PRED,

  // Vector narrowing shift by immediate (bottom)
  RSHRNB_I,

  // Vector shift by constant and insert
  VSLI,
  VSRI,

  // Vector comparisons
  CMEQ,
  CMGE,
  CMGT,
  CMHI,
  CMHS,
  FCMEQ,
  FCMGE,
  FCMGT,

  // Vector zero comparisons
  CMEQz,
  CMGEz,
  CMGTz,
  CMLEz,
  CMLTz,
  FCMEQz,
  FCMGEz,
  FCMGTz,
  FCMLEz,
  FCMLTz,

  // Round wide FP to narrow FP with inexact results to odd.
  FCVTXN,

  // Vector across-lanes addition
  // Only the lower result lane is defined.
  SADDV,
  UADDV,

  // Unsigned sum Long across Vector
  UADDLV,
  SADDLV,

  // Add Pairwise of two vectors
  ADDP,
  // Add Long Pairwise
  SADDLP,
  UADDLP,

  // udot/sdot instructions
  UDOT,
  SDOT,

  // Vector across-lanes min/max
  // Only the lower result lane is defined.
  SMINV,
  UMINV,
  SMAXV,
  UMAXV,

  SADDV_PRED,
  UADDV_PRED,
  SMAXV_PRED,
  UMAXV_PRED,
  SMINV_PRED,
  UMINV_PRED,
  ORV_PRED,
  EORV_PRED,
  ANDV_PRED,

  // Compare-and-branch
  CBZ,
  CBNZ,
  TBZ,
  TBNZ,

  // Tail calls
  TC_RETURN,

  // Custom prefetch handling
  PREFETCH,

  // {s|u}int to FP within a FP register.
  SITOF,
  UITOF,

  /// Natural vector cast. ISD::BITCAST is not natural in the big-endian
  /// world w.r.t. vectors, which causes additional REV instructions to be
  /// generated to compensate for the byte-swapping. But sometimes we do
  /// need to re-interpret the data in SIMD vector registers in big-endian
  /// mode without emitting such REV instructions.
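  /// For example (illustrative): NVCAST can treat the contents of a vector
  /// register produced as v2i64 as v4i32 in place, where a plain ISD::BITCAST
  /// on a big-endian target would additionally require a REV instruction to
  /// preserve bitcast (memory-layout) semantics.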
  NVCAST,

  MRS, // MRS, also sets the flags via a glue.

  SMULL,
  UMULL,

  PMULL,

  // Reciprocal estimates and steps.
  FRECPE,
  FRECPS,
  FRSQRTE,
  FRSQRTS,

  SUNPKHI,
  SUNPKLO,
  UUNPKHI,
  UUNPKLO,

  CLASTA_N,
  CLASTB_N,
  LASTA,
  LASTB,
  TBL,

  // Floating-point reductions.
  FADDA_PRED,
  FADDV_PRED,
  FMAXV_PRED,
  FMAXNMV_PRED,
  FMINV_PRED,
  FMINNMV_PRED,

  INSR,
  PTEST,
  PTEST_ANY,
  PTRUE,

  CTTZ_ELTS,

  BITREVERSE_MERGE_PASSTHRU,
  BSWAP_MERGE_PASSTHRU,
  REVH_MERGE_PASSTHRU,
  REVW_MERGE_PASSTHRU,
  CTLZ_MERGE_PASSTHRU,
  CTPOP_MERGE_PASSTHRU,
  DUP_MERGE_PASSTHRU,
  INDEX_VECTOR,

  // Cast between vectors of the same element type that differ in length.
  REINTERPRET_CAST,

  // Nodes to build an LD64B / ST64B 64-bit quantity out of i64, and vice versa
  LS64_BUILD,
  LS64_EXTRACT,

  LD1_MERGE_ZERO,
  LD1S_MERGE_ZERO,
  LDNF1_MERGE_ZERO,
  LDNF1S_MERGE_ZERO,
  LDFF1_MERGE_ZERO,
  LDFF1S_MERGE_ZERO,
  LD1RQ_MERGE_ZERO,
  LD1RO_MERGE_ZERO,

  // Structured loads.
  SVE_LD2_MERGE_ZERO,
  SVE_LD3_MERGE_ZERO,
  SVE_LD4_MERGE_ZERO,

  // Unsigned gather loads.
  GLD1_MERGE_ZERO,
  GLD1_SCALED_MERGE_ZERO,
  GLD1_UXTW_MERGE_ZERO,
  GLD1_SXTW_MERGE_ZERO,
  GLD1_UXTW_SCALED_MERGE_ZERO,
  GLD1_SXTW_SCALED_MERGE_ZERO,
  GLD1_IMM_MERGE_ZERO,
  GLD1Q_MERGE_ZERO,
  GLD1Q_INDEX_MERGE_ZERO,

  // Signed gather loads
  GLD1S_MERGE_ZERO,
  GLD1S_SCALED_MERGE_ZERO,
  GLD1S_UXTW_MERGE_ZERO,
  GLD1S_SXTW_MERGE_ZERO,
  GLD1S_UXTW_SCALED_MERGE_ZERO,
  GLD1S_SXTW_SCALED_MERGE_ZERO,
  GLD1S_IMM_MERGE_ZERO,

  // Unsigned gather loads.
  GLDFF1_MERGE_ZERO,
  GLDFF1_SCALED_MERGE_ZERO,
  GLDFF1_UXTW_MERGE_ZERO,
  GLDFF1_SXTW_MERGE_ZERO,
  GLDFF1_UXTW_SCALED_MERGE_ZERO,
  GLDFF1_SXTW_SCALED_MERGE_ZERO,
  GLDFF1_IMM_MERGE_ZERO,

  // Signed gather loads.
  GLDFF1S_MERGE_ZERO,
  GLDFF1S_SCALED_MERGE_ZERO,
  GLDFF1S_UXTW_MERGE_ZERO,
  GLDFF1S_SXTW_MERGE_ZERO,
  GLDFF1S_UXTW_SCALED_MERGE_ZERO,
  GLDFF1S_SXTW_SCALED_MERGE_ZERO,
  GLDFF1S_IMM_MERGE_ZERO,

  // Non-temporal gather loads
  GLDNT1_MERGE_ZERO,
  GLDNT1_INDEX_MERGE_ZERO,
  GLDNT1S_MERGE_ZERO,

  // Contiguous masked store.
  ST1_PRED,

  // Scatter store
  SST1_PRED,
  SST1_SCALED_PRED,
  SST1_UXTW_PRED,
  SST1_SXTW_PRED,
  SST1_UXTW_SCALED_PRED,
  SST1_SXTW_SCALED_PRED,
  SST1_IMM_PRED,
  SST1Q_PRED,
  SST1Q_INDEX_PRED,

  // Non-temporal scatter store
  SSTNT1_PRED,
  SSTNT1_INDEX_PRED,

  // SME
  RDSVL,
  REVD_MERGE_PASSTHRU,
  ALLOCATE_ZA_BUFFER,
  INIT_TPIDR2OBJ,

  // Asserts that a function argument (i32) is zero-extended to i8 by
  // the caller
  ASSERT_ZEXT_BOOL,

  // 128-bit system register accesses
  // lo64, hi64, chain = MRRS(chain, sysregname)
  MRRS,
  // chain = MSRR(chain, sysregname, lo64, hi64)
  MSRR,

  // Strict (exception-raising) floating point comparison
  STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
  STRICT_FCMPE,

  // SME ZA loads and stores
  SME_ZA_LDR,
  SME_ZA_STR,

  // NEON Load/Store with post-increment base updates
  LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LD3post,
  LD4post,
  ST2post,
  ST3post,
  ST4post,
  LD1x2post,
  LD1x3post,
  LD1x4post,
  ST1x2post,
  ST1x3post,
  ST1x4post,
  LD1DUPpost,
  LD2DUPpost,
  LD3DUPpost,
  LD4DUPpost,
  LD1LANEpost,
  LD2LANEpost,
  LD3LANEpost,
  LD4LANEpost,
  ST2LANEpost,
  ST3LANEpost,
  ST4LANEpost,

  STG,
  STZG,
  ST2G,
  STZ2G,

  LDP,
  LDIAPP,
  LDNP,
  STP,
  STILP,
  STNP,

  // Memory Operations
  MOPS_MEMSET,
  MOPS_MEMSET_TAGGING,
  MOPS_MEMCOPY,
  MOPS_MEMMOVE,
};

} // end namespace AArch64ISD

namespace AArch64 {
/// Possible values of current rounding mode, which is specified in bits
/// 23:22 of FPCR.
enum Rounding {
  RN = 0,    // Round to Nearest
  RP = 1,    // Round towards Plus infinity
  RM = 2,    // Round towards Minus infinity
  RZ = 3,    // Round towards Zero
  rmMask = 3 // Bit mask selecting rounding mode
};

// Bit position of rounding mode bits in FPCR.
const unsigned RoundingBitsPos = 22;
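// For example (illustrative): the current mode can be recovered from a
// captured FPCR value as (FPCR >> RoundingBitsPos) & rmMask and compared
// against the Rounding enumerators above.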

// Reserved bits should be preserved when modifying FPCR.
const uint64_t ReservedFPControlBits = 0xfffffffff80040f8;

// Registers used to pass function arguments.
ArrayRef<MCPhysReg> getGPRArgRegs();
ArrayRef<MCPhysReg> getFPRArgRegs();

/// Maximum allowed number of unprobed bytes above SP at an ABI
/// boundary.
const unsigned StackProbeMaxUnprobedStack = 1024;

/// Maximum number of iterations to unroll for a constant size probing loop.
const unsigned StackProbeMaxLoopUnroll = 4;

} // namespace AArch64

class AArch64Subtarget;

class AArch64TargetLowering : public TargetLowering {
public:
  explicit AArch64TargetLowering(const TargetMachine &TM,
      const AArch64Subtarget &STI);

  /// Control the following reassociation of operands: (op (op x, c1), y) -> (op
  /// (op x, y), c1) where N0 is (op x, c1) and N1 is y.
  bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
      SDValue N1) const override;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;

  /// Selects the correct CCAssignFn for a given CallingConvention value.
  CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets.
  void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
      const APInt &DemandedElts,
      const SelectionDAG &DAG,
      unsigned Depth = 0) const override;

  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
      const APInt &DemandedElts,
      const SelectionDAG &DAG,
      unsigned Depth) const override;

  MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override {
    // Returning i64 unconditionally here (i.e. even for ILP32) means that the
    // *DAG* representation of pointers will always be 64-bits. They will be
    // truncated and extended when transferred to memory, but the 64-bit DAG
    // allows us to use AArch64's addressing modes much more easily.
    return MVT::getIntegerVT(64);
  }

  bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
      const APInt &DemandedElts,
      TargetLoweringOpt &TLO) const override;

  MVT getScalarShiftAmountTy(const DataLayout &DL, EVT) const override;

  /// Returns true if the target allows unaligned memory accesses of the
  /// specified type.
  bool allowsMisalignedMemoryAccesses(
      EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned *Fast = nullptr) const override;
  /// LLT variant.
  bool allowsMisalignedMemoryAccesses(LLT Ty, unsigned AddrSpace,
      Align Alignment,
      MachineMemOperand::Flags Flags,
      unsigned *Fast = nullptr) const override;

  /// Provide custom lowering hooks for some operations.
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

  const char *getTargetNodeName(unsigned Opcode) const override;

  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

  /// This method returns a target specific FastISel object, or null if the
  /// target does not support "fast" ISel.
  FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
      const TargetLibraryInfo *libInfo) const override;

  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
      bool ForCodeSize) const override;

  /// Return true if the given shuffle mask can be codegen'd directly, or if it
  /// should be stack expanded.
  bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Similar to isShuffleMaskLegal. Return true if the given 'select with zero'
  /// shuffle mask can be codegen'd directly.
  bool isVectorClearMaskLegal(ArrayRef<int> M, EVT VT) const override;

  /// Return the ISD::SETCC ValueType.
  EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
      EVT VT) const override;

  SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;

  MachineBasicBlock *EmitF128CSEL(MachineInstr &MI,
      MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
      MachineBasicBlock *BB) const;

  MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI,
      MachineBasicBlock *MBB) const;

  MachineBasicBlock *EmitTileLoad(unsigned Opc, unsigned BaseReg,
      MachineInstr &MI,
      MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg,
      MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitZTInstr(MachineInstr &MI, MachineBasicBlock *BB,
      unsigned Opcode, bool Op0IsDef) const;
  MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitInitTPIDR2Object(MachineInstr &MI,
      MachineBasicBlock *BB) const;
  MachineBasicBlock *EmitAllocateZABuffer(MachineInstr &MI,
      MachineBasicBlock *BB) const;

  MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI,
      MachineBasicBlock *MBB) const override;

  bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
      MachineFunction &MF,
      unsigned Intrinsic) const override;

  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
      EVT NewVT) const override;

  bool shouldRemoveRedundantExtend(SDValue Op) const override;

  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTruncateFree(EVT VT1, EVT VT2) const override;

  bool isProfitableToHoist(Instruction *I) const override;

  bool isZExtFree(Type *Ty1, Type *Ty2) const override;
  bool isZExtFree(EVT VT1, EVT VT2) const override;
  bool isZExtFree(SDValue Val, EVT VT2) const override;

  bool shouldSinkOperands(Instruction *I,
      SmallVectorImpl<Use *> &Ops) const override;

  bool optimizeExtendOrTruncateConversion(
      Instruction *I, Loop *L, const TargetTransformInfo &TTI) const override;

  bool hasPairedLoad(EVT LoadedType, Align &RequiredAligment) const override;

  unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

  bool lowerInterleavedLoad(LoadInst *LI,
      ArrayRef<ShuffleVectorInst *> Shuffles,
      ArrayRef<unsigned> Indices,
      unsigned Factor) const override;
  bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
      unsigned Factor) const override;

  bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
      LoadInst *LI) const override;

  bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
      StoreInst *SI) const override;

  bool isLegalAddImmediate(int64_t) const override;
  bool isLegalAddScalableImmediate(int64_t) const override;
  bool isLegalICmpImmediate(int64_t) const override;

  bool isMulAddWithConstProfitable(SDValue AddNode,
      SDValue ConstNode) const override;

  bool shouldConsiderGEPOffsetSplit() const override;

  EVT getOptimalMemOpType(const MemOp &Op,
      const AttributeList &FuncAttributes) const override;

  LLT getOptimalMemOpLLT(const MemOp &Op,
      const AttributeList &FuncAttributes) const override;

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
      unsigned AS,
      Instruction *I = nullptr) const override;

  int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
      int64_t MaxOffset) const override;

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
      EVT VT) const override;
  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

  bool generateFMAsInMachineCombiner(EVT VT,
      CodeGenOptLevel OptLevel) const override;

  const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
  ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteWithShift(const SDNode *N,
      CombineLevel Level) const override;

  bool isDesirableToPullExtFromShl(const MachineInstr &MI) const override {
    return false;
  }

  /// Returns false if N is a bit extraction pattern of (X >> C) & Mask.
  bool isDesirableToCommuteXorWithShift(const SDNode *N) const override;

  /// Return true if it is profitable to fold a pair of shifts into a mask.
  bool shouldFoldConstantShiftPairToMask(const SDNode *N,
      CombineLevel Level) const override;

  bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
      EVT VT) const override;

  /// Returns true if it is beneficial to convert a load of a constant
  /// to just the constant itself.
  bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
      Type *Ty) const override;

  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
  /// with this index.
  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
      unsigned Index) const override;

  bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
      bool MathUsed) const override {
    // Using overflow ops for overflow checks only should be beneficial on
    // AArch64.
    return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
  }

  Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
      AtomicOrdering Ord) const override;
  Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
      AtomicOrdering Ord) const override;

  void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;

  bool isOpSuitableForLDPSTP(const Instruction *I) const;
  bool isOpSuitableForLSE128(const Instruction *I) const;
  bool isOpSuitableForRCPC3(const Instruction *I) const;
  bool shouldInsertFencesForAtomic(const Instruction *I) const override;
  bool
  shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

  TargetLoweringBase::AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

  bool useLoadStackGuardNode() const override;
  TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const override;

  /// If the target has a standard location for the stack protector cookie,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getIRStackGuard(IRBuilderBase &IRB) const override;

  void insertSSPDeclarations(Module &M) const override;
  Value *getSDagStackGuard(const Module &M) const override;
  Function *getSSPStackGuardCheck(const Module &M) const override;

  /// If the target has a standard location for the unsafe stack pointer,
  /// returns the address of that location. Otherwise, returns nullptr.
  Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;

  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X0;
  }

  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const override {
    // FIXME: This is a guess. Has this been defined yet?
    return AArch64::X1;
  }

  bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

  bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
      const MachineFunction &MF) const override {
    // Do not merge to float value size (128 bits) if no implicit
    // float attribute is set.

    bool NoFloat = MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat);

    if (NoFloat)
      return (MemVT.getSizeInBits() <= 64);
    return true;
  }

  bool isCheapToSpeculateCttz(Type *) const override {
    return true;
  }

  bool isCheapToSpeculateCtlz(Type *) const override {
    return true;
  }

  bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

  bool hasAndNotCompare(SDValue V) const override {
    // We can use bics for any scalar.
    return V.getValueType().isScalarInteger();
  }

  bool hasAndNot(SDValue Y) const override {
    EVT VT = Y.getValueType();

    if (!VT.isVector())
      return hasAndNotCompare(Y);

    TypeSize TS = VT.getSizeInBits();
    // TODO: We should be able to use bic/bif too for SVE.
    return !TS.isScalable() && TS.getFixedValue() >= 64; // vector 'bic'
  }

  bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
      SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
      unsigned OldShiftOpcode, unsigned NewShiftOpcode,
      SelectionDAG &DAG) const override;

  ShiftLegalizationStrategy
  preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
      unsigned ExpansionFactor) const override;

  bool shouldTransformSignedTruncationCheck(EVT XVT,
      unsigned KeptBits) const override {
    // For vectors, we don't have a preference.
    if (XVT.isVector())
      return false;

    auto VTIsOk = [](EVT VT) -> bool {
      return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
             VT == MVT::i64;
    };

    // We are ok with KeptBitsVT being byte/word/dword, what SXT supports.
    // XVT will be larger than KeptBitsVT.
    MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
    return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
  }

  bool preferIncOfAddToSubOfNot(EVT VT) const override;

  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;

  bool shouldExpandCmpUsingSelects() const override { return true; }

  bool isComplexDeinterleavingSupported() const override;
  bool isComplexDeinterleavingOperationSupported(
      ComplexDeinterleavingOperation Operation, Type *Ty) const override;

  Value *createComplexDeinterleavingIR(
      IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
      ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
      Value *Accumulator = nullptr) const override;

  bool supportSplitCSR(MachineFunction *MF) const override {
    return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
  }
  void initializeSplitCSR(MachineBasicBlock *Entry) const override;
  void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

  bool supportSwiftError() const override {
    return true;
  }

  bool supportPtrAuthBundles() const override { return true; }

  bool supportKCFIBundles() const override { return true; }

  MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
      MachineBasicBlock::instr_iterator &MBBI,
      const TargetInstrInfo *TII) const override;

  /// Enable aggressive FMA fusion on targets that want it.
  bool enableAggressiveFMAFusion(EVT VT) const override;

  /// Returns the size of the platform's va_list object.
  unsigned getVaListSizeInBits(const DataLayout &DL) const override;

  /// Returns true if \p VecTy is a legal interleaved access type. This
  /// function checks the vector element type and the overall width of the
  /// vector.
  bool isLegalInterleavedAccessType(VectorType *VecTy, const DataLayout &DL,
      bool &UseScalable) const;

  /// Returns the number of interleaved accesses that will be generated when
  /// lowering accesses of the given type.
  unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL,
      bool UseScalable) const;

  MachineMemOperand::Flags getTargetMMOFlags(
      const Instruction &I) const override;

  bool functionArgumentNeedsConsecutiveRegisters(
      Type *Ty, CallingConv::ID CallConv, bool isVarArg,
      const DataLayout &DL) const override;

  /// Used for exception handling on Win64.
  bool needsFixedCatchObjects() const override;

  bool fallBackToDAGISel(const Instruction &Inst) const override;

  /// SVE code generation for fixed length vectors does not custom lower
  /// BUILD_VECTOR. This makes BUILD_VECTOR legalisation a source of stores to
  /// merge. However, merging them creates a BUILD_VECTOR that is just as
  /// illegal as the original, thus leading to an infinite legalisation loop.
  /// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
  /// vector types this override can be removed.
  bool mergeStoresAfterLegalization(EVT VT) const override;

  // If the platform/function should have a redzone, return the size in bytes.
  unsigned getRedZoneSize(const Function &F) const {
    if (F.hasFnAttribute(Attribute::NoRedZone))
      return 0;
    return 128;
  }

  bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
  EVT getPromotedVTForPredicate(EVT VT) const;

  EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
      bool AllowUnknown = false) const override;

  bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const override;

  bool shouldExpandCttzElements(EVT VT) const override;

  /// If a change in streaming mode is required on entry to/return from a
  /// function call it emits and returns the corresponding SMSTART or SMSTOP
  /// node. \p Condition should be one of the enum values from
  /// AArch64SME::ToggleCondition.
  SDValue changeStreamingMode(SelectionDAG &DAG, SDLoc DL, bool Enable,
      SDValue Chain, SDValue InGlue, unsigned Condition,
      SDValue PStateSM = SDValue()) const;

  bool isVScaleKnownToBeAPowerOfTwo() const override { return true; }

  // Normally SVE is only used for byte size vectors that do not fit within a
  // NEON vector. This changes when OverrideNEON is true, allowing SVE to be
  // used for 64-bit and 128-bit vectors as well.
  bool useSVEForFixedLengthVectorVT(EVT VT, bool OverrideNEON = false) const;

  // Follow NEON ABI rules even when using SVE for fixed length vectors.
  MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
      EVT VT) const override;
  unsigned getNumRegistersForCallingConv(LLVMContext &Context,
      CallingConv::ID CC,
      EVT VT) const override;
  unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
      CallingConv::ID CC, EVT VT,
      EVT &IntermediateVT,
      unsigned &NumIntermediates,
      MVT &RegisterVT) const override;

  /// True if stack clash protection is enabled for this function.
  bool hasInlineStackProbe(const MachineFunction &MF) const override;

#ifndef NDEBUG
  void verifyTargetSDNode(const SDNode *N) const override;
#endif

private:
  /// Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  llvm::BumpPtrAllocator BumpAlloc;
  llvm::StringSaver Saver{BumpAlloc};

  bool isExtFreeImpl(const Instruction *Ext) const override;

  void addTypeForNEON(MVT VT);
  void addTypeForFixedLengthSVE(MVT VT);
  void addDRType(MVT VT);
  void addQRType(MVT VT);

  bool shouldExpandBuildVectorWithShuffles(EVT, unsigned) const override;

  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
      bool isVarArg,
      const SmallVectorImpl<ISD::InputArg> &Ins,
      const SDLoc &DL, SelectionDAG &DAG,
      SmallVectorImpl<SDValue> &InVals) const override;

  void AdjustInstrPostInstrSelection(MachineInstr &MI,
      SDNode *Node) const override;

  SDValue LowerCall(CallLoweringInfo & /*CLI*/,
      SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
      CallingConv::ID CallConv, bool isVarArg,
      const SmallVectorImpl<CCValAssign> &RVLocs,
      const SDLoc &DL, SelectionDAG &DAG,
      SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
      SDValue ThisVal, bool RequiresSMChange) const;

  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerStore128(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMGATHER(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMSCATTER(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;

  bool
  isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const;

  /// Finds the incoming stack arguments which overlap the given fixed stack
  /// object and incorporates their load into the current chain. This prevents
  /// an upcoming store from clobbering the stack argument before it's used.
  SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
      MachineFrameInfo &MFI, int ClobberedFI) const;

  bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;

  void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
      SDValue &Chain) const;

  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
      bool isVarArg,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      LLVMContext &Context) const override;

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
      SelectionDAG &DAG) const override;

  SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
      unsigned Flag) const;
  SDValue getTargetNode(JumpTableSDNode *N, EVT Ty, SelectionDAG &DAG,
      unsigned Flag) const;
  SDValue getTargetNode(ConstantPoolSDNode *N, EVT Ty, SelectionDAG &DAG,
      unsigned Flag) const;
  SDValue getTargetNode(BlockAddressSDNode *N, EVT Ty, SelectionDAG &DAG,
      unsigned Flag) const;
  SDValue getTargetNode(ExternalSymbolSDNode *N, EVT Ty, SelectionDAG &DAG,
      unsigned Flag) const;
  template <class NodeTy>
  SDValue getGOT(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrLarge(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  template <class NodeTy>
  SDValue getAddrTiny(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;
  SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwinGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerELFTLSLocalExec(const GlobalValue *GV, SDValue ThreadBase,
      const SDLoc &DL, SelectionDAG &DAG) const;
  SDValue LowerELFTLSDescCallSeq(SDValue SymAddr, const SDLoc &DL,
      SelectionDAG &DAG) const;
  SDValue LowerWindowsGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerPtrAuthGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerPtrAuthGlobalAddressStatically(SDValue TGA, SDLoc DL, EVT VT,
      AArch64PACKey::ID Key,
      SDValue Discriminator,
      SDValue AddrDiscriminator,
      SelectionDAG &DAG) const;
  SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
      SDValue TVal, SDValue FVal, const SDLoc &dl,
      SelectionDAG &DAG) const;
  SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerAAPCS_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDarwin_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWin64_VASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerGET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerRESET_FPMODE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDUPQLane(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
      unsigned NewOp) const;
  SDValue LowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECTOR_HISTOGRAM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTPOP_PARITY(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBitreverse(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorXRINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerInlineDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const;

  SDValue LowerFixedLengthVectorIntDivideToSVE(SDValue Op,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorIntExtendToSVE(SDValue Op,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMLoadToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerPredReductionToSVE(SDValue ScalarOp, SelectionDAG &DAG) const;
  SDValue LowerReductionToSVE(unsigned Opcode, SDValue ScalarOp,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSelectToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorSetccToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorStoreToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorMStoreToSVE(SDValue Op,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVectorTruncateToSVE(SDValue Op,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthExtractVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthInsertVectorElt(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthBitcastToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthConcatVectorsToSVE(SDValue Op,
      SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPExtendToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPRoundToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthIntToFPToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthFPToIntToSVE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFixedLengthVECTOR_SHUFFLEToSVE(SDValue Op,
      SelectionDAG &DAG) const;

  SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
      SmallVectorImpl<SDNode *> &Created) const override;
  SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
      SmallVectorImpl<SDNode *> &Created) const override;
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
      int &ExtraSteps, bool &UseOneConst,
      bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
      int &ExtraSteps) const override;
  SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
      const DenormalMode &Mode) const override;
  SDValue getSqrtResultForDenormInput(SDValue Operand,
      SelectionDAG &DAG) const override;
  unsigned combineRepeatedFPDivisors() const override;

  ConstraintType getConstraintType(StringRef Constraint) const override;
  Register getRegisterByName(const char *RegName, LLT VT,
      const MachineFunction &MF) const override;

  /// Examine constraint string and operand type and determine a weight value.
  /// The operand object must already have been set up with the operand type.
  ConstraintWeight
  getSingleConstraintMatchWeight(AsmOperandInfo &info,
      const char *constraint) const override;

  std::pair<unsigned, const TargetRegisterClass *>
  getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
      StringRef Constraint, MVT VT) const override;

  const char *LowerXConstraint(EVT ConstraintVT) const override;

  void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
      std::vector<SDValue> &Ops,
      SelectionDAG &DAG) const override;

  InlineAsm::ConstraintCode
  getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
    if (ConstraintCode == "Q")
      return InlineAsm::ConstraintCode::Q;
    // FIXME: clang has code for 'Ump', 'Utf', 'Usa', and 'Ush' but these are
    // followed by llvm_unreachable so we'll leave them unimplemented in
    // the backend for now.
    return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
  }

  /// Handle Lowering flag assembly outputs.
  SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
      const SDLoc &DL,
      const AsmOperandInfo &Constraint,
      SelectionDAG &DAG) const override;

  bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
  bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
  bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
  bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
  bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
  bool getIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
      SDValue &Offset, SelectionDAG &DAG) const;
  bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
      ISD::MemIndexedMode &AM,
      SelectionDAG &DAG) const override;
  bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
      SDValue &Offset, ISD::MemIndexedMode &AM,
      SelectionDAG &DAG) const override;
  bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
      bool IsPre, MachineRegisterInfo &MRI) const override;

  void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
      SelectionDAG &DAG) const override;
  void ReplaceBITCASTResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
      SelectionDAG &DAG) const;
  void ReplaceExtractSubVectorResults(SDNode *N,
      SmallVectorImpl<SDValue> &Results,
      SelectionDAG &DAG) const;

  bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override;

  void finalizeLowering(MachineFunction &MF) const override;

  bool shouldLocalize(const MachineInstr &MI,
      const TargetTransformInfo *TTI) const override;

  bool SimplifyDemandedBitsForTargetNode(SDValue Op,
      const APInt &OriginalDemandedBits,
      const APInt &OriginalDemandedElts,
      KnownBits &Known,
      TargetLoweringOpt &TLO,
      unsigned Depth) const override;

  bool isTargetCanonicalConstantNode(SDValue Op) const override;

  // With the exception of data-predicate transitions, no instructions are
  // required to cast between legal scalable vector types. However:
  //  1. Packed and unpacked types have different bit lengths, meaning BITCAST
  //     is not universally useable.
  //  2. Most unpacked integer types are not legal and thus integer extends
  //     cannot be used to convert between unpacked and packed types.
  // These can make "bitcasting" a multiphase process. REINTERPRET_CAST is used
  // to transition between unpacked and packed types of the same element type,
  // with BITCAST used otherwise.
  // This function does not handle predicate bitcasts.
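  // For example (illustrative): an unpacked nxv2f32 value would first be
  // REINTERPRET_CAST to the packed nxv4f32 container and only then BITCAST to
  // a packed type with a different element type such as nxv4i32.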
  SDValue getSVESafeBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) const;

  // Returns the runtime value for PSTATE.SM by generating a call to
  // __arm_sme_state.
  SDValue getRuntimePStateSM(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
      EVT VT) const;

  bool preferScalarizeSplat(SDNode *N) const override;

  unsigned getMinimumJumpTableEntries() const override;

  bool softPromoteHalfType() const override { return true; }
};

namespace AArch64 {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
    const TargetLibraryInfo *libInfo);
} // end namespace AArch64

} // end namespace llvm

#endif