GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h
1
//===-- RISCVISelLowering.h - RISC-V DAG Lowering Interface -----*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines the interfaces that RISC-V uses to lower LLVM code into a
10
// selection DAG.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#ifndef LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H
15
#define LLVM_LIB_TARGET_RISCV_RISCVISELLOWERING_H
16
17
#include "RISCV.h"
18
#include "llvm/CodeGen/CallingConvLower.h"
19
#include "llvm/CodeGen/SelectionDAG.h"
20
#include "llvm/CodeGen/TargetLowering.h"
21
#include <optional>
22
23
namespace llvm {
24
class InstructionCost;
25
class RISCVSubtarget;
26
struct RISCVRegisterInfo;
27
class RVVArgDispatcher;
28
29
namespace RISCVISD {
30
// clang-format off
31
enum NodeType : unsigned {
32
FIRST_NUMBER = ISD::BUILTIN_OP_END,
33
RET_GLUE,
34
SRET_GLUE,
35
MRET_GLUE,
36
CALL,
37
/// Select with condition operator - This selects between a true value and
38
/// a false value (ops #3 and #4) based on the boolean result of comparing
39
/// the lhs and rhs (ops #0 and #1) of a conditional expression with the
40
/// condition code in op #2, an XLenVT constant from the ISD::CondCode enum.
41
/// The lhs and rhs are XLenVT integers. The true and false values can be
42
/// integer or floating point.
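/// Illustrative example (not part of the original comment, derived from the
/// operand layout above): (SELECT_CC lhs, rhs, SETLT, tv, fv) yields tv when
/// lhs is signed-less-than rhs, and fv otherwise.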
43
SELECT_CC,
44
BR_CC,
45
BuildPairF64,
46
SplitF64,
47
TAIL,
48
49
// Add the Lo 12 bits from an address. Selected to ADDI.
50
ADD_LO,
51
// Get the Hi 20 bits from an address. Selected to LUI.
52
HI,
53
54
// Represents an AUIPC+ADDI pair. Selected to PseudoLLA.
55
LLA,
56
57
// Selected as PseudoAddTPRel. Used to emit a TP-relative relocation.
58
ADD_TPREL,
59
60
// Multiply high for signed * unsigned.
61
MULHSU,
62
63
// Represents (ADD (SHL a, b), c) with the arguments appearing in the order
64
// a, b, c. 'b' must be a constant. Maps to sh1add/sh2add/sh3add with zba
65
// or addsl with XTheadBa.
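// Illustrative example (not part of the original comment): (SHL_ADD a, 2, c)
// computes ((a << 2) + c), which with Zba can select to sh2add.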
66
SHL_ADD,
67
68
// RV64I shifts, directly matching the semantics of the named RISC-V
69
// instructions.
70
SLLW,
71
SRAW,
72
SRLW,
73
// 32-bit operations from RV64M that can't be simply matched with a pattern
74
// at instruction selection time. These have undefined behavior for division
75
// by 0 or overflow (divw) like their target independent counterparts.
76
DIVW,
77
DIVUW,
78
REMUW,
79
// RV64IB rotates, directly matching the semantics of the named RISC-V
80
// instructions.
81
ROLW,
82
RORW,
83
// RV64IZbb bit counting instructions directly matching the semantics of the
84
// named RISC-V instructions.
85
CLZW,
86
CTZW,
87
88
// RV64IZbb absolute value for i32. Expanded to (max (negw X), X) during isel.
89
ABSW,
90
91
// FPR<->GPR transfer operations when the FPR is smaller than XLEN, needed as
92
// XLEN is the only legal integer width.
93
//
94
// FMV_H_X matches the semantics of the FMV.H.X instruction.
95
// FMV_X_ANYEXTH is similar to FMV.X.H but has an any-extended result.
96
// FMV_X_SIGNEXTH is similar to FMV.X.H and has a sign-extended result.
97
// FMV_W_X_RV64 matches the semantics of the FMV.W.X instruction.
98
// FMV_X_ANYEXTW_RV64 is similar to FMV.X.W but has an any-extended result.
99
//
100
// This is a more convenient semantic for producing dagcombines that remove
101
// unnecessary GPR->FPR->GPR moves.
102
FMV_H_X,
103
FMV_X_ANYEXTH,
104
FMV_X_SIGNEXTH,
105
FMV_W_X_RV64,
106
FMV_X_ANYEXTW_RV64,
107
// FP to XLen int conversions. Corresponds to fcvt.l(u).s/d/h on RV64 and
108
// fcvt.w(u).s/d/h on RV32. Unlike FP_TO_S/UINT these saturate out of
109
// range inputs. These are used for FP_TO_S/UINT_SAT lowering. Rounding mode
110
// is passed as a TargetConstant operand using the RISCVFPRndMode enum.
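// Illustrative example (an assumption, not part of the original comment): on
// RV64, (FCVT_X f, RISCVFPRndMode::RTZ) produces a saturating XLenVT (i64)
// result and would typically select to fcvt.l.s/d/h with round-towards-zero.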
111
FCVT_X,
112
FCVT_XU,
113
// FP to 32 bit int conversions for RV64. These are used to keep track of the
114
// result being sign extended to 64 bit. These saturate out of range inputs.
115
// Used for FP_TO_S/UINT and FP_TO_S/UINT_SAT lowering. Rounding mode
116
// is passed as a TargetConstant operand using the RISCVFPRndMode enum.
117
FCVT_W_RV64,
118
FCVT_WU_RV64,
119
120
FP_ROUND_BF16,
121
FP_EXTEND_BF16,
122
123
// Rounds an FP value to its corresponding integer in the same FP format.
124
// First operand is the value to round, the second operand is the largest
125
// integer that can be represented exactly in the FP format. This will be
126
// expanded into multiple instructions and basic blocks with a custom
127
// inserter.
128
FROUND,
129
130
FCLASS,
131
132
// Floating point fmax and fmin matching the RISC-V instruction semantics.
133
FMAX, FMIN,
134
135
// A read of the 64-bit counter CSR on a 32-bit target (returns (Lo, Hi)).
136
// It takes a chain operand and another two target constant operands (the
137
// CSR numbers of the low and high parts of the counter).
138
READ_COUNTER_WIDE,
139
140
// brev8, orc.b, zip, and unzip from Zbb and Zbkb. All operands are i32 or
141
// XLenVT.
142
BREV8,
143
ORC_B,
144
ZIP,
145
UNZIP,
146
147
// Scalar cryptography
148
CLMUL, CLMULH, CLMULR,
149
SHA256SIG0, SHA256SIG1, SHA256SUM0, SHA256SUM1,
150
SM4KS, SM4ED,
151
SM3P0, SM3P1,
152
153
// May-Be-Operations
154
MOPR, MOPRR,
155
156
// Vector Extension
157
FIRST_VL_VECTOR_OP,
158
// VMV_V_V_VL matches the semantics of vmv.v.v but includes an extra operand
159
// for the VL value to be used for the operation. The first operand is
160
// the passthru operand.
161
VMV_V_V_VL = FIRST_VL_VECTOR_OP,
162
// VMV_V_X_VL matches the semantics of vmv.v.x but includes an extra operand
163
// for the VL value to be used for the operation. The first operand is
164
// the passthru operand.
165
VMV_V_X_VL,
166
// VFMV_V_F_VL matches the semantics of vfmv.v.f but includes an extra operand
167
// for the VL value to be used for the operation. The first operand is
168
// the passthru operand.
169
VFMV_V_F_VL,
170
// VMV_X_S matches the semantics of vmv.x.s. The result is always XLenVT sign
171
// extended from the vector element size.
172
VMV_X_S,
173
// VMV_S_X_VL matches the semantics of vmv.s.x. It carries a VL operand.
174
VMV_S_X_VL,
175
// VFMV_S_F_VL matches the semantics of vfmv.s.f. It carries a VL operand.
176
VFMV_S_F_VL,
177
// Splats a 64-bit value that has been split into two i32 parts. This is
178
// expanded late to two scalar stores and a stride 0 vector load.
179
// The first operand is the passthru operand.
180
SPLAT_VECTOR_SPLIT_I64_VL,
181
// Truncates an RVV integer vector by one power-of-two. Carries both an extra
182
// mask and VL operand.
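// Illustrative example (not part of the original comment): truncating
// nxv4i32 to nxv4i16 is a single TRUNCATE_VECTOR_VL, while nxv4i32 to nxv4i8
// would be expressed as two such truncations chained together.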
183
TRUNCATE_VECTOR_VL,
184
// Matches the semantics of vslideup/vslidedown. The first operand is the
185
// pass-thru operand, the second is the source vector, the third is the XLenVT
186
// index (either constant or non-constant), the fourth is the mask, the fifth
187
// is the VL and the sixth is the policy.
188
VSLIDEUP_VL,
189
VSLIDEDOWN_VL,
190
// Matches the semantics of vslide1up/vslide1down. The first operand is
191
// passthru operand, the second is source vector, third is the XLenVT scalar
192
// value. The fourth and fifth operands are the mask and VL operands.
193
VSLIDE1UP_VL,
194
VSLIDE1DOWN_VL,
195
// Matches the semantics of vfslide1up/vfslide1down. The first operand is
196
// passthru operand, the second is source vector, third is a scalar value
197
// whose type matches the element type of the vectors. The fourth and fifth
198
// operands are the mask and VL operands.
199
VFSLIDE1UP_VL,
200
VFSLIDE1DOWN_VL,
201
// Matches the semantics of the vid.v instruction, with a mask and VL
202
// operand.
203
VID_VL,
204
// Matches the semantics of the vfncvt.rod instruction (convert double-width
205
// float to single-width float, rounding towards odd). Takes a double-width
206
// float vector and produces a single-width float vector. Also has a mask and
207
// VL operand.
208
VFNCVT_ROD_VL,
209
// These nodes match the semantics of the corresponding RVV vector reduction
210
// instructions. They produce a vector result which is the reduction
211
// performed over the second vector operand plus the first element of the
212
// third vector operand. The first operand is the pass-thru operand. The
213
// second operand is an unconstrained vector type, and the result, first, and
214
// third operand's types are expected to be the corresponding full-width
215
// LMUL=1 type for the second operand:
216
// nxv8i8 = vecreduce_add nxv8i8, nxv32i8, nxv8i8
217
// nxv2i32 = vecreduce_add nxv2i32, nxv8i32, nxv2i32
218
// The difference in types does introduce extra vsetvli instructions, but
219
// it also reduces the number of registers consumed per reduction.
220
// Also has a mask and VL operand.
221
VECREDUCE_ADD_VL,
222
VECREDUCE_UMAX_VL,
223
VECREDUCE_SMAX_VL,
224
VECREDUCE_UMIN_VL,
225
VECREDUCE_SMIN_VL,
226
VECREDUCE_AND_VL,
227
VECREDUCE_OR_VL,
228
VECREDUCE_XOR_VL,
229
VECREDUCE_FADD_VL,
230
VECREDUCE_SEQ_FADD_VL,
231
VECREDUCE_FMIN_VL,
232
VECREDUCE_FMAX_VL,
233
234
// Vector binary ops with a merge as a third operand, a mask as a fourth
235
// operand, and VL as a fifth operand.
236
ADD_VL,
237
AND_VL,
238
MUL_VL,
239
OR_VL,
240
SDIV_VL,
241
SHL_VL,
242
SREM_VL,
243
SRA_VL,
244
SRL_VL,
245
ROTL_VL,
246
ROTR_VL,
247
SUB_VL,
248
UDIV_VL,
249
UREM_VL,
250
XOR_VL,
251
SMIN_VL,
252
SMAX_VL,
253
UMIN_VL,
254
UMAX_VL,
255
256
BITREVERSE_VL,
257
BSWAP_VL,
258
CTLZ_VL,
259
CTTZ_VL,
260
CTPOP_VL,
261
262
SADDSAT_VL,
263
UADDSAT_VL,
264
SSUBSAT_VL,
265
USUBSAT_VL,
266
267
// Averaging adds of signed integers.
268
AVGFLOORS_VL,
269
// Averaging adds of unsigned integers.
270
AVGFLOORU_VL,
271
// Rounding averaging adds of signed integers.
272
AVGCEILS_VL,
273
// Rounding averaging adds of unsigned integers.
274
AVGCEILU_VL,
275
276
// Operands are (source, shift, merge, mask, roundmode, vl)
277
VNCLIPU_VL,
278
VNCLIP_VL,
279
280
MULHS_VL,
281
MULHU_VL,
282
FADD_VL,
283
FSUB_VL,
284
FMUL_VL,
285
FDIV_VL,
286
VFMIN_VL,
287
VFMAX_VL,
288
289
// Vector unary ops with a mask as a second operand and VL as a third operand.
290
FNEG_VL,
291
FABS_VL,
292
FSQRT_VL,
293
FCLASS_VL,
294
FCOPYSIGN_VL, // Has a merge operand
295
VFCVT_RTZ_X_F_VL,
296
VFCVT_RTZ_XU_F_VL,
297
VFCVT_X_F_VL,
298
VFCVT_XU_F_VL,
299
VFROUND_NOEXCEPT_VL,
300
VFCVT_RM_X_F_VL, // Has a rounding mode operand.
301
VFCVT_RM_XU_F_VL, // Has a rounding mode operand.
302
SINT_TO_FP_VL,
303
UINT_TO_FP_VL,
304
VFCVT_RM_F_X_VL, // Has a rounding mode operand.
305
VFCVT_RM_F_XU_VL, // Has a rounding mode operand.
306
FP_ROUND_VL,
307
FP_EXTEND_VL,
308
309
// Vector FMA ops with a mask as a fourth operand and VL as a fifth operand.
310
VFMADD_VL,
311
VFNMADD_VL,
312
VFMSUB_VL,
313
VFNMSUB_VL,
314
315
// Vector widening FMA ops with a mask as a fourth operand and VL as a fifth
316
// operand.
317
VFWMADD_VL,
318
VFWNMADD_VL,
319
VFWMSUB_VL,
320
VFWNMSUB_VL,
321
322
// Widening instructions with a merge value as a third operand, a mask as a
323
// fourth operand, and VL as a fifth operand.
324
VWMUL_VL,
325
VWMULU_VL,
326
VWMULSU_VL,
327
VWADD_VL,
328
VWADDU_VL,
329
VWSUB_VL,
330
VWSUBU_VL,
331
VWADD_W_VL,
332
VWADDU_W_VL,
333
VWSUB_W_VL,
334
VWSUBU_W_VL,
335
VWSLL_VL,
336
337
VFWMUL_VL,
338
VFWADD_VL,
339
VFWSUB_VL,
340
VFWADD_W_VL,
341
VFWSUB_W_VL,
342
343
// Widening ternary operations with a mask as the fourth operand and VL as the
344
// fifth operand.
345
VWMACC_VL,
346
VWMACCU_VL,
347
VWMACCSU_VL,
348
349
// Narrowing logical shift right.
350
// Operands are (source, shift, passthru, mask, vl)
351
VNSRL_VL,
352
353
// Vector compare producing a mask. Fourth operand is input mask. Fifth
354
// operand is VL.
355
SETCC_VL,
356
357
// General vmerge node with mask, true, false, passthru, and vl operands.
358
// Tail agnostic vselect can be implemented by setting passthru to undef.
359
VMERGE_VL,
360
361
// Mask binary operators.
362
VMAND_VL,
363
VMOR_VL,
364
VMXOR_VL,
365
366
// Set mask vector to all zeros or ones.
367
VMCLR_VL,
368
VMSET_VL,
369
370
// Matches the semantics of vrgather.vx and vrgather.vv with extra operands
371
// for passthru and VL. Operands are (src, index, mask, passthru, vl).
372
VRGATHER_VX_VL,
373
VRGATHER_VV_VL,
374
VRGATHEREI16_VV_VL,
375
376
// Vector sign/zero extend with additional mask & VL operands.
377
VSEXT_VL,
378
VZEXT_VL,
379
380
// vcpop.m with additional mask and VL operands.
381
VCPOP_VL,
382
383
// vfirst.m with additional mask and VL operands.
384
VFIRST_VL,
385
386
LAST_VL_VECTOR_OP = VFIRST_VL,
387
388
// Read VLENB CSR
389
READ_VLENB,
390
// Reads value of CSR.
391
// The first operand is a chain pointer. The second specifies address of the
392
// required CSR. Two results are produced, the read value and the new chain
393
// pointer.
394
READ_CSR,
395
// Write value to CSR.
396
// The first operand is a chain pointer, the second specifies address of the
397
// required CSR and the third is the value to write. The result is the new
398
// chain pointer.
399
WRITE_CSR,
400
// Read and write value of CSR.
401
// The first operand is a chain pointer, the second specifies address of the
402
// required CSR and the third is the value to write. Two results are produced,
403
// the value read before the modification and the new chain pointer.
404
SWAP_CSR,
405
406
// Branchless select operations, matching the semantics of the instructions
407
// defined in Zicond or XVentanaCondOps.
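// Illustrative example (an assumption, not part of the original comment):
// (CZERO_EQZ val, cond) yields 0 when cond == 0 and val otherwise, matching
// czero.eqz; CZERO_NEZ is the analogous form for czero.nez.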
408
CZERO_EQZ, // vt.maskc for XVentanaCondOps.
409
CZERO_NEZ, // vt.maskcn for XVentanaCondOps.
410
411
/// Software guarded BRIND node. Operand 0 is the chain operand and
412
/// operand 1 is the target address.
413
SW_GUARDED_BRIND,
414
415
// FP to 32 bit int conversions for RV64. These are used to keep track of the
416
// result being sign extended to 64 bit. These saturate out of range inputs.
417
STRICT_FCVT_W_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE,
418
STRICT_FCVT_WU_RV64,
419
STRICT_FADD_VL,
420
STRICT_FSUB_VL,
421
STRICT_FMUL_VL,
422
STRICT_FDIV_VL,
423
STRICT_FSQRT_VL,
424
STRICT_VFMADD_VL,
425
STRICT_VFNMADD_VL,
426
STRICT_VFMSUB_VL,
427
STRICT_VFNMSUB_VL,
428
STRICT_FP_ROUND_VL,
429
STRICT_FP_EXTEND_VL,
430
STRICT_VFNCVT_ROD_VL,
431
STRICT_SINT_TO_FP_VL,
432
STRICT_UINT_TO_FP_VL,
433
STRICT_VFCVT_RM_X_F_VL,
434
STRICT_VFCVT_RTZ_X_F_VL,
435
STRICT_VFCVT_RTZ_XU_F_VL,
436
STRICT_FSETCC_VL,
437
STRICT_FSETCCS_VL,
438
STRICT_VFROUND_NOEXCEPT_VL,
439
LAST_RISCV_STRICTFP_OPCODE = STRICT_VFROUND_NOEXCEPT_VL,
440
441
SF_VC_XV_SE,
442
SF_VC_IV_SE,
443
SF_VC_VV_SE,
444
SF_VC_FV_SE,
445
SF_VC_XVV_SE,
446
SF_VC_IVV_SE,
447
SF_VC_VVV_SE,
448
SF_VC_FVV_SE,
449
SF_VC_XVW_SE,
450
SF_VC_IVW_SE,
451
SF_VC_VVW_SE,
452
SF_VC_FVW_SE,
453
SF_VC_V_X_SE,
454
SF_VC_V_I_SE,
455
SF_VC_V_XV_SE,
456
SF_VC_V_IV_SE,
457
SF_VC_V_VV_SE,
458
SF_VC_V_FV_SE,
459
SF_VC_V_XVV_SE,
460
SF_VC_V_IVV_SE,
461
SF_VC_V_VVV_SE,
462
SF_VC_V_FVV_SE,
463
SF_VC_V_XVW_SE,
464
SF_VC_V_IVW_SE,
465
SF_VC_V_VVW_SE,
466
SF_VC_V_FVW_SE,
467
468
// WARNING: Do not add anything at the end unless you want the node to
469
// have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE, all
470
// opcodes will be treated as target memory ops!
471
472
TH_LWD = ISD::FIRST_TARGET_MEMORY_OPCODE,
473
TH_LWUD,
474
TH_LDD,
475
TH_SWD,
476
TH_SDD,
477
};
478
// clang-format on
479
} // namespace RISCVISD
480
481
class RISCVTargetLowering : public TargetLowering {
482
const RISCVSubtarget &Subtarget;
483
484
public:
485
explicit RISCVTargetLowering(const TargetMachine &TM,
486
const RISCVSubtarget &STI);
487
488
const RISCVSubtarget &getSubtarget() const { return Subtarget; }
489
490
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
491
MachineFunction &MF,
492
unsigned Intrinsic) const override;
493
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
494
unsigned AS,
495
Instruction *I = nullptr) const override;
496
bool isLegalICmpImmediate(int64_t Imm) const override;
497
bool isLegalAddImmediate(int64_t Imm) const override;
498
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override;
499
bool isTruncateFree(EVT SrcVT, EVT DstVT) const override;
500
bool isTruncateFree(SDValue Val, EVT VT2) const override;
501
bool isZExtFree(SDValue Val, EVT VT2) const override;
502
bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override;
503
bool signExtendConstant(const ConstantInt *CI) const override;
504
bool isCheapToSpeculateCttz(Type *Ty) const override;
505
bool isCheapToSpeculateCtlz(Type *Ty) const override;
506
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
507
bool hasAndNotCompare(SDValue Y) const override;
508
bool hasBitTest(SDValue X, SDValue Y) const override;
509
bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
510
SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
511
unsigned OldShiftOpcode, unsigned NewShiftOpcode,
512
SelectionDAG &DAG) const override;
513
/// Return true if the (vector) instruction I will be lowered to an instruction
514
/// with a scalar splat operand for the given Operand number.
515
bool canSplatOperand(Instruction *I, int Operand) const;
516
/// Return true if a vector instruction will lower to a target instruction
517
/// able to splat the given operand.
518
bool canSplatOperand(unsigned Opcode, int Operand) const;
519
bool shouldSinkOperands(Instruction *I,
520
SmallVectorImpl<Use *> &Ops) const override;
521
bool shouldScalarizeBinop(SDValue VecOp) const override;
522
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
523
std::pair<int, bool> getLegalZfaFPImm(const APFloat &Imm, EVT VT) const;
524
bool isFPImmLegal(const APFloat &Imm, EVT VT,
525
bool ForCodeSize) const override;
526
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
527
unsigned Index) const override;
528
529
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
530
531
bool preferScalarizeSplat(SDNode *N) const override;
532
533
bool softPromoteHalfType() const override { return true; }
534
535
/// Return the register type for a given MVT, ensuring vectors are treated
536
/// as a series of gpr sized integers.
537
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
538
EVT VT) const override;
539
540
/// Return the number of registers for a given MVT, ensuring vectors are
541
/// treated as a series of gpr sized integers.
542
unsigned getNumRegistersForCallingConv(LLVMContext &Context,
543
CallingConv::ID CC,
544
EVT VT) const override;
545
546
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context,
547
CallingConv::ID CC, EVT VT,
548
EVT &IntermediateVT,
549
unsigned &NumIntermediates,
550
MVT &RegisterVT) const override;
551
552
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
553
EVT VT) const override;
554
555
/// Return true if the given shuffle mask can be codegen'd directly, or if it
556
/// should be stack expanded.
557
bool isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const override;
558
559
bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
560
// If the pair to store is a mixture of float and int values, we will
561
// save two bitwise instructions and one float-to-int instruction and
562
// add one store instruction. There is potentially a more
563
// significant benefit because it avoids the float->int domain switch
564
// for the input value. So it is more likely a win.
565
if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
566
(LTy.isInteger() && HTy.isFloatingPoint()))
567
return true;
568
// If the pair only contains int values, we will save two bitwise
569
// instructions and add one store instruction (costing one more
570
// store buffer). Since the benefit is less clear, we leave such a pair
571
// out until we get a testcase to prove it is a win.
572
return false;
573
}
574
575
bool
576
shouldExpandBuildVectorWithShuffles(EVT VT,
577
unsigned DefinedValues) const override;
578
579
bool shouldExpandCttzElements(EVT VT) const override;
580
581
/// Return the cost of LMUL for linear operations.
582
InstructionCost getLMULCost(MVT VT) const;
583
584
InstructionCost getVRGatherVVCost(MVT VT) const;
585
InstructionCost getVRGatherVICost(MVT VT) const;
586
InstructionCost getVSlideVXCost(MVT VT) const;
587
InstructionCost getVSlideVICost(MVT VT) const;
588
589
// Provide custom lowering hooks for some operations.
590
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
591
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
592
SelectionDAG &DAG) const override;
593
594
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
595
596
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
597
const APInt &DemandedElts,
598
TargetLoweringOpt &TLO) const override;
599
600
void computeKnownBitsForTargetNode(const SDValue Op,
601
KnownBits &Known,
602
const APInt &DemandedElts,
603
const SelectionDAG &DAG,
604
unsigned Depth) const override;
605
unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
606
const APInt &DemandedElts,
607
const SelectionDAG &DAG,
608
unsigned Depth) const override;
609
610
bool canCreateUndefOrPoisonForTargetNode(SDValue Op,
611
const APInt &DemandedElts,
612
const SelectionDAG &DAG,
613
bool PoisonOnly, bool ConsiderFlags,
614
unsigned Depth) const override;
615
616
const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
617
618
// This method returns the name of a target specific DAG node.
619
const char *getTargetNodeName(unsigned Opcode) const override;
620
621
MachineMemOperand::Flags
622
getTargetMMOFlags(const Instruction &I) const override;
623
624
MachineMemOperand::Flags
625
getTargetMMOFlags(const MemSDNode &Node) const override;
626
627
bool
628
areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX,
629
const MemSDNode &NodeY) const override;
630
631
ConstraintType getConstraintType(StringRef Constraint) const override;
632
633
InlineAsm::ConstraintCode
634
getInlineAsmMemConstraint(StringRef ConstraintCode) const override;
635
636
std::pair<unsigned, const TargetRegisterClass *>
637
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
638
StringRef Constraint, MVT VT) const override;
639
640
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
641
std::vector<SDValue> &Ops,
642
SelectionDAG &DAG) const override;
643
644
MachineBasicBlock *
645
EmitInstrWithCustomInserter(MachineInstr &MI,
646
MachineBasicBlock *BB) const override;
647
648
void AdjustInstrPostInstrSelection(MachineInstr &MI,
649
SDNode *Node) const override;
650
651
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
652
EVT VT) const override;
653
654
bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
655
bool MathUsed) const override {
656
if (VT == MVT::i8 || VT == MVT::i16)
657
return false;
658
659
return TargetLowering::shouldFormOverflowOp(Opcode, VT, MathUsed);
660
}
661
662
bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
663
unsigned AddrSpace) const override {
664
// If we can replace 4 or more scalar stores, there will be a reduction
665
// in instructions even after we add a vector constant load.
666
return NumElem >= 4;
667
}
668
669
bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
670
return VT.isScalarInteger();
671
}
672
bool convertSelectOfConstantsToMath(EVT VT) const override { return true; }
673
674
bool isCtpopFast(EVT VT) const override;
675
676
unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const override;
677
678
bool preferZeroCompareBranch() const override { return true; }
679
680
bool shouldInsertFencesForAtomic(const Instruction *I) const override {
681
return isa<LoadInst>(I) || isa<StoreInst>(I);
682
}
683
Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
684
AtomicOrdering Ord) const override;
685
Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
686
AtomicOrdering Ord) const override;
687
688
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
689
EVT VT) const override;
690
691
ISD::NodeType getExtendForAtomicOps() const override {
692
return ISD::SIGN_EXTEND;
693
}
694
695
ISD::NodeType getExtendForAtomicCmpSwapArg() const override;
696
697
bool shouldTransformSignedTruncationCheck(EVT XVT,
698
unsigned KeptBits) const override;
699
700
TargetLowering::ShiftLegalizationStrategy
701
preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
702
unsigned ExpansionFactor) const override {
703
if (DAG.getMachineFunction().getFunction().hasMinSize())
704
return ShiftLegalizationStrategy::LowerToLibcall;
705
return TargetLowering::preferredShiftLegalizationStrategy(DAG, N,
706
ExpansionFactor);
707
}
708
709
bool isDesirableToCommuteWithShift(const SDNode *N,
710
CombineLevel Level) const override;
711
712
/// If a physical register, this returns the register that receives the
713
/// exception address on entry to an EH pad.
714
Register
715
getExceptionPointerRegister(const Constant *PersonalityFn) const override;
716
717
/// If a physical register, this returns the register that receives the
718
/// exception typeid on entry to a landing pad.
719
Register
720
getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
721
722
bool shouldExtendTypeInLibCall(EVT Type) const override;
723
bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const override;
724
725
/// Returns the register with the specified architectural or ABI name. This
726
/// method is necessary to lower the llvm.read_register.* and
727
/// llvm.write_register.* intrinsics. Allocatable registers must be reserved
728
/// with the clang -ffixed-xX flag for access to be allowed.
729
Register getRegisterByName(const char *RegName, LLT VT,
730
const MachineFunction &MF) const override;
731
732
// Lower incoming arguments, copy physregs into vregs
733
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
734
bool IsVarArg,
735
const SmallVectorImpl<ISD::InputArg> &Ins,
736
const SDLoc &DL, SelectionDAG &DAG,
737
SmallVectorImpl<SDValue> &InVals) const override;
738
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
739
bool IsVarArg,
740
const SmallVectorImpl<ISD::OutputArg> &Outs,
741
LLVMContext &Context) const override;
742
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
743
const SmallVectorImpl<ISD::OutputArg> &Outs,
744
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
745
SelectionDAG &DAG) const override;
746
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
747
SmallVectorImpl<SDValue> &InVals) const override;
748
749
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
750
Type *Ty) const override;
751
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
752
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
753
bool shouldConsiderGEPOffsetSplit() const override { return true; }
754
755
bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
756
SDValue C) const override;
757
758
bool isMulAddWithConstProfitable(SDValue AddNode,
759
SDValue ConstNode) const override;
760
761
TargetLowering::AtomicExpansionKind
762
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
763
Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI,
764
Value *AlignedAddr, Value *Incr,
765
Value *Mask, Value *ShiftAmt,
766
AtomicOrdering Ord) const override;
767
TargetLowering::AtomicExpansionKind
768
shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const override;
769
Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,
770
AtomicCmpXchgInst *CI,
771
Value *AlignedAddr, Value *CmpVal,
772
Value *NewVal, Value *Mask,
773
AtomicOrdering Ord) const override;
774
775
/// Returns true if the target allows unaligned memory accesses of the
776
/// specified type.
777
bool allowsMisalignedMemoryAccesses(
778
EVT VT, unsigned AddrSpace = 0, Align Alignment = Align(1),
779
MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
780
unsigned *Fast = nullptr) const override;
781
782
EVT getOptimalMemOpType(const MemOp &Op,
783
const AttributeList &FuncAttributes) const override;
784
785
bool splitValueIntoRegisterParts(
786
SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
787
unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC)
788
const override;
789
790
SDValue joinRegisterPartsIntoValue(
791
SelectionDAG & DAG, const SDLoc &DL, const SDValue *Parts,
792
unsigned NumParts, MVT PartVT, EVT ValueVT,
793
std::optional<CallingConv::ID> CC) const override;
794
795
// Return the value of VLMax for the given vector type (i.e. SEW and LMUL)
796
SDValue computeVLMax(MVT VecVT, const SDLoc &DL, SelectionDAG &DAG) const;
797
798
static RISCVII::VLMUL getLMUL(MVT VT);
799
inline static unsigned computeVLMAX(unsigned VectorBits, unsigned EltSize,
800
unsigned MinSize) {
801
// Original equation:
802
// VLMAX = (VectorBits / EltSize) * LMUL
803
// where LMUL = MinSize / RISCV::RVVBitsPerBlock
804
// The following equations have been reordered to prevent loss of precision
805
// when calculating fractional LMUL.
806
return ((VectorBits / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
807
}
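// Worked example (illustrative, not from the original source): with
// VectorBits = VLEN = 128, EltSize = SEW = 32 and MinSize = 256 (i.e.
// LMUL = 4, since RISCV::RVVBitsPerBlock is 64), the reordered form gives
// ((128 / 32) * 256) / 64 = 16, matching VLMAX = (VLEN / SEW) * LMUL.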
808
809
// Return inclusive (low, high) bounds on the value of VLMAX for the
810
// given scalable container type given known bounds on VLEN.
811
static std::pair<unsigned, unsigned>
812
computeVLMAXBounds(MVT ContainerVT, const RISCVSubtarget &Subtarget);
813
814
static unsigned getRegClassIDForLMUL(RISCVII::VLMUL LMul);
815
static unsigned getSubregIndexByMVT(MVT VT, unsigned Index);
816
static unsigned getRegClassIDForVecVT(MVT VT);
817
static std::pair<unsigned, unsigned>
818
decomposeSubvectorInsertExtractToSubRegs(MVT VecVT, MVT SubVecVT,
819
unsigned InsertExtractIdx,
820
const RISCVRegisterInfo *TRI);
821
MVT getContainerForFixedLengthVector(MVT VT) const;
822
823
bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
824
825
bool isLegalElementTypeForRVV(EVT ScalarTy) const;
826
827
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
828
829
unsigned getJumpTableEncoding() const override;
830
831
const MCExpr *LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
832
const MachineBasicBlock *MBB,
833
unsigned uid,
834
MCContext &Ctx) const override;
835
836
bool isVScaleKnownToBeAPowerOfTwo() const override;
837
838
bool getIndexedAddressParts(SDNode *Op, SDValue &Base, SDValue &Offset,
839
ISD::MemIndexedMode &AM, SelectionDAG &DAG) const;
840
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
841
ISD::MemIndexedMode &AM,
842
SelectionDAG &DAG) const override;
843
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base,
844
SDValue &Offset, ISD::MemIndexedMode &AM,
845
SelectionDAG &DAG) const override;
846
847
bool isLegalScaleForGatherScatter(uint64_t Scale,
848
uint64_t ElemSize) const override {
849
// Scaled addressing not supported on indexed load/stores
850
return Scale == 1;
851
}
852
853
/// If the target has a standard location for the stack protector cookie,
854
/// returns the address of that location. Otherwise, returns nullptr.
855
Value *getIRStackGuard(IRBuilderBase &IRB) const override;
856
857
/// Returns whether or not generating an interleaved load/store intrinsic for
858
/// this type will be legal.
859
bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
860
Align Alignment, unsigned AddrSpace,
861
const DataLayout &) const;
862
863
/// Return true if a strided load/store of the given result type and
864
/// alignment is legal.
865
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;
866
867
unsigned getMaxSupportedInterleaveFactor() const override { return 8; }
868
869
bool fallBackToDAGISel(const Instruction &Inst) const override;
870
871
bool lowerInterleavedLoad(LoadInst *LI,
872
ArrayRef<ShuffleVectorInst *> Shuffles,
873
ArrayRef<unsigned> Indices,
874
unsigned Factor) const override;
875
876
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
877
unsigned Factor) const override;
878
879
bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *II,
880
LoadInst *LI) const override;
881
882
bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
883
StoreInst *SI) const override;
884
885
bool supportKCFIBundles() const override { return true; }
886
887
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
888
int JTI, SelectionDAG &DAG) const override;
889
890
MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
891
MachineBasicBlock::instr_iterator &MBBI,
892
const TargetInstrInfo *TII) const override;
893
894
/// RISCVCCAssignFn - This target-specific function extends the default
895
/// CCValAssign with additional information used to lower RISC-V calling
896
/// conventions.
897
typedef bool RISCVCCAssignFn(const DataLayout &DL, RISCVABI::ABI,
898
unsigned ValNo, MVT ValVT, MVT LocVT,
899
CCValAssign::LocInfo LocInfo,
900
ISD::ArgFlagsTy ArgFlags, CCState &State,
901
bool IsFixed, bool IsRet, Type *OrigTy,
902
const RISCVTargetLowering &TLI,
903
RVVArgDispatcher &RVVDispatcher);
904
905
private:
906
void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
907
const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
908
RISCVCCAssignFn Fn) const;
909
void analyzeOutputArgs(MachineFunction &MF, CCState &CCInfo,
910
const SmallVectorImpl<ISD::OutputArg> &Outs,
911
bool IsRet, CallLoweringInfo *CLI,
912
RISCVCCAssignFn Fn) const;
913
914
template <class NodeTy>
915
SDValue getAddr(NodeTy *N, SelectionDAG &DAG, bool IsLocal = true,
916
bool IsExternWeak = false) const;
917
SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
918
bool UseGOT) const;
919
SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
920
SDValue getTLSDescAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
921
922
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
923
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
924
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
925
SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
926
SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
927
SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
928
SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
929
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
930
SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
931
SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
932
SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
933
SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
934
SDValue lowerSPLAT_VECTOR_PARTS(SDValue Op, SelectionDAG &DAG) const;
935
SDValue lowerVectorMaskSplat(SDValue Op, SelectionDAG &DAG) const;
936
SDValue lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
937
int64_t ExtTrueVal) const;
938
SDValue lowerVectorMaskTruncLike(SDValue Op, SelectionDAG &DAG) const;
939
SDValue lowerVectorTruncLike(SDValue Op, SelectionDAG &DAG) const;
940
SDValue lowerVectorFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;
941
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
942
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
943
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
944
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
945
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
946
SDValue lowerVPREDUCE(SDValue Op, SelectionDAG &DAG) const;
947
SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
948
SDValue lowerVectorMaskVecReduction(SDValue Op, SelectionDAG &DAG,
949
bool IsVP) const;
950
SDValue lowerFPVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
951
SDValue lowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
952
SDValue lowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
953
SDValue lowerVECTOR_DEINTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
954
SDValue lowerVECTOR_INTERLEAVE(SDValue Op, SelectionDAG &DAG) const;
955
SDValue lowerSTEP_VECTOR(SDValue Op, SelectionDAG &DAG) const;
956
SDValue lowerVECTOR_REVERSE(SDValue Op, SelectionDAG &DAG) const;
957
SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
958
SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
959
SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
960
SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
961
SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
962
SelectionDAG &DAG) const;
963
SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const;
964
SDValue lowerMaskedScatter(SDValue Op, SelectionDAG &DAG) const;
965
SDValue lowerFixedLengthVectorLoadToRVV(SDValue Op, SelectionDAG &DAG) const;
966
SDValue lowerFixedLengthVectorStoreToRVV(SDValue Op, SelectionDAG &DAG) const;
967
SDValue lowerFixedLengthVectorSetccToRVV(SDValue Op, SelectionDAG &DAG) const;
968
SDValue lowerFixedLengthVectorSelectToRVV(SDValue Op,
969
SelectionDAG &DAG) const;
970
SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG) const;
971
SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
972
SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG) const;
973
SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG) const;
974
SDValue lowerVPExtMaskOp(SDValue Op, SelectionDAG &DAG) const;
975
SDValue lowerVPSetCCMaskOp(SDValue Op, SelectionDAG &DAG) const;
976
SDValue lowerVPSplatExperimental(SDValue Op, SelectionDAG &DAG) const;
977
SDValue lowerVPSpliceExperimental(SDValue Op, SelectionDAG &DAG) const;
978
SDValue lowerVPReverseExperimental(SDValue Op, SelectionDAG &DAG) const;
979
SDValue lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG) const;
980
SDValue lowerVPStridedLoad(SDValue Op, SelectionDAG &DAG) const;
981
SDValue lowerVPStridedStore(SDValue Op, SelectionDAG &DAG) const;
982
SDValue lowerVPCttzElements(SDValue Op, SelectionDAG &DAG) const;
983
SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG,
984
unsigned ExtendOpc) const;
985
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
986
SDValue lowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
987
988
SDValue lowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
989
SDValue lowerCTLZ_CTTZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;
990
991
SDValue lowerStrictFPExtendOrRoundLike(SDValue Op, SelectionDAG &DAG) const;
992
993
SDValue lowerVectorStrictFSetcc(SDValue Op, SelectionDAG &DAG) const;
994
995
SDValue expandUnalignedRVVLoad(SDValue Op, SelectionDAG &DAG) const;
996
SDValue expandUnalignedRVVStore(SDValue Op, SelectionDAG &DAG) const;
997
998
bool isEligibleForTailCallOptimization(
999
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
1000
const SmallVector<CCValAssign, 16> &ArgLocs) const;
1001
1002
/// Generate error diagnostics if any register used by CC has been marked
1003
/// reserved.
1004
void validateCCReservedRegs(
1005
const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
1006
MachineFunction &MF) const;
1007
1008
bool useRVVForFixedLengthVectorVT(MVT VT) const;
1009
1010
MVT getVPExplicitVectorLengthTy() const override;
1011
1012
bool shouldExpandGetVectorLength(EVT TripCountVT, unsigned VF,
1013
bool IsScalable) const override;
1014
1015
/// RVV code generation for fixed length vectors does not lower all
1016
/// BUILD_VECTORs. This makes BUILD_VECTOR legalisation a source of stores to
1017
/// merge. However, merging them creates a BUILD_VECTOR that is just as
1018
/// illegal as the original, thus leading to an infinite legalisation loop.
1019
/// NOTE: Once BUILD_VECTOR can be custom lowered for all legal vector types,
1020
/// this override can be removed.
1021
bool mergeStoresAfterLegalization(EVT VT) const override;
1022
1023
/// Disable normalizing
1024
/// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
1025
/// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y))
1026
/// RISC-V doesn't have flags so it's better to perform the and/or in a GPR.
1027
bool shouldNormalizeToSelectSequence(LLVMContext &, EVT) const override {
1028
return false;
1029
}
1030
1031
/// For available scheduling models FDIV + two independent FMULs are much
1032
/// faster than two FDIVs.
1033
unsigned combineRepeatedFPDivisors() const override;
1034
1035
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1036
SmallVectorImpl<SDNode *> &Created) const override;
1037
1038
bool shouldFoldSelectWithSingleBitTest(EVT VT,
1039
const APInt &AndMask) const override;
1040
1041
unsigned getMinimumJumpTableEntries() const override;
1042
1043
SDValue emitFlushICache(SelectionDAG &DAG, SDValue InChain, SDValue Start,
1044
SDValue End, SDValue Flags, SDLoc DL) const;
1045
};
1046
1047
/// As per the spec, the rules for passing vector arguments are as follows:
1048
///
1049
/// 1. For the first vector mask argument, use v0 to pass it.
1050
/// 2. For vector data arguments or remaining vector mask arguments, starting from
1051
/// the v8 register, if a vector register group between v8-v23 that has not been
1052
/// allocated can be found and the first register number is a multiple of LMUL,
1053
/// then allocate this vector register group to the argument and mark these
1054
/// registers as allocated. Otherwise, pass it by reference; it is replaced in
1055
/// the argument list with the address.
1056
/// 3. For tuple vector data arguments, starting from the v8 register, if
1057
/// NFIELDS consecutive vector register groups between v8-v23 that have not been
1058
/// allocated can be found and the first register number is a multiple of LMUL,
1059
/// then allocate these vector register groups to the argument and mark these
1060
/// registers as allocated. Otherwise, pass it by reference; it is replaced in
1061
/// the argument list with the address.
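///
/// Illustrative example (not part of the original comment): for a vector
/// mask argument followed by two LMUL=2 vector data arguments, the mask is
/// passed in v0 and the data arguments are allocated to the register groups
/// v8-v9 and v10-v11, whose first register numbers (8 and 10) are multiples
/// of LMUL.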
1062
class RVVArgDispatcher {
1063
public:
1064
static constexpr unsigned NumArgVRs = 16;
1065
1066
struct RVVArgInfo {
1067
unsigned NF;
1068
MVT VT;
1069
bool FirstVMask = false;
1070
};
1071
1072
template <typename Arg>
1073
RVVArgDispatcher(const MachineFunction *MF, const RISCVTargetLowering *TLI,
1074
ArrayRef<Arg> ArgList)
1075
: MF(MF), TLI(TLI) {
1076
constructArgInfos(ArgList);
1077
compute();
1078
}
1079
1080
RVVArgDispatcher() = default;
1081
1082
MCPhysReg getNextPhysReg();
1083
1084
private:
1085
SmallVector<RVVArgInfo, 4> RVVArgInfos;
1086
SmallVector<MCPhysReg, 4> AllocatedPhysRegs;
1087
1088
const MachineFunction *MF = nullptr;
1089
const RISCVTargetLowering *TLI = nullptr;
1090
1091
unsigned CurIdx = 0;
1092
1093
template <typename Arg> void constructArgInfos(ArrayRef<Arg> Ret);
1094
void compute();
1095
void allocatePhysReg(unsigned NF = 1, unsigned LMul = 1,
1096
unsigned StartReg = 0);
1097
};
1098
1099
namespace RISCV {
1100
1101
bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1102
MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
1103
ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
1104
bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
1105
RVVArgDispatcher &RVVDispatcher);
1106
1107
bool CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
1108
MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
1109
ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
1110
bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
1111
RVVArgDispatcher &RVVDispatcher);
1112
1113
bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
1114
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
1115
CCState &State);
1116
1117
ArrayRef<MCPhysReg> getArgGPRs(const RISCVABI::ABI ABI);
1118
1119
} // end namespace RISCV
1120
1121
namespace RISCVVIntrinsicsTable {
1122
1123
struct RISCVVIntrinsicInfo {
1124
unsigned IntrinsicID;
1125
uint8_t ScalarOperand;
1126
uint8_t VLOperand;
1127
bool hasScalarOperand() const {
1128
// 0xF is not valid. See NoScalarOperand in IntrinsicsRISCV.td.
1129
return ScalarOperand != 0xF;
1130
}
1131
bool hasVLOperand() const {
1132
// 0x1F is not valid. See NoVLOperand in IntrinsicsRISCV.td.
1133
return VLOperand != 0x1F;
1134
}
1135
};
1136
1137
using namespace RISCV;
1138
1139
#define GET_RISCVVIntrinsicsTable_DECL
1140
#include "RISCVGenSearchableTables.inc"
1141
#undef GET_RISCVVIntrinsicsTable_DECL
1142
1143
} // end namespace RISCVVIntrinsicsTable
1144
1145
} // end namespace llvm
1146
1147
#endif
1148
1149