//===-- NVPTXISelLowering.h - NVPTX DAG Lowering Interface ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H
#define LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H

#include "NVPTX.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
namespace NVPTXISD {
23
enum NodeType : unsigned {
24
// Start the numbering from where ISD NodeType finishes.
25
FIRST_NUMBER = ISD::BUILTIN_OP_END,
26
Wrapper,
27
CALL,
28
RET_GLUE,
29
LOAD_PARAM,
30
DeclareParam,
31
DeclareScalarParam,
32
DeclareRetParam,
33
DeclareRet,
34
DeclareScalarRet,
35
PrintCall,
36
PrintConvergentCall,
37
PrintCallUni,
38
PrintConvergentCallUni,
39
CallArgBegin,
40
CallArg,
41
LastCallArg,
42
CallArgEnd,
43
CallVoid,
44
CallVal,
45
CallSymbol,
46
Prototype,
47
MoveParam,
48
PseudoUseParam,
49
RETURN,
50
CallSeqBegin,
51
CallSeqEnd,
52
CallPrototype,
53
ProxyReg,
54
FUN_SHFL_CLAMP,
55
FUN_SHFR_CLAMP,
56
MUL_WIDE_SIGNED,
57
MUL_WIDE_UNSIGNED,
58
IMAD,
59
SETP_F16X2,
60
SETP_BF16X2,
61
BFE,
62
BFI,
63
PRMT,
64
DYNAMIC_STACKALLOC,
65
Dummy,
66
67
LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE,
68
LoadV4,
69
LDGV2, // LDG.v2
70
LDGV4, // LDG.v4
71
LDUV2, // LDU.v2
72
LDUV4, // LDU.v4
73
StoreV2,
74
StoreV4,
75
LoadParam,
76
LoadParamV2,
77
LoadParamV4,
78
StoreParam,
79
StoreParamV2,
80
StoreParamV4,
81
StoreParamS32, // to sext and store a <32bit value, not used currently
82
StoreParamU32, // to zext and store a <32bit value, not used currently
83
StoreRetval,
84
StoreRetvalV2,
85
StoreRetvalV4,
86
87
// Texture intrinsics
88
Tex1DFloatS32,
89
Tex1DFloatFloat,
90
Tex1DFloatFloatLevel,
91
Tex1DFloatFloatGrad,
92
Tex1DS32S32,
93
Tex1DS32Float,
94
Tex1DS32FloatLevel,
95
Tex1DS32FloatGrad,
96
Tex1DU32S32,
97
Tex1DU32Float,
98
Tex1DU32FloatLevel,
99
Tex1DU32FloatGrad,
100
Tex1DArrayFloatS32,
101
Tex1DArrayFloatFloat,
102
Tex1DArrayFloatFloatLevel,
103
Tex1DArrayFloatFloatGrad,
104
Tex1DArrayS32S32,
105
Tex1DArrayS32Float,
106
Tex1DArrayS32FloatLevel,
107
Tex1DArrayS32FloatGrad,
108
Tex1DArrayU32S32,
109
Tex1DArrayU32Float,
110
Tex1DArrayU32FloatLevel,
111
Tex1DArrayU32FloatGrad,
112
Tex2DFloatS32,
113
Tex2DFloatFloat,
114
Tex2DFloatFloatLevel,
115
Tex2DFloatFloatGrad,
116
Tex2DS32S32,
117
Tex2DS32Float,
118
Tex2DS32FloatLevel,
119
Tex2DS32FloatGrad,
120
Tex2DU32S32,
121
Tex2DU32Float,
122
Tex2DU32FloatLevel,
123
Tex2DU32FloatGrad,
124
Tex2DArrayFloatS32,
125
Tex2DArrayFloatFloat,
126
Tex2DArrayFloatFloatLevel,
127
Tex2DArrayFloatFloatGrad,
128
Tex2DArrayS32S32,
129
Tex2DArrayS32Float,
130
Tex2DArrayS32FloatLevel,
131
Tex2DArrayS32FloatGrad,
132
Tex2DArrayU32S32,
133
Tex2DArrayU32Float,
134
Tex2DArrayU32FloatLevel,
135
Tex2DArrayU32FloatGrad,
136
Tex3DFloatS32,
137
Tex3DFloatFloat,
138
Tex3DFloatFloatLevel,
139
Tex3DFloatFloatGrad,
140
Tex3DS32S32,
141
Tex3DS32Float,
142
Tex3DS32FloatLevel,
143
Tex3DS32FloatGrad,
144
Tex3DU32S32,
145
Tex3DU32Float,
146
Tex3DU32FloatLevel,
147
Tex3DU32FloatGrad,
148
TexCubeFloatFloat,
149
TexCubeFloatFloatLevel,
150
TexCubeS32Float,
151
TexCubeS32FloatLevel,
152
TexCubeU32Float,
153
TexCubeU32FloatLevel,
154
TexCubeArrayFloatFloat,
155
TexCubeArrayFloatFloatLevel,
156
TexCubeArrayS32Float,
157
TexCubeArrayS32FloatLevel,
158
TexCubeArrayU32Float,
159
TexCubeArrayU32FloatLevel,
160
Tld4R2DFloatFloat,
161
Tld4G2DFloatFloat,
162
Tld4B2DFloatFloat,
163
Tld4A2DFloatFloat,
164
Tld4R2DS64Float,
165
Tld4G2DS64Float,
166
Tld4B2DS64Float,
167
Tld4A2DS64Float,
168
Tld4R2DU64Float,
169
Tld4G2DU64Float,
170
Tld4B2DU64Float,
171
Tld4A2DU64Float,
172
TexUnified1DFloatS32,
173
TexUnified1DFloatFloat,
174
TexUnified1DFloatFloatLevel,
175
TexUnified1DFloatFloatGrad,
176
TexUnified1DS32S32,
177
TexUnified1DS32Float,
178
TexUnified1DS32FloatLevel,
179
TexUnified1DS32FloatGrad,
180
TexUnified1DU32S32,
181
TexUnified1DU32Float,
182
TexUnified1DU32FloatLevel,
183
TexUnified1DU32FloatGrad,
184
TexUnified1DArrayFloatS32,
185
TexUnified1DArrayFloatFloat,
186
TexUnified1DArrayFloatFloatLevel,
187
TexUnified1DArrayFloatFloatGrad,
188
TexUnified1DArrayS32S32,
189
TexUnified1DArrayS32Float,
190
TexUnified1DArrayS32FloatLevel,
191
TexUnified1DArrayS32FloatGrad,
192
TexUnified1DArrayU32S32,
193
TexUnified1DArrayU32Float,
194
TexUnified1DArrayU32FloatLevel,
195
TexUnified1DArrayU32FloatGrad,
196
TexUnified2DFloatS32,
197
TexUnified2DFloatFloat,
198
TexUnified2DFloatFloatLevel,
199
TexUnified2DFloatFloatGrad,
200
TexUnified2DS32S32,
201
TexUnified2DS32Float,
202
TexUnified2DS32FloatLevel,
203
TexUnified2DS32FloatGrad,
204
TexUnified2DU32S32,
205
TexUnified2DU32Float,
206
TexUnified2DU32FloatLevel,
207
TexUnified2DU32FloatGrad,
208
TexUnified2DArrayFloatS32,
209
TexUnified2DArrayFloatFloat,
210
TexUnified2DArrayFloatFloatLevel,
211
TexUnified2DArrayFloatFloatGrad,
212
TexUnified2DArrayS32S32,
213
TexUnified2DArrayS32Float,
214
TexUnified2DArrayS32FloatLevel,
215
TexUnified2DArrayS32FloatGrad,
216
TexUnified2DArrayU32S32,
217
TexUnified2DArrayU32Float,
218
TexUnified2DArrayU32FloatLevel,
219
TexUnified2DArrayU32FloatGrad,
220
TexUnified3DFloatS32,
221
TexUnified3DFloatFloat,
222
TexUnified3DFloatFloatLevel,
223
TexUnified3DFloatFloatGrad,
224
TexUnified3DS32S32,
225
TexUnified3DS32Float,
226
TexUnified3DS32FloatLevel,
227
TexUnified3DS32FloatGrad,
228
TexUnified3DU32S32,
229
TexUnified3DU32Float,
230
TexUnified3DU32FloatLevel,
231
TexUnified3DU32FloatGrad,
232
TexUnifiedCubeFloatFloat,
233
TexUnifiedCubeFloatFloatLevel,
234
TexUnifiedCubeS32Float,
235
TexUnifiedCubeS32FloatLevel,
236
TexUnifiedCubeU32Float,
237
TexUnifiedCubeU32FloatLevel,
238
TexUnifiedCubeArrayFloatFloat,
239
TexUnifiedCubeArrayFloatFloatLevel,
240
TexUnifiedCubeArrayS32Float,
241
TexUnifiedCubeArrayS32FloatLevel,
242
TexUnifiedCubeArrayU32Float,
243
TexUnifiedCubeArrayU32FloatLevel,
244
TexUnifiedCubeFloatFloatGrad,
245
TexUnifiedCubeS32FloatGrad,
246
TexUnifiedCubeU32FloatGrad,
247
TexUnifiedCubeArrayFloatFloatGrad,
248
TexUnifiedCubeArrayS32FloatGrad,
249
TexUnifiedCubeArrayU32FloatGrad,
250
Tld4UnifiedR2DFloatFloat,
251
Tld4UnifiedG2DFloatFloat,
252
Tld4UnifiedB2DFloatFloat,
253
Tld4UnifiedA2DFloatFloat,
254
Tld4UnifiedR2DS64Float,
255
Tld4UnifiedG2DS64Float,
256
Tld4UnifiedB2DS64Float,
257
Tld4UnifiedA2DS64Float,
258
Tld4UnifiedR2DU64Float,
259
Tld4UnifiedG2DU64Float,
260
Tld4UnifiedB2DU64Float,
261
Tld4UnifiedA2DU64Float,
262
263
// Surface intrinsics
264
Suld1DI8Clamp,
265
Suld1DI16Clamp,
266
Suld1DI32Clamp,
267
Suld1DI64Clamp,
268
Suld1DV2I8Clamp,
269
Suld1DV2I16Clamp,
270
Suld1DV2I32Clamp,
271
Suld1DV2I64Clamp,
272
Suld1DV4I8Clamp,
273
Suld1DV4I16Clamp,
274
Suld1DV4I32Clamp,
275
276
Suld1DArrayI8Clamp,
277
Suld1DArrayI16Clamp,
278
Suld1DArrayI32Clamp,
279
Suld1DArrayI64Clamp,
280
Suld1DArrayV2I8Clamp,
281
Suld1DArrayV2I16Clamp,
282
Suld1DArrayV2I32Clamp,
283
Suld1DArrayV2I64Clamp,
284
Suld1DArrayV4I8Clamp,
285
Suld1DArrayV4I16Clamp,
286
Suld1DArrayV4I32Clamp,
287
288
Suld2DI8Clamp,
289
Suld2DI16Clamp,
290
Suld2DI32Clamp,
291
Suld2DI64Clamp,
292
Suld2DV2I8Clamp,
293
Suld2DV2I16Clamp,
294
Suld2DV2I32Clamp,
295
Suld2DV2I64Clamp,
296
Suld2DV4I8Clamp,
297
Suld2DV4I16Clamp,
298
Suld2DV4I32Clamp,
299
300
Suld2DArrayI8Clamp,
301
Suld2DArrayI16Clamp,
302
Suld2DArrayI32Clamp,
303
Suld2DArrayI64Clamp,
304
Suld2DArrayV2I8Clamp,
305
Suld2DArrayV2I16Clamp,
306
Suld2DArrayV2I32Clamp,
307
Suld2DArrayV2I64Clamp,
308
Suld2DArrayV4I8Clamp,
309
Suld2DArrayV4I16Clamp,
310
Suld2DArrayV4I32Clamp,
311
312
Suld3DI8Clamp,
313
Suld3DI16Clamp,
314
Suld3DI32Clamp,
315
Suld3DI64Clamp,
316
Suld3DV2I8Clamp,
317
Suld3DV2I16Clamp,
318
Suld3DV2I32Clamp,
319
Suld3DV2I64Clamp,
320
Suld3DV4I8Clamp,
321
Suld3DV4I16Clamp,
322
Suld3DV4I32Clamp,
323
324
Suld1DI8Trap,
325
Suld1DI16Trap,
326
Suld1DI32Trap,
327
Suld1DI64Trap,
328
Suld1DV2I8Trap,
329
Suld1DV2I16Trap,
330
Suld1DV2I32Trap,
331
Suld1DV2I64Trap,
332
Suld1DV4I8Trap,
333
Suld1DV4I16Trap,
334
Suld1DV4I32Trap,
335
336
Suld1DArrayI8Trap,
337
Suld1DArrayI16Trap,
338
Suld1DArrayI32Trap,
339
Suld1DArrayI64Trap,
340
Suld1DArrayV2I8Trap,
341
Suld1DArrayV2I16Trap,
342
Suld1DArrayV2I32Trap,
343
Suld1DArrayV2I64Trap,
344
Suld1DArrayV4I8Trap,
345
Suld1DArrayV4I16Trap,
346
Suld1DArrayV4I32Trap,
347
348
Suld2DI8Trap,
349
Suld2DI16Trap,
350
Suld2DI32Trap,
351
Suld2DI64Trap,
352
Suld2DV2I8Trap,
353
Suld2DV2I16Trap,
354
Suld2DV2I32Trap,
355
Suld2DV2I64Trap,
356
Suld2DV4I8Trap,
357
Suld2DV4I16Trap,
358
Suld2DV4I32Trap,
359
360
Suld2DArrayI8Trap,
361
Suld2DArrayI16Trap,
362
Suld2DArrayI32Trap,
363
Suld2DArrayI64Trap,
364
Suld2DArrayV2I8Trap,
365
Suld2DArrayV2I16Trap,
366
Suld2DArrayV2I32Trap,
367
Suld2DArrayV2I64Trap,
368
Suld2DArrayV4I8Trap,
369
Suld2DArrayV4I16Trap,
370
Suld2DArrayV4I32Trap,
371
372
Suld3DI8Trap,
373
Suld3DI16Trap,
374
Suld3DI32Trap,
375
Suld3DI64Trap,
376
Suld3DV2I8Trap,
377
Suld3DV2I16Trap,
378
Suld3DV2I32Trap,
379
Suld3DV2I64Trap,
380
Suld3DV4I8Trap,
381
Suld3DV4I16Trap,
382
Suld3DV4I32Trap,
383
384
Suld1DI8Zero,
385
Suld1DI16Zero,
386
Suld1DI32Zero,
387
Suld1DI64Zero,
388
Suld1DV2I8Zero,
389
Suld1DV2I16Zero,
390
Suld1DV2I32Zero,
391
Suld1DV2I64Zero,
392
Suld1DV4I8Zero,
393
Suld1DV4I16Zero,
394
Suld1DV4I32Zero,
395
396
Suld1DArrayI8Zero,
397
Suld1DArrayI16Zero,
398
Suld1DArrayI32Zero,
399
Suld1DArrayI64Zero,
400
Suld1DArrayV2I8Zero,
401
Suld1DArrayV2I16Zero,
402
Suld1DArrayV2I32Zero,
403
Suld1DArrayV2I64Zero,
404
Suld1DArrayV4I8Zero,
405
Suld1DArrayV4I16Zero,
406
Suld1DArrayV4I32Zero,
407
408
Suld2DI8Zero,
409
Suld2DI16Zero,
410
Suld2DI32Zero,
411
Suld2DI64Zero,
412
Suld2DV2I8Zero,
413
Suld2DV2I16Zero,
414
Suld2DV2I32Zero,
415
Suld2DV2I64Zero,
416
Suld2DV4I8Zero,
417
Suld2DV4I16Zero,
418
Suld2DV4I32Zero,
419
420
Suld2DArrayI8Zero,
421
Suld2DArrayI16Zero,
422
Suld2DArrayI32Zero,
423
Suld2DArrayI64Zero,
424
Suld2DArrayV2I8Zero,
425
Suld2DArrayV2I16Zero,
426
Suld2DArrayV2I32Zero,
427
Suld2DArrayV2I64Zero,
428
Suld2DArrayV4I8Zero,
429
Suld2DArrayV4I16Zero,
430
Suld2DArrayV4I32Zero,
431
432
Suld3DI8Zero,
433
Suld3DI16Zero,
434
Suld3DI32Zero,
435
Suld3DI64Zero,
436
Suld3DV2I8Zero,
437
Suld3DV2I16Zero,
438
Suld3DV2I32Zero,
439
Suld3DV2I64Zero,
440
Suld3DV4I8Zero,
441
Suld3DV4I16Zero,
442
Suld3DV4I32Zero
443
};
444
}
445
446
class NVPTXSubtarget;
447
448
//===--------------------------------------------------------------------===//
449
// TargetLowering Implementation
450
//===--------------------------------------------------------------------===//
451
class NVPTXTargetLowering : public TargetLowering {
452
public:
453
explicit NVPTXTargetLowering(const NVPTXTargetMachine &TM,
454
const NVPTXSubtarget &STI);
455
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
456
457
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
458
459
const char *getTargetNodeName(unsigned Opcode) const override;
460
461
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
462
MachineFunction &MF,
463
unsigned Intrinsic) const override;
464
465
Align getFunctionArgumentAlignment(const Function *F, Type *Ty, unsigned Idx,
466
const DataLayout &DL) const;
467
468
/// getFunctionParamOptimizedAlign - since function arguments are passed via
469
/// .param space, we may want to increase their alignment in a way that
470
/// ensures that we can effectively vectorize their loads & stores. We can
471
/// increase alignment only if the function has internal or has private
472
/// linkage as for other linkage types callers may already rely on default
473
/// alignment. To allow using 128-bit vectorized loads/stores, this function
474
/// ensures that alignment is 16 or greater.
475
Align getFunctionParamOptimizedAlign(const Function *F, Type *ArgTy,
476
const DataLayout &DL) const;
477
478
/// Helper for computing alignment of a device function byval parameter.
479
Align getFunctionByValParamAlign(const Function *F, Type *ArgTy,
480
Align InitialAlign,
481
const DataLayout &DL) const;
482
483
// Helper for getting a function parameter name. Name is composed from
484
// its index and the function name. Negative index corresponds to special
485
// parameter (unsized array) used for passing variable arguments.
486
std::string getParamName(const Function *F, int Idx) const;
487
488
/// isLegalAddressingMode - Return true if the addressing mode represented
489
/// by AM is legal for this target, for a load/store of the specified type
490
/// Used to guide target specific optimizations, like loop strength
491
/// reduction (LoopStrengthReduce.cpp) and memory optimization for
492
/// address mode (CodeGenPrepare.cpp)
493
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
494
unsigned AS,
495
Instruction *I = nullptr) const override;
496
497
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
498
// Truncating 64-bit to 32-bit is free in SASS.
499
if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
500
return false;
501
return SrcTy->getPrimitiveSizeInBits() == 64 &&
502
DstTy->getPrimitiveSizeInBits() == 32;
503
}
504
505
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
506
EVT VT) const override {
507
if (VT.isVector())
508
return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
509
return MVT::i1;
510
}
511
512
ConstraintType getConstraintType(StringRef Constraint) const override;
513
std::pair<unsigned, const TargetRegisterClass *>
514
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
515
StringRef Constraint, MVT VT) const override;
516
517
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
518
bool isVarArg,
519
const SmallVectorImpl<ISD::InputArg> &Ins,
520
const SDLoc &dl, SelectionDAG &DAG,
521
SmallVectorImpl<SDValue> &InVals) const override;
522
523
SDValue LowerCall(CallLoweringInfo &CLI,
524
SmallVectorImpl<SDValue> &InVals) const override;
525
526
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
527
528
std::string
529
getPrototype(const DataLayout &DL, Type *, const ArgListTy &,
530
const SmallVectorImpl<ISD::OutputArg> &, MaybeAlign retAlignment,
531
std::optional<std::pair<unsigned, const APInt &>> VAInfo,
532
const CallBase &CB, unsigned UniqueCallSite) const;
533
534
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
535
const SmallVectorImpl<ISD::OutputArg> &Outs,
536
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
537
SelectionDAG &DAG) const override;
538
539
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
540
std::vector<SDValue> &Ops,
541
SelectionDAG &DAG) const override;
542
543
const NVPTXTargetMachine *nvTM;
544
545
// PTX always uses 32-bit shift amounts
546
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
547
return MVT::i32;
548
}
549
550
TargetLoweringBase::LegalizeTypeAction
551
getPreferredVectorAction(MVT VT) const override;
552
553
// Get the degree of precision we want from 32-bit floating point division
554
// operations.
555
//
556
// 0 - Use ptx div.approx
557
// 1 - Use ptx.div.full (approximate, but less so than div.approx)
558
// 2 - Use IEEE-compliant div instructions, if available.
559
int getDivF32Level() const;
560
561
// Get whether we should use a precise or approximate 32-bit floating point
562
// sqrt instruction.
563
bool usePrecSqrtF32() const;
564
565
// Get whether we should use instructions that flush floating-point denormals
566
// to sign-preserving zero.
567
bool useF32FTZ(const MachineFunction &MF) const;
568
569
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
570
int &ExtraSteps, bool &UseOneConst,
571
bool Reciprocal) const override;
572
573
unsigned combineRepeatedFPDivisors() const override { return 2; }
574
575
bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const;
576
bool allowUnsafeFPMath(MachineFunction &MF) const;
577
578
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
579
EVT) const override {
580
return true;
581
}
582
583
bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
584
585
// The default is to transform llvm.ctlz(x, false) (where false indicates that
586
// x == 0 is not undefined behavior) into a branch that checks whether x is 0
587
// and avoids calling ctlz in that case. We have a dedicated ctlz
588
// instruction, so we say that ctlz is cheap to speculate.
589
bool isCheapToSpeculateCtlz(Type *Ty) const override { return true; }
590
591
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override {
592
return AtomicExpansionKind::None;
593
}
594
595
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override {
596
return AtomicExpansionKind::None;
597
}
598
599
AtomicExpansionKind
600
shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
601
602
bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override {
603
// There's rarely any point of packing something into a vector type if we
604
// already have the source data.
605
return true;
606
}
607
608
private:
609
const NVPTXSubtarget &STI; // cache the subtarget here
610
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT) const;
611
612
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
613
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
614
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
615
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
616
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
617
618
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
619
SDValue LowerFROUND32(SDValue Op, SelectionDAG &DAG) const;
620
SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
621
622
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
623
SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
624
625
SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
626
SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
627
628
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
629
SDValue LowerLOADi1(SDValue Op, SelectionDAG &DAG) const;
630
631
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
632
SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
633
SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
634
635
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
636
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
637
638
SDValue LowerSelect(SDValue Op, SelectionDAG &DAG) const;
639
640
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
641
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
642
643
SDValue LowerCopyToReg_128(SDValue Op, SelectionDAG &DAG) const;
644
unsigned getNumRegisters(LLVMContext &Context, EVT VT,
645
std::optional<MVT> RegisterVT) const override;
646
bool
647
splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
648
SDValue *Parts, unsigned NumParts, MVT PartVT,
649
std::optional<CallingConv::ID> CC) const override;
650
651
void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
652
SelectionDAG &DAG) const override;
653
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
654
655
Align getArgumentAlignment(const CallBase *CB, Type *Ty, unsigned Idx,
656
const DataLayout &DL) const;
657
};
} // namespace llvm

#endif // LLVM_LIB_TARGET_NVPTX_NVPTXISELLOWERING_H