//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition of the TargetLowering class that is common
/// to all AMD GPUs.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {

class AMDGPUMachineFunction;
class AMDGPUSubtarget;
struct ArgDescriptor;

class AMDGPUTargetLowering : public TargetLowering {
private:
  const AMDGPUSubtarget *Subtarget;

  /// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been
  /// legalized from a smaller type VT. Need to match pre-legalized type because
  /// the generic legalization inserts the add/sub between the select and
  /// compare.
  SDValue getFFBX_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL,
                      unsigned Opc) const;
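
  // Schematic illustration (an assumed shape, not a guarantee): after an i16
  // ctlz_zero_undef is legalized to i32, the DAG resembles
  //   (select (setcc %x, 0, eq), -1, (sub (ctlz_zero_undef (zext %x)), 16))
  // and the combine must look through the compensating sub to recover the
  // FFBH_U32 pattern for the original narrow type.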

public:
  /// \returns The minimum number of bits needed to store the value of \p Op as
  /// an unsigned integer. Truncating to this size and then zero-extending to
  /// the original size will not change the value.
  static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG);

  /// \returns The minimum number of bits needed to store the value of \p Op as
  /// a signed integer. Truncating to this size and then sign-extending to the
  /// original size will not change the value.
  static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
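
  // A minimal sketch of what these queries amount to in terms of generic
  // SelectionDAG value tracking (illustrative assumption, not necessarily the
  // exact implementation):
  //   unsigned BitsU = Op.getScalarValueSizeInBits() -
  //                    DAG.computeKnownBits(Op).countMinLeadingZeros();
  //   unsigned BitsS = Op.getScalarValueSizeInBits() -
  //                    DAG.ComputeNumSignBits(Op) + 1;
  // For example, a multiply whose operands both report at most 24 bits is a
  // candidate for the MUL_U24/MUL_I24 paths declared in AMDGPUISD below.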

protected:
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  /// Split a vector store into multiple scalar stores.
  /// \returns The resulting chain.

  SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerFROUNDEVEN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;

  static bool allowApproxFunc(const SelectionDAG &DAG, SDNodeFlags Flags);
  static bool needsDenormHandlingF32(const SelectionDAG &DAG, SDValue Src,
                                     SDNodeFlags Flags);
  SDValue getIsLtSmallestNormal(SelectionDAG &DAG, SDValue Op,
                                SDNodeFlags Flags) const;
  SDValue getIsFinite(SelectionDAG &DAG, SDValue Op, SDNodeFlags Flags) const;
  std::pair<SDValue, SDValue> getScaledLogInput(SelectionDAG &DAG,
                                                const SDLoc SL, SDValue Op,
                                                SDNodeFlags Flags) const;

  SDValue LowerFLOG2(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLOGCommon(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLOG10(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLOGUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
                          bool IsLog10, SDNodeFlags Flags) const;
  SDValue lowerFEXP2(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
                          SDNodeFlags Flags) const;
  SDValue lowerFEXP10Unsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
                            SDNodeFlags Flags) const;
  SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerCTLZResults(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;

protected:
  bool shouldCombineMemoryType(EVT VT) const;
  SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performIntrinsicWOChainCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL,
                                       unsigned Opc, SDValue LHS,
                                       uint32_t ValLo, uint32_t ValHi) const;
  SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performTruncateCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulLoHiCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
                                  SDValue RHS, DAGCombinerInfo &DCI) const;

  SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
                               SDValue N) const;
  SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  TargetLowering::NegatibleCost
  getConstantNegateCost(const ConstantFPSDNode *C) const;

  bool isConstantCostlierToNegate(SDValue N) const;
  bool isConstantCheaperToNegate(SDValue N) const;
  SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);

  virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
                                     SelectionDAG &DAG) const;

  /// Return 64-bit value Op as two 32-bit integers.
  std::pair<SDValue, SDValue> split64BitValue(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue getLoHalf64(SDValue Op, SelectionDAG &DAG) const;
  SDValue getHiHalf64(SDValue Op, SelectionDAG &DAG) const;
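
  // One plausible realization of the 64-to-2x32 split (a sketch assuming the
  // usual bitcast-to-v2i32 idiom; the actual implementation may differ):
  //   SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op);
  //   SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec,
  //                            DAG.getConstant(0, SL, MVT::i32));
  //   SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec,
  //                            DAG.getConstant(1, SL, MVT::i32));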

  /// Split a vector type into two parts. The first part is a power of two
  /// vector. The second part is whatever is left over, and is a scalar if it
  /// would otherwise be a 1-vector.
  std::pair<EVT, EVT> getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const;
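  // For example, under the rule above, v3i32 splits into (v2i32, i32): a
  // power-of-two v2i32 first part, with the leftover 1-vector collapsed to a
  // scalar i32.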

  /// Split a vector value into two parts of types LoVT and HiVT. HiVT could be
  /// scalar.
  std::pair<SDValue, SDValue> splitVector(const SDValue &N, const SDLoc &DL,
                                          const EVT &LoVT, const EVT &HighVT,
                                          SelectionDAG &DAG) const;

  /// Split a vector load into 2 loads of half the vector.
  SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;

  /// Widen a suitably aligned v3 load. For all other cases, split the input
  /// vector load.
  SDValue WidenOrSplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;

  /// Split a vector store into 2 stores of half the vector.
  SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
  void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
                      SmallVectorImpl<SDValue> &Results) const;

  void analyzeFormalArgumentsCompute(
    CCState &State,
    const SmallVectorImpl<ISD::InputArg> &Ins) const;

public:
  AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);

  bool mayIgnoreSignedZero(SDValue Op) const;

  static inline SDValue stripBitcast(SDValue Val) {
    return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
  }

  static bool shouldFoldFNegIntoSrc(SDNode *FNeg, SDValue FNegSrc);
  static bool allUsesHaveSourceMods(const SDNode *N,
                                    unsigned CostThreshold = 4);
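
  // Context for the two hooks above (informal): AMDGPU VALU instructions can
  // apply negate/abs "source modifiers" to their inputs for free, e.g.
  //   v_mul_f32 v0, -v1, |v2|   ; illustrative assembly
  // so folding an fneg into its source pays off when enough uses accept such
  // modifiers.
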
  bool isFAbsFree(EVT VT) const override;
  bool isFNegFree(EVT VT) const override;
  bool isTruncateFree(EVT Src, EVT Dest) const override;
  bool isTruncateFree(Type *Src, Type *Dest) const override;

  bool isZExtFree(Type *Src, Type *Dest) const override;
  bool isZExtFree(EVT Src, EVT Dest) const override;

  SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                               bool LegalOperations, bool ForCodeSize,
                               NegatibleCost &Cost,
                               unsigned Depth) const override;

  bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override;

  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                          ISD::NodeType ExtendKind) const override;

  MVT getVectorIdxTy(const DataLayout &) const override;
  bool isSelectSupported(SelectSupportKind) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;
  bool ShouldShrinkFPConstant(EVT VT) const override;
  bool shouldReduceLoadWidth(SDNode *Load,
                             ISD::LoadExtType ExtType,
                             EVT ExtVT) const override;

  bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG,
                               const MachineMemOperand &MMO) const final;

  bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT,
                                    unsigned NumElem,
                                    unsigned AS) const override;
  bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override;
  bool isCheapToSpeculateCttz(Type *Ty) const override;
  bool isCheapToSpeculateCtlz(Type *Ty) const override;

  bool isSDNodeAlwaysUniform(const SDNode *N) const override;

  // FIXME: This hook should not exist
  AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override {
    return AtomicExpansionKind::None;
  }

  AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override {
    return AtomicExpansionKind::None;
  }

  AtomicExpansionKind shouldCastAtomicRMWIInIR(AtomicRMWInst *) const override {
    return AtomicExpansionKind::None;
  }

  static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
  static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue addTokenForArgument(SDValue Chain,
                              SelectionDAG &DAG,
                              MachineFrameInfo &MFI,
                              int ClobberedFI) const;

  SDValue lowerUnhandledCall(CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals,
                             StringRef Reason) const;
  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
  void ReplaceNodeResults(SDNode *N,
                          SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue combineFMinMaxLegacyImpl(const SDLoc &DL, EVT VT, SDValue LHS,
                                   SDValue RHS, SDValue True, SDValue False,
                                   SDValue CC, DAGCombinerInfo &DCI) const;

  SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS,
                               SDValue RHS, SDValue True, SDValue False,
                               SDValue CC, DAGCombinerInfo &DCI) const;

  const char *getTargetNodeName(unsigned Opcode) const override;

  // FIXME: Turn off MergeConsecutiveStores() before Instruction Selection for
  // AMDGPU. Commit r319036,
  // (https://github.com/llvm/llvm-project/commit/db77e57ea86d941a4262ef60261692f4cb6893e6)
  // turned on MergeConsecutiveStores() before Instruction Selection for all
  // targets. Enough AMDGPU compiles go into an infinite loop (
  // MergeConsecutiveStores() merges two stores; LegalizeStoreOps() un-merges;
  // MergeConsecutiveStores() re-merges, etc. ) to warrant turning it off for
  // now.
  bool mergeStoresAfterLegalization(EVT) const override { return false; }

  bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
    return true;
  }
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &RefinementSteps, bool &UseOneConstNR,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &RefinementSteps) const override;

  virtual SDNode *PostISelFolding(MachineSDNode *N,
                                  SelectionDAG &DAG) const = 0;

  /// Determine which of the bits of \p Op are known to be either zero or one
  /// and return them in the \p Known bitset.
  void computeKnownBitsForTargetNode(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth = 0) const override;

  unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis,
                                            Register R,
                                            const APInt &DemandedElts,
                                            const MachineRegisterInfo &MRI,
                                            unsigned Depth = 0) const override;

  bool isKnownNeverNaNForTargetNode(SDValue Op,
                                    const SelectionDAG &DAG,
                                    bool SNaN = false,
                                    unsigned Depth = 0) const override;

  bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
                           Register N1) const override;

  /// Helper function that adds Reg to the LiveIn list of the DAG's
  /// MachineFunction.
  ///
  /// \returns a RegisterSDNode representing Reg if \p RawReg is true, otherwise
  /// a copy from the register.
  SDValue CreateLiveInRegister(SelectionDAG &DAG,
                               const TargetRegisterClass *RC,
                               Register Reg, EVT VT,
                               const SDLoc &SL,
                               bool RawReg = false) const;
  SDValue CreateLiveInRegister(SelectionDAG &DAG,
                               const TargetRegisterClass *RC,
                               Register Reg, EVT VT) const {
    return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()));
  }

  // Returns the raw live in register rather than a copy from it.
  SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG,
                                  const TargetRegisterClass *RC,
                                  Register Reg, EVT VT) const {
    return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()),
                                true);
  }

  /// Similar to CreateLiveInRegister, except the value may be loaded from a
  /// stack slot rather than passed in a register.
  SDValue loadStackInputValue(SelectionDAG &DAG,
                              EVT VT,
                              const SDLoc &SL,
                              int64_t Offset) const;

  SDValue storeStackInputValue(SelectionDAG &DAG,
                               const SDLoc &SL,
                               SDValue Chain,
                               SDValue ArgVal,
                               int64_t Offset) const;

  SDValue loadInputValue(SelectionDAG &DAG,
                         const TargetRegisterClass *RC,
                         EVT VT, const SDLoc &SL,
                         const ArgDescriptor &Arg) const;

  enum ImplicitParameter {
    FIRST_IMPLICIT,
    PRIVATE_BASE,
    SHARED_BASE,
    QUEUE_PTR,
  };

  /// Helper function that returns the byte offset of the given
  /// type of implicit parameter.
  uint32_t getImplicitParameterOffset(const MachineFunction &MF,
                                      const ImplicitParameter Param) const;
  uint32_t getImplicitParameterOffset(const uint64_t ExplicitKernArgSize,
                                      const ImplicitParameter Param) const;
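
  // Hypothetical usage sketch (names assumed): the two overloads differ only
  // in how the explicit kernel-argument size is obtained, so, for example,
  //   uint32_t Off = getImplicitParameterOffset(MF, QUEUE_PTR);
  // yields the byte offset of the implicit queue pointer within the kernel
  // argument segment.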

  MVT getFenceOperandTy(const DataLayout &DL) const override {
    return MVT::i32;
  }

  AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;
};

namespace AMDGPUISD {

enum NodeType : unsigned {
  // AMDIL ISD Opcodes
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  UMUL, // 32-bit unsigned multiplication
  BRANCH_COND,
  // End AMDIL ISD Opcodes

  // Function call.
  CALL,
  TC_RETURN,
  TC_RETURN_GFX,
  TC_RETURN_CHAIN,
  TRAP,

  // Masked control flow nodes.
  IF,
  ELSE,
  LOOP,

  // A uniform kernel return that terminates the wavefront.
  ENDPGM,

  // s_endpgm, but we may want to insert it in the middle of the block.
  ENDPGM_TRAP,

  // "s_trap 2" equivalent on hardware that does not support it.
  SIMULATED_TRAP,

  // Return to a shader part's epilog code.
  RETURN_TO_EPILOG,

  // Return with values from a non-entry function.
  RET_GLUE,

  // Convert an unswizzled wave uniform stack address to an address compatible
  // with a vector offset for use in stack access.
  WAVE_ADDRESS,

  DWORDADDR,
  FRACT,

  /// CLAMP value between 0.0 and 1.0. NaN clamped to 0, following clamp output
  /// modifier behavior with dx10_enable.
  CLAMP,
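  // Informally, given the dx10_clamp behavior described above:
  //   CLAMP(x) = isnan(x) ? 0.0 : min(max(x, 0.0), 1.0)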

  // This is SETCC with the full mask result which is used for a compare with a
  // result bit per item in the wavefront.
  SETCC,
  SETREG,

  DENORM_MODE,

  // FP ops with input and output chain.
  FMA_W_CHAIN,
  FMUL_W_CHAIN,

  // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
  // Denormals handled on some parts.
  COS_HW,
  SIN_HW,
  FMAX_LEGACY,
  FMIN_LEGACY,

  FMAX3,
  SMAX3,
  UMAX3,
  FMIN3,
  SMIN3,
  UMIN3,
  FMED3,
  SMED3,
  UMED3,
  FMAXIMUM3,
  FMINIMUM3,
  FDOT2,
  URECIP,
  DIV_SCALE,
  DIV_FMAS,
  DIV_FIXUP,
  // For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is
  // treated as an illegal operation.
  FMAD_FTZ,

  // RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
  // For f64, max error 2^29 ULP, handles denormals.
  RCP,
  RSQ,
  RCP_LEGACY,
  RCP_IFLAG,

  // log2, no denormal handling for f32.
  LOG,

  // exp2, no denormal handling for f32.
  EXP,

  FMUL_LEGACY,
  RSQ_CLAMP,
  FP_CLASS,
  DOT4,
  CARRY,
  BORROW,
  BFE_U32, // Extract range of bits with zero extension to 32-bits.
  BFE_I32, // Extract range of bits with sign extension to 32-bits.
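  // Informal semantics, inferred from the descriptions above (assumption):
  //   BFE_U32(src, off, w) = (src >> off) & ((1u << w) - 1)
  //   BFE_I32 is the same extraction, then sign-extended from bit w-1.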
  BFI, // (src0 & src1) | (~src0 & src2)
  BFM, // Insert a range of bits into a 32-bit word.
  FFBH_U32, // ctlz with -1 if input is zero.
  FFBH_I32,
  FFBL_B32, // cttz with -1 if input is zero.
  MUL_U24,
  MUL_I24,
  MULHI_U24,
  MULHI_I24,
  MAD_U24,
  MAD_I24,
  MAD_U64_U32,
  MAD_I64_I32,
  PERM,
  TEXTURE_FETCH,
  R600_EXPORT,
  CONST_ADDRESS,
  REGISTER_LOAD,
  REGISTER_STORE,
  SAMPLE,
  SAMPLEB,
  SAMPLED,
  SAMPLEL,

  // These cvt_f32_ubyte* nodes need to remain consecutive and in order.
  CVT_F32_UBYTE0,
  CVT_F32_UBYTE1,
  CVT_F32_UBYTE2,
  CVT_F32_UBYTE3,
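  // (Presumably so a conversion opcode can be computed as CVT_F32_UBYTE0 plus
  // the byte index; an inference from the ordering requirement, not a stated
  // contract.)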

  // Convert two f32 values into a single register holding two packed f16
  // with round to zero.
  CVT_PKRTZ_F16_F32,
  CVT_PKNORM_I16_F32,
  CVT_PKNORM_U16_F32,
  CVT_PK_I16_I32,
  CVT_PK_U16_U32,

  // Same as the standard node, except the high bits of the resulting integer
  // are known 0.
  FP_TO_FP16,

  /// This node is for VLIW targets and it is used to represent a vector
  /// that is stored in consecutive registers with the same channel.
  /// For example:
  ///   |X  |Y|Z|W|
  /// T0|v.x| | | |
  /// T1|v.y| | | |
  /// T2|v.z| | | |
  /// T3|v.w| | | |
  BUILD_VERTICAL_VECTOR,
  /// Pointer to the start of the shader's constant data.
  CONST_DATA_PTR,
  PC_ADD_REL_OFFSET,
  LDS,
  FPTRUNC_ROUND_UPWARD,
  FPTRUNC_ROUND_DOWNWARD,

  DUMMY_CHAIN,
  FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LOAD_D16_HI,
  LOAD_D16_LO,
  LOAD_D16_HI_I8,
  LOAD_D16_HI_U8,
  LOAD_D16_LO_I8,
  LOAD_D16_LO_U8,

  STORE_MSKOR,
  LOAD_CONSTANT,
  TBUFFER_STORE_FORMAT,
  TBUFFER_STORE_FORMAT_D16,
  TBUFFER_LOAD_FORMAT,
  TBUFFER_LOAD_FORMAT_D16,
  DS_ORDERED_COUNT,
  ATOMIC_CMP_SWAP,
  BUFFER_LOAD,
  BUFFER_LOAD_UBYTE,
  BUFFER_LOAD_USHORT,
  BUFFER_LOAD_BYTE,
  BUFFER_LOAD_SHORT,
  BUFFER_LOAD_TFE,
  BUFFER_LOAD_UBYTE_TFE,
  BUFFER_LOAD_USHORT_TFE,
  BUFFER_LOAD_BYTE_TFE,
  BUFFER_LOAD_SHORT_TFE,
  BUFFER_LOAD_FORMAT,
  BUFFER_LOAD_FORMAT_TFE,
  BUFFER_LOAD_FORMAT_D16,
  SBUFFER_LOAD,
  SBUFFER_LOAD_BYTE,
  SBUFFER_LOAD_UBYTE,
  SBUFFER_LOAD_SHORT,
  SBUFFER_LOAD_USHORT,
  BUFFER_STORE,
  BUFFER_STORE_BYTE,
  BUFFER_STORE_SHORT,
  BUFFER_STORE_FORMAT,
  BUFFER_STORE_FORMAT_D16,
  BUFFER_ATOMIC_SWAP,
  BUFFER_ATOMIC_ADD,
  BUFFER_ATOMIC_SUB,
  BUFFER_ATOMIC_SMIN,
  BUFFER_ATOMIC_UMIN,
  BUFFER_ATOMIC_SMAX,
  BUFFER_ATOMIC_UMAX,
  BUFFER_ATOMIC_AND,
  BUFFER_ATOMIC_OR,
  BUFFER_ATOMIC_XOR,
  BUFFER_ATOMIC_INC,
  BUFFER_ATOMIC_DEC,
  BUFFER_ATOMIC_CMPSWAP,
  BUFFER_ATOMIC_CSUB,
  BUFFER_ATOMIC_FADD,
  BUFFER_ATOMIC_FMIN,
  BUFFER_ATOMIC_FMAX,
  BUFFER_ATOMIC_COND_SUB_U32,

  LAST_AMDGPU_ISD_NUMBER
};

} // End namespace AMDGPUISD

} // End namespace llvm

#endif