GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
1
//=- LoongArchISelLowering.cpp - LoongArch DAG Lowering Implementation ---===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines the interfaces that LoongArch uses to lower LLVM code into
10
// a selection DAG.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "LoongArchISelLowering.h"
15
#include "LoongArch.h"
16
#include "LoongArchMachineFunctionInfo.h"
17
#include "LoongArchRegisterInfo.h"
18
#include "LoongArchSubtarget.h"
19
#include "LoongArchTargetMachine.h"
20
#include "MCTargetDesc/LoongArchBaseInfo.h"
21
#include "MCTargetDesc/LoongArchMCTargetDesc.h"
22
#include "llvm/ADT/Statistic.h"
23
#include "llvm/ADT/StringExtras.h"
24
#include "llvm/CodeGen/ISDOpcodes.h"
25
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
26
#include "llvm/CodeGen/SelectionDAGNodes.h"
27
#include "llvm/IR/IRBuilder.h"
28
#include "llvm/IR/IntrinsicsLoongArch.h"
29
#include "llvm/Support/CodeGen.h"
30
#include "llvm/Support/Debug.h"
31
#include "llvm/Support/ErrorHandling.h"
32
#include "llvm/Support/KnownBits.h"
33
#include "llvm/Support/MathExtras.h"
34
35
using namespace llvm;
36
37
#define DEBUG_TYPE "loongarch-isel-lowering"
38
39
STATISTIC(NumTailCalls, "Number of tail calls");
40
41
static cl::opt<bool> ZeroDivCheck("loongarch-check-zero-division", cl::Hidden,
42
cl::desc("Trap on integer division by zero."),
43
cl::init(false));
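// This hidden flag can be toggled from the llc command line, e.g.
// `llc -mtriple=loongarch64 -loongarch-check-zero-division ...`
// (illustrative invocation; the option defaults to false).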
44
45
LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
46
const LoongArchSubtarget &STI)
47
: TargetLowering(TM), Subtarget(STI) {
48
49
MVT GRLenVT = Subtarget.getGRLenVT();
50
51
// Set up the register classes.
52
53
addRegisterClass(GRLenVT, &LoongArch::GPRRegClass);
54
if (Subtarget.hasBasicF())
55
addRegisterClass(MVT::f32, &LoongArch::FPR32RegClass);
56
if (Subtarget.hasBasicD())
57
addRegisterClass(MVT::f64, &LoongArch::FPR64RegClass);
58
59
static const MVT::SimpleValueType LSXVTs[] = {
60
MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32, MVT::v2f64};
61
static const MVT::SimpleValueType LASXVTs[] = {
62
MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, MVT::v8f32, MVT::v4f64};
63
64
if (Subtarget.hasExtLSX())
65
for (MVT VT : LSXVTs)
66
addRegisterClass(VT, &LoongArch::LSX128RegClass);
67
68
if (Subtarget.hasExtLASX())
69
for (MVT VT : LASXVTs)
70
addRegisterClass(VT, &LoongArch::LASX256RegClass);
71
72
// Set operations for LA32 and LA64.
73
74
setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, GRLenVT,
75
MVT::i1, Promote);
76
77
setOperationAction(ISD::SHL_PARTS, GRLenVT, Custom);
78
setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
79
setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
80
setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
81
setOperationAction(ISD::ROTL, GRLenVT, Expand);
82
setOperationAction(ISD::CTPOP, GRLenVT, Expand);
83
84
setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
85
ISD::JumpTable, ISD::GlobalTLSAddress},
86
GRLenVT, Custom);
87
88
setOperationAction(ISD::EH_DWARF_CFA, GRLenVT, Custom);
89
90
setOperationAction(ISD::DYNAMIC_STACKALLOC, GRLenVT, Expand);
91
setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
92
setOperationAction(ISD::VASTART, MVT::Other, Custom);
93
setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
94
95
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
96
setOperationAction(ISD::TRAP, MVT::Other, Legal);
97
98
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
99
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
100
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
101
102
// Expand bitreverse.i16 with native-width bitrev and shift for now, before
103
// we know whether sll or revb.2h is faster.
104
setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
105
setOperationAction(ISD::BITREVERSE, GRLenVT, Legal);
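// For example (illustrative), on LA64 an i16 bit-reverse can be expanded as
// a 48-bit shift paired with the full-width bitrev.d, rather than a
// revb.2h-based sequence.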
106
107
// LA32 does not have REVB.2W and REVB.D due to the 64-bit operands, and
108
// the narrower REVB.W does not exist. But LA32 does have REVB.2H, so i16
109
// and i32 could still be byte-swapped relatively cheaply.
110
setOperationAction(ISD::BSWAP, MVT::i16, Custom);
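// For example (illustrative), an i16 byte swap maps onto revb.2h (which
// swaps the bytes within each halfword), and an i32 byte swap on LA32 can
// be built from revb.2h followed by a 16-bit rotate.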
111
112
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
113
setOperationAction(ISD::BR_CC, GRLenVT, Expand);
114
setOperationAction(ISD::SELECT_CC, GRLenVT, Expand);
115
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
116
setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, GRLenVT, Expand);
117
118
setOperationAction(ISD::FP_TO_UINT, GRLenVT, Custom);
119
setOperationAction(ISD::UINT_TO_FP, GRLenVT, Expand);
120
121
// Set operations for LA64 only.
122
123
if (Subtarget.is64Bit()) {
124
setOperationAction(ISD::ADD, MVT::i32, Custom);
125
setOperationAction(ISD::SUB, MVT::i32, Custom);
126
setOperationAction(ISD::SHL, MVT::i32, Custom);
127
setOperationAction(ISD::SRA, MVT::i32, Custom);
128
setOperationAction(ISD::SRL, MVT::i32, Custom);
129
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
130
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
131
setOperationAction(ISD::ROTR, MVT::i32, Custom);
132
setOperationAction(ISD::ROTL, MVT::i32, Custom);
133
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
134
setOperationAction(ISD::CTLZ, MVT::i32, Custom);
135
setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
136
setOperationAction(ISD::READ_REGISTER, MVT::i32, Custom);
137
setOperationAction(ISD::WRITE_REGISTER, MVT::i32, Custom);
138
setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
139
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
140
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
141
142
setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
143
setOperationAction(ISD::BSWAP, MVT::i32, Custom);
144
setOperationAction({ISD::UDIV, ISD::UREM}, MVT::i32, Custom);
145
}
146
147
// Set operations for LA32 only.
148
149
if (!Subtarget.is64Bit()) {
150
setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
151
setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
152
setOperationAction(ISD::INTRINSIC_VOID, MVT::i64, Custom);
153
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
154
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
155
}
156
157
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
158
159
static const ISD::CondCode FPCCToExpand[] = {
160
ISD::SETOGT, ISD::SETOGE, ISD::SETUGT, ISD::SETUGE,
161
ISD::SETGE, ISD::SETNE, ISD::SETGT};
162
163
// Set operations for 'F' feature.
164
165
if (Subtarget.hasBasicF()) {
166
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
167
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
168
setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
169
170
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
171
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
172
setOperationAction(ISD::FMA, MVT::f32, Legal);
173
setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
174
setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
175
setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
176
setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
177
setOperationAction(ISD::IS_FPCLASS, MVT::f32, Legal);
178
setOperationAction(ISD::FSIN, MVT::f32, Expand);
179
setOperationAction(ISD::FCOS, MVT::f32, Expand);
180
setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
181
setOperationAction(ISD::FPOW, MVT::f32, Expand);
182
setOperationAction(ISD::FREM, MVT::f32, Expand);
183
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
184
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
185
186
if (Subtarget.is64Bit())
187
setOperationAction(ISD::FRINT, MVT::f32, Legal);
188
189
if (!Subtarget.hasBasicD()) {
190
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
191
if (Subtarget.is64Bit()) {
192
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
193
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
194
}
195
}
196
}
197
198
// Set operations for 'D' feature.
199
200
if (Subtarget.hasBasicD()) {
201
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
202
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
203
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
204
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
205
setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
206
207
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
208
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
209
setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
210
setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
211
setOperationAction(ISD::FMA, MVT::f64, Legal);
212
setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
213
setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
214
setOperationAction(ISD::IS_FPCLASS, MVT::f64, Legal);
215
setOperationAction(ISD::FSIN, MVT::f64, Expand);
216
setOperationAction(ISD::FCOS, MVT::f64, Expand);
217
setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
218
setOperationAction(ISD::FPOW, MVT::f64, Expand);
219
setOperationAction(ISD::FREM, MVT::f64, Expand);
220
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
221
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
222
223
if (Subtarget.is64Bit())
224
setOperationAction(ISD::FRINT, MVT::f64, Legal);
225
}
226
227
// Set operations for 'LSX' feature.
228
229
if (Subtarget.hasExtLSX()) {
230
for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
231
// Expand all truncating stores and extending loads.
232
for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
233
setTruncStoreAction(VT, InnerVT, Expand);
234
setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
235
setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
236
setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
237
}
238
// By default everything must be expanded. Then we will selectively turn
239
// on ones that can be effectively codegen'd.
240
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
241
setOperationAction(Op, VT, Expand);
242
}
243
244
for (MVT VT : LSXVTs) {
245
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
246
setOperationAction(ISD::BITCAST, VT, Legal);
247
setOperationAction(ISD::UNDEF, VT, Legal);
248
249
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
250
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
251
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
252
253
setOperationAction(ISD::SETCC, VT, Legal);
254
setOperationAction(ISD::VSELECT, VT, Legal);
255
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
256
}
257
for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) {
258
setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
259
setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
260
Legal);
261
setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
262
VT, Legal);
263
setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
264
setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
265
setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
266
setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
267
setCondCodeAction(
268
{ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
269
Expand);
270
}
271
for (MVT VT : {MVT::v4i32, MVT::v2i64}) {
272
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
273
setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
274
}
275
for (MVT VT : {MVT::v4f32, MVT::v2f64}) {
276
setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
277
setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
278
setOperationAction(ISD::FMA, VT, Legal);
279
setOperationAction(ISD::FSQRT, VT, Legal);
280
setOperationAction(ISD::FNEG, VT, Legal);
281
setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
282
ISD::SETUGE, ISD::SETUGT},
283
VT, Expand);
284
}
285
}
286
287
// Set operations for 'LASX' feature.
288
289
if (Subtarget.hasExtLASX()) {
290
for (MVT VT : LASXVTs) {
291
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Legal);
292
setOperationAction(ISD::BITCAST, VT, Legal);
293
setOperationAction(ISD::UNDEF, VT, Legal);
294
295
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
296
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
297
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
298
setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
299
300
setOperationAction(ISD::SETCC, VT, Legal);
301
setOperationAction(ISD::VSELECT, VT, Legal);
302
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
303
}
304
for (MVT VT : {MVT::v4i64, MVT::v8i32, MVT::v16i16, MVT::v32i8}) {
305
setOperationAction({ISD::ADD, ISD::SUB}, VT, Legal);
306
setOperationAction({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN}, VT,
307
Legal);
308
setOperationAction({ISD::MUL, ISD::SDIV, ISD::SREM, ISD::UDIV, ISD::UREM},
309
VT, Legal);
310
setOperationAction({ISD::AND, ISD::OR, ISD::XOR}, VT, Legal);
311
setOperationAction({ISD::SHL, ISD::SRA, ISD::SRL}, VT, Legal);
312
setOperationAction({ISD::CTPOP, ISD::CTLZ}, VT, Legal);
313
setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Legal);
314
setCondCodeAction(
315
{ISD::SETNE, ISD::SETGE, ISD::SETGT, ISD::SETUGE, ISD::SETUGT}, VT,
316
Expand);
317
}
318
for (MVT VT : {MVT::v8i32, MVT::v4i32, MVT::v4i64}) {
319
setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP}, VT, Legal);
320
setOperationAction({ISD::FP_TO_SINT, ISD::FP_TO_UINT}, VT, Legal);
321
}
322
for (MVT VT : {MVT::v8f32, MVT::v4f64}) {
323
setOperationAction({ISD::FADD, ISD::FSUB}, VT, Legal);
324
setOperationAction({ISD::FMUL, ISD::FDIV}, VT, Legal);
325
setOperationAction(ISD::FMA, VT, Legal);
326
setOperationAction(ISD::FSQRT, VT, Legal);
327
setOperationAction(ISD::FNEG, VT, Legal);
328
setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT,
329
ISD::SETUGE, ISD::SETUGT},
330
VT, Expand);
331
}
332
}
333
334
// Set DAG combine for LA32 and LA64.
335
336
setTargetDAGCombine(ISD::AND);
337
setTargetDAGCombine(ISD::OR);
338
setTargetDAGCombine(ISD::SRL);
339
setTargetDAGCombine(ISD::SETCC);
340
341
// Set DAG combine for 'LSX' feature.
342
343
if (Subtarget.hasExtLSX())
344
setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
345
346
// Compute derived properties from the register classes.
347
computeRegisterProperties(Subtarget.getRegisterInfo());
348
349
setStackPointerRegisterToSaveRestore(LoongArch::R3);
350
351
setBooleanContents(ZeroOrOneBooleanContent);
352
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
353
354
setMaxAtomicSizeInBitsSupported(Subtarget.getGRLen());
355
356
setMinCmpXchgSizeInBits(32);
357
358
// Function alignments.
359
setMinFunctionAlignment(Align(4));
360
// Set preferred alignments.
361
setPrefFunctionAlignment(Subtarget.getPrefFunctionAlignment());
362
setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
363
setMaxBytesForAlignment(Subtarget.getMaxBytesForAlignment());
364
}
365
366
bool LoongArchTargetLowering::isOffsetFoldingLegal(
367
const GlobalAddressSDNode *GA) const {
368
// In order to maximise the opportunity for common subexpression elimination,
369
// keep a separate ADD node for the global address offset instead of folding
370
// it in the global address node. Later peephole optimisations may choose to
371
// fold it back in when profitable.
372
return false;
373
}
374
375
SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
376
SelectionDAG &DAG) const {
377
switch (Op.getOpcode()) {
378
case ISD::ATOMIC_FENCE:
379
return lowerATOMIC_FENCE(Op, DAG);
380
case ISD::EH_DWARF_CFA:
381
return lowerEH_DWARF_CFA(Op, DAG);
382
case ISD::GlobalAddress:
383
return lowerGlobalAddress(Op, DAG);
384
case ISD::GlobalTLSAddress:
385
return lowerGlobalTLSAddress(Op, DAG);
386
case ISD::INTRINSIC_WO_CHAIN:
387
return lowerINTRINSIC_WO_CHAIN(Op, DAG);
388
case ISD::INTRINSIC_W_CHAIN:
389
return lowerINTRINSIC_W_CHAIN(Op, DAG);
390
case ISD::INTRINSIC_VOID:
391
return lowerINTRINSIC_VOID(Op, DAG);
392
case ISD::BlockAddress:
393
return lowerBlockAddress(Op, DAG);
394
case ISD::JumpTable:
395
return lowerJumpTable(Op, DAG);
396
case ISD::SHL_PARTS:
397
return lowerShiftLeftParts(Op, DAG);
398
case ISD::SRA_PARTS:
399
return lowerShiftRightParts(Op, DAG, true);
400
case ISD::SRL_PARTS:
401
return lowerShiftRightParts(Op, DAG, false);
402
case ISD::ConstantPool:
403
return lowerConstantPool(Op, DAG);
404
case ISD::FP_TO_SINT:
405
return lowerFP_TO_SINT(Op, DAG);
406
case ISD::BITCAST:
407
return lowerBITCAST(Op, DAG);
408
case ISD::UINT_TO_FP:
409
return lowerUINT_TO_FP(Op, DAG);
410
case ISD::SINT_TO_FP:
411
return lowerSINT_TO_FP(Op, DAG);
412
case ISD::VASTART:
413
return lowerVASTART(Op, DAG);
414
case ISD::FRAMEADDR:
415
return lowerFRAMEADDR(Op, DAG);
416
case ISD::RETURNADDR:
417
return lowerRETURNADDR(Op, DAG);
418
case ISD::WRITE_REGISTER:
419
return lowerWRITE_REGISTER(Op, DAG);
420
case ISD::INSERT_VECTOR_ELT:
421
return lowerINSERT_VECTOR_ELT(Op, DAG);
422
case ISD::EXTRACT_VECTOR_ELT:
423
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
424
case ISD::BUILD_VECTOR:
425
return lowerBUILD_VECTOR(Op, DAG);
426
case ISD::VECTOR_SHUFFLE:
427
return lowerVECTOR_SHUFFLE(Op, DAG);
428
}
429
return SDValue();
430
}
431
432
/// Determine whether a range fits a regular pattern of values.
433
/// This function accounts for the possibility of jumping over the End iterator.
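///
/// For example, with Begin pointing at the mask <0, 5, 2, 7, 4, 1>,
/// fitsRegularPattern<int>(Begin, 2, End, 0, 2) returns true: only the
/// elements at stride 2 (values 0, 2, 4) are checked against the expected
/// sequence 0, 2, 4, and any -1 (undef) entry would also match.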
434
template <typename ValType>
435
static bool
436
fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin,
437
unsigned CheckStride,
438
typename SmallVectorImpl<ValType>::const_iterator End,
439
ValType ExpectedIndex, unsigned ExpectedIndexStride) {
440
auto &I = Begin;
441
442
while (I != End) {
443
if (*I != -1 && *I != ExpectedIndex)
444
return false;
445
ExpectedIndex += ExpectedIndexStride;
446
447
// Incrementing past End is undefined behaviour so we must increment one
448
// step at a time and check for End at each step.
449
for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I)
450
; // Empty loop body.
451
}
452
return true;
453
}
454
455
/// Lower VECTOR_SHUFFLE into VREPLVEI (if possible).
456
///
457
/// VREPLVEI performs vector broadcast based on an element specified by an
458
/// integer immediate, with its mask being similar to:
459
/// <x, x, x, ...>
460
/// where x is any valid index.
461
///
462
/// When undef's appear in the mask they are treated as if they were whatever
463
/// value is necessary in order to fit the above form.
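///
/// For example (illustrative), the v4i32 mask <1, 1, 1, 1> broadcasts
/// element 1 of the first operand and can be selected to vreplvei.w with
/// immediate 1.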
464
static SDValue lowerVECTOR_SHUFFLE_VREPLVEI(const SDLoc &DL, ArrayRef<int> Mask,
465
MVT VT, SDValue V1, SDValue V2,
466
SelectionDAG &DAG) {
467
int SplatIndex = -1;
468
for (const auto &M : Mask) {
469
if (M != -1) {
470
SplatIndex = M;
471
break;
472
}
473
}
474
475
if (SplatIndex == -1)
476
return DAG.getUNDEF(VT);
477
478
assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
479
if (fitsRegularPattern<int>(Mask.begin(), 1, Mask.end(), SplatIndex, 0)) {
480
APInt Imm(64, SplatIndex);
481
return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
482
DAG.getConstant(Imm, DL, MVT::i64));
483
}
484
485
return SDValue();
486
}
487
488
/// Lower VECTOR_SHUFFLE into VSHUF4I (if possible).
489
///
490
/// VSHUF4I splits the vector into blocks of four elements, then shuffles these
491
/// elements according to a <4 x i2> constant (encoded as an integer immediate).
492
///
493
/// It is therefore possible to lower into VSHUF4I when the mask takes the form:
494
/// <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...>
495
/// When undef's appear they are treated as if they were whatever value is
496
/// necessary in order to fit the above forms.
497
///
498
/// For example:
499
/// %2 = shufflevector <8 x i16> %0, <8 x i16> undef,
500
/// <8 x i32> <i32 3, i32 2, i32 1, i32 0,
501
/// i32 7, i32 6, i32 5, i32 4>
502
/// is lowered to:
503
/// (VSHUF4I_H $v0, $v1, 27)
504
/// where the 27 comes from:
505
/// 3 + (2 << 2) + (1 << 4) + (0 << 6)
506
static SDValue lowerVECTOR_SHUFFLE_VSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
507
MVT VT, SDValue V1, SDValue V2,
508
SelectionDAG &DAG) {
509
510
// When the size is less than 4, lower cost instructions may be used.
511
if (Mask.size() < 4)
512
return SDValue();
513
514
int SubMask[4] = {-1, -1, -1, -1};
515
for (unsigned i = 0; i < 4; ++i) {
516
for (unsigned j = i; j < Mask.size(); j += 4) {
517
int Idx = Mask[j];
518
519
// Convert from vector index to 4-element subvector index
520
// If an index refers to an element outside of the subvector then give up
521
if (Idx != -1) {
522
Idx -= 4 * (j / 4);
523
if (Idx < 0 || Idx >= 4)
524
return SDValue();
525
}
526
527
// If the mask has an undef, replace it with the current index.
528
// Note that it might still be undef if the current index is also undef
529
if (SubMask[i] == -1)
530
SubMask[i] = Idx;
531
// Check that non-undef values are the same as in the mask. If they
532
// aren't then give up
533
else if (Idx != -1 && Idx != SubMask[i])
534
return SDValue();
535
}
536
}
537
538
// Calculate the immediate. Replace any remaining undefs with zero
539
APInt Imm(64, 0);
540
for (int i = 3; i >= 0; --i) {
541
int Idx = SubMask[i];
542
543
if (Idx == -1)
544
Idx = 0;
545
546
Imm <<= 2;
547
Imm |= Idx & 0x3;
548
}
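// e.g. for the mask <3, 2, 1, 0, 7, 6, 5, 4> shown in the function comment,
// SubMask is {3, 2, 1, 0} and the loop above yields Imm = 27 (0b00011011).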
549
550
return DAG.getNode(LoongArchISD::VSHUF4I, DL, VT, V1,
551
DAG.getConstant(Imm, DL, MVT::i64));
552
}
553
554
/// Lower VECTOR_SHUFFLE into VPACKEV (if possible).
555
///
556
/// VPACKEV interleaves the even elements from each vector.
557
///
558
/// It is possible to lower into VPACKEV when the mask consists of two of the
559
/// following forms interleaved:
560
/// <0, 2, 4, ...>
561
/// <n, n+2, n+4, ...>
562
/// where n is the number of elements in the vector.
563
/// For example:
564
/// <0, 0, 2, 2, 4, 4, ...>
565
/// <0, n, 2, n+2, 4, n+4, ...>
566
///
567
/// When undef's appear in the mask they are treated as if they were whatever
568
/// value is necessary in order to fit the above forms.
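///
/// For example (illustrative), the v4i32 mask <0, 4, 2, 6> interleaves the
/// even elements of the two sources and is matched by this routine.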
569
static SDValue lowerVECTOR_SHUFFLE_VPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
570
MVT VT, SDValue V1, SDValue V2,
571
SelectionDAG &DAG) {
572
573
const auto &Begin = Mask.begin();
574
const auto &End = Mask.end();
575
SDValue OriV1 = V1, OriV2 = V2;
576
577
if (fitsRegularPattern<int>(Begin, 2, End, 0, 2))
578
V1 = OriV1;
579
else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 2))
580
V1 = OriV2;
581
else
582
return SDValue();
583
584
if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2))
585
V2 = OriV1;
586
else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 2))
587
V2 = OriV2;
588
else
589
return SDValue();
590
591
return DAG.getNode(LoongArchISD::VPACKEV, DL, VT, V2, V1);
592
}
593
594
/// Lower VECTOR_SHUFFLE into VPACKOD (if possible).
595
///
596
/// VPACKOD interleaves the odd elements from each vector.
597
///
598
/// It is possible to lower into VPACKOD when the mask consists of two of the
599
/// following forms interleaved:
600
/// <1, 3, 5, ...>
601
/// <n+1, n+3, n+5, ...>
602
/// where n is the number of elements in the vector.
603
/// For example:
604
/// <1, 1, 3, 3, 5, 5, ...>
605
/// <1, n+1, 3, n+3, 5, n+5, ...>
606
///
607
/// When undef's appear in the mask they are treated as if they were whatever
608
/// value is necessary in order to fit the above forms.
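///
/// For example (illustrative), the v4i32 mask <1, 5, 3, 7> interleaves the
/// odd elements of the two sources and is matched by this routine.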
609
static SDValue lowerVECTOR_SHUFFLE_VPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
610
MVT VT, SDValue V1, SDValue V2,
611
SelectionDAG &DAG) {
612
613
const auto &Begin = Mask.begin();
614
const auto &End = Mask.end();
615
SDValue OriV1 = V1, OriV2 = V2;
616
617
if (fitsRegularPattern<int>(Begin, 2, End, 1, 2))
618
V1 = OriV1;
619
else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + 1, 2))
620
V1 = OriV2;
621
else
622
return SDValue();
623
624
if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2))
625
V2 = OriV1;
626
else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + 1, 2))
627
V2 = OriV2;
628
else
629
return SDValue();
630
631
return DAG.getNode(LoongArchISD::VPACKOD, DL, VT, V2, V1);
632
}
633
634
/// Lower VECTOR_SHUFFLE into VILVH (if possible).
635
///
636
/// VILVH interleaves consecutive elements from the left (highest-indexed) half
637
/// of each vector.
638
///
639
/// It is possible to lower into VILVH when the mask consists of two of the
640
/// following forms interleaved:
641
/// <x, x+1, x+2, ...>
642
/// <n+x, n+x+1, n+x+2, ...>
643
/// where n is the number of elements in the vector and x is half n.
644
/// For example:
645
/// <x, x, x+1, x+1, x+2, x+2, ...>
646
/// <x, n+x, x+1, n+x+1, x+2, n+x+2, ...>
647
///
648
/// When undef's appear in the mask they are treated as if they were whatever
649
/// value is necessary in order to fit the above forms.
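///
/// For example (illustrative), the v4i32 mask <2, 6, 3, 7> interleaves the
/// high halves (elements 2..3) of the two sources and is matched here.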
650
static SDValue lowerVECTOR_SHUFFLE_VILVH(const SDLoc &DL, ArrayRef<int> Mask,
651
MVT VT, SDValue V1, SDValue V2,
652
SelectionDAG &DAG) {
653
654
const auto &Begin = Mask.begin();
655
const auto &End = Mask.end();
656
unsigned HalfSize = Mask.size() / 2;
657
SDValue OriV1 = V1, OriV2 = V2;
658
659
if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1))
660
V1 = OriV1;
661
else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size() + HalfSize, 1))
662
V1 = OriV2;
663
else
664
return SDValue();
665
666
if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1))
667
V2 = OriV1;
668
else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size() + HalfSize,
669
1))
670
V2 = OriV2;
671
else
672
return SDValue();
673
674
return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
675
}
676
677
/// Lower VECTOR_SHUFFLE into VILVL (if possible).
678
///
679
/// VILVL interleaves consecutive elements from the right (lowest-indexed) half
680
/// of each vector.
681
///
682
/// It is possible to lower into VILVL when the mask consists of two of the
683
/// following forms interleaved:
684
/// <0, 1, 2, ...>
685
/// <n, n+1, n+2, ...>
686
/// where n is the number of elements in the vector.
687
/// For example:
688
/// <0, 0, 1, 1, 2, 2, ...>
689
/// <0, n, 1, n+1, 2, n+2, ...>
690
///
691
/// When undef's appear in the mask they are treated as if they were whatever
692
/// value is necessary in order to fit the above forms.
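///
/// For example (illustrative), the v4i32 mask <0, 4, 1, 5> interleaves the
/// low halves (elements 0..1) of the two sources and is matched here.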
693
static SDValue lowerVECTOR_SHUFFLE_VILVL(const SDLoc &DL, ArrayRef<int> Mask,
694
MVT VT, SDValue V1, SDValue V2,
695
SelectionDAG &DAG) {
696
697
const auto &Begin = Mask.begin();
698
const auto &End = Mask.end();
699
SDValue OriV1 = V1, OriV2 = V2;
700
701
if (fitsRegularPattern<int>(Begin, 2, End, 0, 1))
702
V1 = OriV1;
703
else if (fitsRegularPattern<int>(Begin, 2, End, Mask.size(), 1))
704
V1 = OriV2;
705
else
706
return SDValue();
707
708
if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1))
709
V2 = OriV1;
710
else if (fitsRegularPattern<int>(Begin + 1, 2, End, Mask.size(), 1))
711
V2 = OriV2;
712
else
713
return SDValue();
714
715
return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
716
}
717
718
/// Lower VECTOR_SHUFFLE into VPICKEV (if possible).
719
///
720
/// VPICKEV copies the even elements of each vector into the result vector.
721
///
722
/// It is possible to lower into VPICKEV when the mask consists of two of the
723
/// following forms concatenated:
724
/// <0, 2, 4, ...>
725
/// <n, n+2, n+4, ...>
726
/// where n is the number of elements in the vector.
727
/// For example:
728
/// <0, 2, 4, ..., 0, 2, 4, ...>
729
/// <0, 2, 4, ..., n, n+2, n+4, ...>
730
///
731
/// When undef's appear in the mask they are treated as if they were whatever
732
/// value is necessary in order to fit the above forms.
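///
/// For example (illustrative), the v4i32 mask <0, 2, 4, 6> concatenates the
/// even elements of the first source with those of the second and is
/// matched here.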
733
static SDValue lowerVECTOR_SHUFFLE_VPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
734
MVT VT, SDValue V1, SDValue V2,
735
SelectionDAG &DAG) {
736
737
const auto &Begin = Mask.begin();
738
const auto &Mid = Mask.begin() + Mask.size() / 2;
739
const auto &End = Mask.end();
740
SDValue OriV1 = V1, OriV2 = V2;
741
742
if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2))
743
V1 = OriV1;
744
else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size(), 2))
745
V1 = OriV2;
746
else
747
return SDValue();
748
749
if (fitsRegularPattern<int>(Mid, 1, End, 0, 2))
750
V2 = OriV1;
751
else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size(), 2))
752
V2 = OriV2;
753
754
else
755
return SDValue();
756
757
return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
758
}
759
760
/// Lower VECTOR_SHUFFLE into VPICKOD (if possible).
761
///
762
/// VPICKOD copies the odd elements of each vector into the result vector.
763
///
764
/// It is possible to lower into VPICKOD when the mask consists of two of the
765
/// following forms concatenated:
766
/// <1, 3, 5, ...>
767
/// <n+1, n+3, n+5, ...>
768
/// where n is the number of elements in the vector.
769
/// For example:
770
/// <1, 3, 5, ..., 1, 3, 5, ...>
771
/// <1, 3, 5, ..., n+1, n+3, n+5, ...>
772
///
773
/// When undef's appear in the mask they are treated as if they were whatever
774
/// value is necessary in order to fit the above forms.
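///
/// For example (illustrative), the v4i32 mask <1, 3, 5, 7> concatenates the
/// odd elements of the first source with those of the second and is
/// matched here.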
775
static SDValue lowerVECTOR_SHUFFLE_VPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
776
MVT VT, SDValue V1, SDValue V2,
777
SelectionDAG &DAG) {
778
779
const auto &Begin = Mask.begin();
780
const auto &Mid = Mask.begin() + Mask.size() / 2;
781
const auto &End = Mask.end();
782
SDValue OriV1 = V1, OriV2 = V2;
783
784
if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2))
785
V1 = OriV1;
786
else if (fitsRegularPattern<int>(Begin, 1, Mid, Mask.size() + 1, 2))
787
V1 = OriV2;
788
else
789
return SDValue();
790
791
if (fitsRegularPattern<int>(Mid, 1, End, 1, 2))
792
V2 = OriV1;
793
else if (fitsRegularPattern<int>(Mid, 1, End, Mask.size() + 1, 2))
794
V2 = OriV2;
795
else
796
return SDValue();
797
798
return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
799
}
800
801
/// Lower VECTOR_SHUFFLE into VSHUF.
802
///
803
/// This mostly consists of converting the shuffle mask into a BUILD_VECTOR and
804
/// adding it as an operand to the resulting VSHUF.
805
static SDValue lowerVECTOR_SHUFFLE_VSHUF(const SDLoc &DL, ArrayRef<int> Mask,
806
MVT VT, SDValue V1, SDValue V2,
807
SelectionDAG &DAG) {
808
809
SmallVector<SDValue, 16> Ops;
810
for (auto M : Mask)
811
Ops.push_back(DAG.getConstant(M, DL, MVT::i64));
812
813
EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
814
SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);
815
816
// VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
817
// <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
818
// VSHUF concatenates the vectors in a bitwise fashion:
819
// <0b00, 0b01> + <0b10, 0b11> ->
820
// 0b0100 + 0b1110 -> 0b01001110
821
// <0b10, 0b11, 0b00, 0b01>
822
// We must therefore swap the operands to get the correct result.
823
return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
824
}
825
826
/// Dispatching routine to lower various 128-bit LoongArch vector shuffles.
827
///
828
/// This routine breaks down the specific type of 128-bit shuffle and
829
/// dispatches to the lowering routines accordingly.
830
static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
831
SDValue V1, SDValue V2, SelectionDAG &DAG) {
832
assert((VT.SimpleTy == MVT::v16i8 || VT.SimpleTy == MVT::v8i16 ||
833
VT.SimpleTy == MVT::v4i32 || VT.SimpleTy == MVT::v2i64 ||
834
VT.SimpleTy == MVT::v4f32 || VT.SimpleTy == MVT::v2f64) &&
835
"Vector type is unsupported for lsx!");
836
assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
837
"Two operands have different types!");
838
assert(VT.getVectorNumElements() == Mask.size() &&
839
"Unexpected mask size for shuffle!");
840
assert(Mask.size() % 2 == 0 && "Expected even mask size.");
841
842
SDValue Result;
843
// TODO: Add more comparison patterns.
844
if (V2.isUndef()) {
845
if ((Result = lowerVECTOR_SHUFFLE_VREPLVEI(DL, Mask, VT, V1, V2, DAG)))
846
return Result;
847
if ((Result = lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG)))
848
return Result;
849
850
// TODO: The commented-out assignment below may be enabled in the future to
851
// pattern for instruction selection.
852
/* V2 = V1; */
853
}
854
855
// It is recommended not to change the pattern comparison order for better
856
// performance.
857
if ((Result = lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG)))
858
return Result;
859
if ((Result = lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG)))
860
return Result;
861
if ((Result = lowerVECTOR_SHUFFLE_VILVH(DL, Mask, VT, V1, V2, DAG)))
862
return Result;
863
if ((Result = lowerVECTOR_SHUFFLE_VILVL(DL, Mask, VT, V1, V2, DAG)))
864
return Result;
865
if ((Result = lowerVECTOR_SHUFFLE_VPICKEV(DL, Mask, VT, V1, V2, DAG)))
866
return Result;
867
if ((Result = lowerVECTOR_SHUFFLE_VPICKOD(DL, Mask, VT, V1, V2, DAG)))
868
return Result;
869
if ((Result = lowerVECTOR_SHUFFLE_VSHUF(DL, Mask, VT, V1, V2, DAG)))
870
return Result;
871
872
return SDValue();
873
}
874
875
/// Lower VECTOR_SHUFFLE into XVREPLVEI (if possible).
876
///
877
/// It is a XVREPLVEI when the mask is:
878
/// <x, x, x, ..., x+n, x+n, x+n, ...>
879
/// where the number of x is equal to n and n is half the length of vector.
880
///
881
/// When undef's appear in the mask they are treated as if they were whatever
882
/// value is necessary in order to fit the above form.
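///
/// For example (illustrative), the v8i32 mask <2, 2, 2, 2, 6, 6, 6, 6>
/// (x = 2, n = 4) fits this form.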
883
static SDValue lowerVECTOR_SHUFFLE_XVREPLVEI(const SDLoc &DL,
884
ArrayRef<int> Mask, MVT VT,
885
SDValue V1, SDValue V2,
886
SelectionDAG &DAG) {
887
int SplatIndex = -1;
888
for (const auto &M : Mask) {
889
if (M != -1) {
890
SplatIndex = M;
891
break;
892
}
893
}
894
895
if (SplatIndex == -1)
896
return DAG.getUNDEF(VT);
897
898
const auto &Begin = Mask.begin();
899
const auto &End = Mask.end();
900
unsigned HalfSize = Mask.size() / 2;
901
902
assert(SplatIndex < (int)Mask.size() && "Out of bounds mask index");
903
if (fitsRegularPattern<int>(Begin, 1, End - HalfSize, SplatIndex, 0) &&
904
fitsRegularPattern<int>(Begin + HalfSize, 1, End, SplatIndex + HalfSize,
905
0)) {
906
APInt Imm(64, SplatIndex);
907
return DAG.getNode(LoongArchISD::VREPLVEI, DL, VT, V1,
908
DAG.getConstant(Imm, DL, MVT::i64));
909
}
910
911
return SDValue();
912
}
913
914
/// Lower VECTOR_SHUFFLE into XVSHUF4I (if possible).
915
static SDValue lowerVECTOR_SHUFFLE_XVSHUF4I(const SDLoc &DL, ArrayRef<int> Mask,
916
MVT VT, SDValue V1, SDValue V2,
917
SelectionDAG &DAG) {
918
// When the size is less than or equal to 4, lower cost instructions may be
919
// used.
920
if (Mask.size() <= 4)
921
return SDValue();
922
return lowerVECTOR_SHUFFLE_VSHUF4I(DL, Mask, VT, V1, V2, DAG);
923
}
924
925
/// Lower VECTOR_SHUFFLE into XVPACKEV (if possible).
926
static SDValue lowerVECTOR_SHUFFLE_XVPACKEV(const SDLoc &DL, ArrayRef<int> Mask,
927
MVT VT, SDValue V1, SDValue V2,
928
SelectionDAG &DAG) {
929
return lowerVECTOR_SHUFFLE_VPACKEV(DL, Mask, VT, V1, V2, DAG);
930
}
931
932
/// Lower VECTOR_SHUFFLE into XVPACKOD (if possible).
933
static SDValue lowerVECTOR_SHUFFLE_XVPACKOD(const SDLoc &DL, ArrayRef<int> Mask,
934
MVT VT, SDValue V1, SDValue V2,
935
SelectionDAG &DAG) {
936
return lowerVECTOR_SHUFFLE_VPACKOD(DL, Mask, VT, V1, V2, DAG);
937
}
938
939
/// Lower VECTOR_SHUFFLE into XVILVH (if possible).
940
static SDValue lowerVECTOR_SHUFFLE_XVILVH(const SDLoc &DL, ArrayRef<int> Mask,
941
MVT VT, SDValue V1, SDValue V2,
942
SelectionDAG &DAG) {
943
944
const auto &Begin = Mask.begin();
945
const auto &End = Mask.end();
946
unsigned HalfSize = Mask.size() / 2;
947
unsigned LeftSize = HalfSize / 2;
948
SDValue OriV1 = V1, OriV2 = V2;
949
950
if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, HalfSize - LeftSize,
951
1) &&
952
fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize + LeftSize, 1))
953
V1 = OriV1;
954
else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize,
955
Mask.size() + HalfSize - LeftSize, 1) &&
956
fitsRegularPattern<int>(Begin + HalfSize, 2, End,
957
Mask.size() + HalfSize + LeftSize, 1))
958
V1 = OriV2;
959
else
960
return SDValue();
961
962
if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, HalfSize - LeftSize,
963
1) &&
964
fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize + LeftSize,
965
1))
966
V2 = OriV1;
967
else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize,
968
Mask.size() + HalfSize - LeftSize, 1) &&
969
fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
970
Mask.size() + HalfSize + LeftSize, 1))
971
V2 = OriV2;
972
else
973
return SDValue();
974
975
return DAG.getNode(LoongArchISD::VILVH, DL, VT, V2, V1);
976
}
977
978
/// Lower VECTOR_SHUFFLE into XVILVL (if possible).
979
static SDValue lowerVECTOR_SHUFFLE_XVILVL(const SDLoc &DL, ArrayRef<int> Mask,
980
MVT VT, SDValue V1, SDValue V2,
981
SelectionDAG &DAG) {
982
983
const auto &Begin = Mask.begin();
984
const auto &End = Mask.end();
985
unsigned HalfSize = Mask.size() / 2;
986
SDValue OriV1 = V1, OriV2 = V2;
987
988
if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, 0, 1) &&
989
fitsRegularPattern<int>(Begin + HalfSize, 2, End, HalfSize, 1))
990
V1 = OriV1;
991
else if (fitsRegularPattern<int>(Begin, 2, End - HalfSize, Mask.size(), 1) &&
992
fitsRegularPattern<int>(Begin + HalfSize, 2, End,
993
Mask.size() + HalfSize, 1))
994
V1 = OriV2;
995
else
996
return SDValue();
997
998
if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, 0, 1) &&
999
fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End, HalfSize, 1))
1000
V2 = OriV1;
1001
else if (fitsRegularPattern<int>(Begin + 1, 2, End - HalfSize, Mask.size(),
1002
1) &&
1003
fitsRegularPattern<int>(Begin + 1 + HalfSize, 2, End,
1004
Mask.size() + HalfSize, 1))
1005
V2 = OriV2;
1006
else
1007
return SDValue();
1008
1009
return DAG.getNode(LoongArchISD::VILVL, DL, VT, V2, V1);
1010
}
1011
1012
/// Lower VECTOR_SHUFFLE into XVPICKEV (if possible).
1013
static SDValue lowerVECTOR_SHUFFLE_XVPICKEV(const SDLoc &DL, ArrayRef<int> Mask,
1014
MVT VT, SDValue V1, SDValue V2,
1015
SelectionDAG &DAG) {
1016
1017
const auto &Begin = Mask.begin();
1018
const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1019
const auto &Mid = Mask.begin() + Mask.size() / 2;
1020
const auto &RightMid = Mask.end() - Mask.size() / 4;
1021
const auto &End = Mask.end();
1022
unsigned HalfSize = Mask.size() / 2;
1023
SDValue OriV1 = V1, OriV2 = V2;
1024
1025
if (fitsRegularPattern<int>(Begin, 1, LeftMid, 0, 2) &&
1026
fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize, 2))
1027
V1 = OriV1;
1028
else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size(), 2) &&
1029
fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize, 2))
1030
V1 = OriV2;
1031
else
1032
return SDValue();
1033
1034
if (fitsRegularPattern<int>(LeftMid, 1, Mid, 0, 2) &&
1035
fitsRegularPattern<int>(RightMid, 1, End, HalfSize, 2))
1036
V2 = OriV1;
1037
else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size(), 2) &&
1038
fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize, 2))
1039
V2 = OriV2;
1040
1041
else
1042
return SDValue();
1043
1044
return DAG.getNode(LoongArchISD::VPICKEV, DL, VT, V2, V1);
1045
}
1046
1047
/// Lower VECTOR_SHUFFLE into XVPICKOD (if possible).
1048
static SDValue lowerVECTOR_SHUFFLE_XVPICKOD(const SDLoc &DL, ArrayRef<int> Mask,
1049
MVT VT, SDValue V1, SDValue V2,
1050
SelectionDAG &DAG) {
1051
1052
const auto &Begin = Mask.begin();
1053
const auto &LeftMid = Mask.begin() + Mask.size() / 4;
1054
const auto &Mid = Mask.begin() + Mask.size() / 2;
1055
const auto &RightMid = Mask.end() - Mask.size() / 4;
1056
const auto &End = Mask.end();
1057
unsigned HalfSize = Mask.size() / 2;
1058
SDValue OriV1 = V1, OriV2 = V2;
1059
1060
if (fitsRegularPattern<int>(Begin, 1, LeftMid, 1, 2) &&
1061
fitsRegularPattern<int>(Mid, 1, RightMid, HalfSize + 1, 2))
1062
V1 = OriV1;
1063
else if (fitsRegularPattern<int>(Begin, 1, LeftMid, Mask.size() + 1, 2) &&
1064
fitsRegularPattern<int>(Mid, 1, RightMid, Mask.size() + HalfSize + 1,
1065
2))
1066
V1 = OriV2;
1067
else
1068
return SDValue();
1069
1070
if (fitsRegularPattern<int>(LeftMid, 1, Mid, 1, 2) &&
1071
fitsRegularPattern<int>(RightMid, 1, End, HalfSize + 1, 2))
1072
V2 = OriV1;
1073
else if (fitsRegularPattern<int>(LeftMid, 1, Mid, Mask.size() + 1, 2) &&
1074
fitsRegularPattern<int>(RightMid, 1, End, Mask.size() + HalfSize + 1,
1075
2))
1076
V2 = OriV2;
1077
else
1078
return SDValue();
1079
1080
return DAG.getNode(LoongArchISD::VPICKOD, DL, VT, V2, V1);
1081
}
1082
1083
/// Lower VECTOR_SHUFFLE into XVSHUF (if possible).
1084
static SDValue lowerVECTOR_SHUFFLE_XVSHUF(const SDLoc &DL, ArrayRef<int> Mask,
1085
MVT VT, SDValue V1, SDValue V2,
1086
SelectionDAG &DAG) {
1087
1088
int MaskSize = Mask.size();
1089
int HalfSize = Mask.size() / 2;
1090
const auto &Begin = Mask.begin();
1091
const auto &Mid = Mask.begin() + HalfSize;
1092
const auto &End = Mask.end();
1093
1094
// VECTOR_SHUFFLE concatenates the vectors:
1095
// <0, 1, 2, 3, 4, 5, 6, 7> + <8, 9, 10, 11, 12, 13, 14, 15>
1096
// shuffling ->
1097
// <0, 1, 2, 3, 8, 9, 10, 11> <4, 5, 6, 7, 12, 13, 14, 15>
1098
//
1099
// XVSHUF concatenates the vectors:
1100
// <a0, a1, a2, a3, b0, b1, b2, b3> + <a4, a5, a6, a7, b4, b5, b6, b7>
1101
// shuffling ->
1102
// <a0, a1, a2, a3, a4, a5, a6, a7> + <b0, b1, b2, b3, b4, b5, b6, b7>
1103
SmallVector<SDValue, 8> MaskAlloc;
1104
for (auto it = Begin; it < Mid; it++) {
1105
if (*it < 0) // UNDEF
1106
MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1107
else if ((*it >= 0 && *it < HalfSize) ||
1108
(*it >= MaskSize && *it <= MaskSize + HalfSize)) {
1109
int M = *it < HalfSize ? *it : *it - HalfSize;
1110
MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1111
} else
1112
return SDValue();
1113
}
1114
assert((int)MaskAlloc.size() == HalfSize && "xvshuf convert failed!");
1115
1116
for (auto it = Mid; it < End; it++) {
1117
if (*it < 0) // UNDEF
1118
MaskAlloc.push_back(DAG.getTargetConstant(0, DL, MVT::i64));
1119
else if ((*it >= HalfSize && *it < MaskSize) ||
1120
(*it >= MaskSize + HalfSize && *it < MaskSize * 2)) {
1121
int M = *it < MaskSize ? *it - HalfSize : *it - MaskSize;
1122
MaskAlloc.push_back(DAG.getTargetConstant(M, DL, MVT::i64));
1123
} else
1124
return SDValue();
1125
}
1126
assert((int)MaskAlloc.size() == MaskSize && "xvshuf convert failed!");
1127
1128
EVT MaskVecTy = VT.changeVectorElementTypeToInteger();
1129
SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, MaskAlloc);
1130
return DAG.getNode(LoongArchISD::VSHUF, DL, VT, MaskVec, V2, V1);
1131
}
1132
1133
/// Shuffle vectors by lane to generate more optimized instructions.
1134
/// 256-bit shuffles are always considered as 2-lane 128-bit shuffles.
1135
///
1136
/// Therefore, except for the following four cases, other cases are regarded
1137
/// as cross-lane shuffles, where optimization is relatively limited.
1138
///
1139
/// - Shuffle high, low lanes of two input vectors
1140
/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 3, 6>
1141
/// - Shuffle low, high lanes of two input vectors
1142
/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 0, 5>
1143
/// - Shuffle low, low lanes of two input vectors
1144
/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <3, 6, 3, 6>
1145
/// - Shuffle high, high lanes of two input vectors
1146
/// <0, 1, 2, 3> + <4, 5, 6, 7> --- <0, 5, 0, 5>
1147
///
1148
/// The first case is the closest to LoongArch instructions and the other
1149
/// cases need to be converted to it for processing.
1150
///
1151
/// This function may modify V1, V2 and Mask
1152
static void canonicalizeShuffleVectorByLane(const SDLoc &DL,
1153
MutableArrayRef<int> Mask, MVT VT,
1154
SDValue &V1, SDValue &V2,
1155
SelectionDAG &DAG) {
1156
1157
enum HalfMaskType { HighLaneTy, LowLaneTy, None };
1158
1159
int MaskSize = Mask.size();
1160
int HalfSize = Mask.size() / 2;
1161
1162
HalfMaskType preMask = None, postMask = None;
1163
1164
if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1165
return M < 0 || (M >= 0 && M < HalfSize) ||
1166
(M >= MaskSize && M < MaskSize + HalfSize);
1167
}))
1168
preMask = HighLaneTy;
1169
else if (std::all_of(Mask.begin(), Mask.begin() + HalfSize, [&](int M) {
1170
return M < 0 || (M >= HalfSize && M < MaskSize) ||
1171
(M >= MaskSize + HalfSize && M < MaskSize * 2);
1172
}))
1173
preMask = LowLaneTy;
1174
1175
if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1176
return M < 0 || (M >= 0 && M < HalfSize) ||
1177
(M >= MaskSize && M < MaskSize + HalfSize);
1178
}))
1179
postMask = HighLaneTy;
1180
else if (std::all_of(Mask.begin() + HalfSize, Mask.end(), [&](int M) {
1181
return M < 0 || (M >= HalfSize && M < MaskSize) ||
1182
(M >= MaskSize + HalfSize && M < MaskSize * 2);
1183
}))
1184
postMask = LowLaneTy;
1185
1186
// The pre-half of mask is high lane type, and the post-half of mask
1187
// is low lane type, which is closest to the LoongArch instructions.
1188
//
1189
// Note: In the LoongArch architecture, the high lane of mask corresponds
1190
// to the lower 128-bit of vector register, and the low lane of mask
1191
// corresponds to the higher 128-bit of vector register.
1192
if (preMask == HighLaneTy && postMask == LowLaneTy) {
1193
return;
1194
}
1195
if (preMask == LowLaneTy && postMask == HighLaneTy) {
1196
V1 = DAG.getBitcast(MVT::v4i64, V1);
1197
V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1198
DAG.getConstant(0b01001110, DL, MVT::i64));
1199
V1 = DAG.getBitcast(VT, V1);
1200
1201
if (!V2.isUndef()) {
1202
V2 = DAG.getBitcast(MVT::v4i64, V2);
1203
V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1204
DAG.getConstant(0b01001110, DL, MVT::i64));
1205
V2 = DAG.getBitcast(VT, V2);
1206
}
1207
1208
for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1209
*it = *it < 0 ? *it : *it - HalfSize;
1210
}
1211
for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1212
*it = *it < 0 ? *it : *it + HalfSize;
1213
}
1214
} else if (preMask == LowLaneTy && postMask == LowLaneTy) {
1215
V1 = DAG.getBitcast(MVT::v4i64, V1);
1216
V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1217
DAG.getConstant(0b11101110, DL, MVT::i64));
1218
V1 = DAG.getBitcast(VT, V1);
1219
1220
if (!V2.isUndef()) {
1221
V2 = DAG.getBitcast(MVT::v4i64, V2);
1222
V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1223
DAG.getConstant(0b11101110, DL, MVT::i64));
1224
V2 = DAG.getBitcast(VT, V2);
1225
}
1226
1227
for (auto it = Mask.begin(); it < Mask.begin() + HalfSize; it++) {
1228
*it = *it < 0 ? *it : *it - HalfSize;
1229
}
1230
} else if (preMask == HighLaneTy && postMask == HighLaneTy) {
1231
V1 = DAG.getBitcast(MVT::v4i64, V1);
1232
V1 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V1,
1233
DAG.getConstant(0b01000100, DL, MVT::i64));
1234
V1 = DAG.getBitcast(VT, V1);
1235
1236
if (!V2.isUndef()) {
1237
V2 = DAG.getBitcast(MVT::v4i64, V2);
1238
V2 = DAG.getNode(LoongArchISD::XVPERMI, DL, MVT::v4i64, V2,
1239
DAG.getConstant(0b01000100, DL, MVT::i64));
1240
V2 = DAG.getBitcast(VT, V2);
1241
}
1242
1243
for (auto it = Mask.begin() + HalfSize; it < Mask.end(); it++) {
1244
*it = *it < 0 ? *it : *it + HalfSize;
1245
}
1246
} else { // cross-lane
1247
return;
1248
}
1249
}
1250
1251
/// Dispatching routine to lower various 256-bit LoongArch vector shuffles.
1252
///
1253
/// This routine breaks down the specific type of 256-bit shuffle and
1254
/// dispatches to the lowering routines accordingly.
1255
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1256
SDValue V1, SDValue V2, SelectionDAG &DAG) {
1257
assert((VT.SimpleTy == MVT::v32i8 || VT.SimpleTy == MVT::v16i16 ||
1258
VT.SimpleTy == MVT::v8i32 || VT.SimpleTy == MVT::v4i64 ||
1259
VT.SimpleTy == MVT::v8f32 || VT.SimpleTy == MVT::v4f64) &&
1260
"Vector type is unsupported for lasx!");
1261
assert(V1.getSimpleValueType() == V2.getSimpleValueType() &&
1262
"Two operands have different types!");
1263
assert(VT.getVectorNumElements() == Mask.size() &&
1264
"Unexpected mask size for shuffle!");
1265
assert(Mask.size() % 2 == 0 && "Expected even mask size.");
1266
assert(Mask.size() >= 4 && "Mask size is less than 4.");
1267
1268
// Canonicalize non-cross-lane shuffle vectors.
1269
SmallVector<int> NewMask(Mask);
1270
canonicalizeShuffleVectorByLane(DL, NewMask, VT, V1, V2, DAG);
1271
1272
SDValue Result;
1273
// TODO: Add more comparison patterns.
1274
if (V2.isUndef()) {
1275
if ((Result = lowerVECTOR_SHUFFLE_XVREPLVEI(DL, NewMask, VT, V1, V2, DAG)))
1276
return Result;
1277
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF4I(DL, NewMask, VT, V1, V2, DAG)))
1278
return Result;
1279
1280
// TODO: The commented-out assignment below may be enabled in the future to
1281
// pattern for instruction selection.
1282
/* V2 = V1; */
1283
}
1284
1285
// It is recommended not to change the pattern comparison order for better
1286
// performance.
1287
if ((Result = lowerVECTOR_SHUFFLE_XVPACKEV(DL, NewMask, VT, V1, V2, DAG)))
1288
return Result;
1289
if ((Result = lowerVECTOR_SHUFFLE_XVPACKOD(DL, NewMask, VT, V1, V2, DAG)))
1290
return Result;
1291
if ((Result = lowerVECTOR_SHUFFLE_XVILVH(DL, NewMask, VT, V1, V2, DAG)))
1292
return Result;
1293
if ((Result = lowerVECTOR_SHUFFLE_XVILVL(DL, NewMask, VT, V1, V2, DAG)))
1294
return Result;
1295
if ((Result = lowerVECTOR_SHUFFLE_XVPICKEV(DL, NewMask, VT, V1, V2, DAG)))
1296
return Result;
1297
if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD(DL, NewMask, VT, V1, V2, DAG)))
1298
return Result;
1299
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF(DL, NewMask, VT, V1, V2, DAG)))
1300
return Result;
1301
1302
return SDValue();
1303
}
1304
1305
SDValue LoongArchTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
1306
SelectionDAG &DAG) const {
1307
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
1308
ArrayRef<int> OrigMask = SVOp->getMask();
1309
SDValue V1 = Op.getOperand(0);
1310
SDValue V2 = Op.getOperand(1);
1311
MVT VT = Op.getSimpleValueType();
1312
int NumElements = VT.getVectorNumElements();
1313
SDLoc DL(Op);
1314
1315
bool V1IsUndef = V1.isUndef();
1316
bool V2IsUndef = V2.isUndef();
1317
if (V1IsUndef && V2IsUndef)
1318
return DAG.getUNDEF(VT);
1319
1320
// When we create a shuffle node we put the UNDEF node to second operand,
1321
// but in some cases the first operand may be transformed to UNDEF.
1322
// In this case we should just commute the node.
1323
if (V1IsUndef)
1324
return DAG.getCommutedVectorShuffle(*SVOp);
1325
1326
// Check for non-undef masks pointing at an undef vector and make the masks
1327
// undef as well. This makes it easier to match the shuffle based solely on
1328
// the mask.
1329
if (V2IsUndef &&
1330
any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) {
1331
SmallVector<int, 8> NewMask(OrigMask);
1332
for (int &M : NewMask)
1333
if (M >= NumElements)
1334
M = -1;
1335
return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
1336
}
1337
1338
// Check for illegal shuffle mask element index values.
1339
int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2);
1340
(void)MaskUpperLimit;
1341
assert(llvm::all_of(OrigMask,
1342
[&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
1343
"Out of bounds shuffle index");
1344
1345
// For each vector width, delegate to a specialized lowering routine.
1346
if (VT.is128BitVector())
1347
return lower128BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1348
1349
if (VT.is256BitVector())
1350
return lower256BitShuffle(DL, OrigMask, VT, V1, V2, DAG);
1351
1352
return SDValue();
1353
}
1354
1355
static bool isConstantOrUndef(const SDValue Op) {
1356
if (Op->isUndef())
1357
return true;
1358
if (isa<ConstantSDNode>(Op))
1359
return true;
1360
if (isa<ConstantFPSDNode>(Op))
1361
return true;
1362
return false;
1363
}
1364
1365
static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) {
1366
for (unsigned i = 0; i < Op->getNumOperands(); ++i)
1367
if (isConstantOrUndef(Op->getOperand(i)))
1368
return true;
1369
return false;
1370
}
1371
1372
SDValue LoongArchTargetLowering::lowerBUILD_VECTOR(SDValue Op,
1373
SelectionDAG &DAG) const {
1374
BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op);
1375
EVT ResTy = Op->getValueType(0);
1376
SDLoc DL(Op);
1377
APInt SplatValue, SplatUndef;
1378
unsigned SplatBitSize;
1379
bool HasAnyUndefs;
1380
bool Is128Vec = ResTy.is128BitVector();
1381
bool Is256Vec = ResTy.is256BitVector();
1382
1383
if ((!Subtarget.hasExtLSX() || !Is128Vec) &&
1384
(!Subtarget.hasExtLASX() || !Is256Vec))
1385
return SDValue();
1386
1387
if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs,
1388
/*MinSplatBits=*/8) &&
1389
SplatBitSize <= 64) {
1390
// We can only cope with 8, 16, 32, or 64-bit elements.
1391
if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 &&
1392
SplatBitSize != 64)
1393
return SDValue();
1394
1395
EVT ViaVecTy;
1396
1397
switch (SplatBitSize) {
1398
default:
1399
return SDValue();
1400
case 8:
1401
ViaVecTy = Is128Vec ? MVT::v16i8 : MVT::v32i8;
1402
break;
1403
case 16:
1404
ViaVecTy = Is128Vec ? MVT::v8i16 : MVT::v16i16;
1405
break;
1406
case 32:
1407
ViaVecTy = Is128Vec ? MVT::v4i32 : MVT::v8i32;
1408
break;
1409
case 64:
1410
ViaVecTy = Is128Vec ? MVT::v2i64 : MVT::v4i64;
1411
break;
1412
}
1413
1414
// SelectionDAG::getConstant will promote SplatValue appropriately.
1415
SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy);
1416
1417
// Bitcast to the type we originally wanted.
1418
if (ViaVecTy != ResTy)
1419
Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result);
1420
1421
return Result;
1422
}
1423
1424
if (DAG.isSplatValue(Op, /*AllowUndefs=*/false))
1425
return Op;
1426
1427
if (!isConstantOrUndefBUILD_VECTOR(Node)) {
1428
// Use INSERT_VECTOR_ELT operations rather than expand to stores.
1429
// The resulting code is the same length as the expansion, but it doesn't
1430
// use memory operations.
1431
EVT ResTy = Node->getValueType(0);
1432
1433
assert(ResTy.isVector());
1434
1435
unsigned NumElts = ResTy.getVectorNumElements();
1436
SDValue Vector = DAG.getUNDEF(ResTy);
1437
for (unsigned i = 0; i < NumElts; ++i) {
1438
Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector,
1439
Node->getOperand(i),
1440
DAG.getConstant(i, DL, Subtarget.getGRLenVT()));
1441
}
1442
return Vector;
1443
}
1444
1445
return SDValue();
1446
}
1447
1448
SDValue
1449
LoongArchTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
1450
SelectionDAG &DAG) const {
1451
EVT VecTy = Op->getOperand(0)->getValueType(0);
1452
SDValue Idx = Op->getOperand(1);
1453
EVT EltTy = VecTy.getVectorElementType();
1454
unsigned NumElts = VecTy.getVectorNumElements();
1455
1456
if (isa<ConstantSDNode>(Idx) &&
1457
(EltTy == MVT::i32 || EltTy == MVT::i64 || EltTy == MVT::f32 ||
1458
EltTy == MVT::f64 || Idx->getAsZExtVal() < NumElts / 2))
1459
return Op;
1460
1461
return SDValue();
1462
}
1463
1464
SDValue
1465
LoongArchTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
1466
SelectionDAG &DAG) const {
1467
if (isa<ConstantSDNode>(Op->getOperand(2)))
1468
return Op;
1469
return SDValue();
1470
}
1471
1472
SDValue LoongArchTargetLowering::lowerATOMIC_FENCE(SDValue Op,
1473
SelectionDAG &DAG) const {
1474
SDLoc DL(Op);
1475
SyncScope::ID FenceSSID =
1476
static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
1477
1478
// singlethread fences only synchronize with signal handlers on the same
1479
// thread and thus only need to preserve instruction order, not actually
1480
// enforce memory ordering.
1481
if (FenceSSID == SyncScope::SingleThread)
1482
// MEMBARRIER is a compiler barrier; it codegens to a no-op.
1483
return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1484
1485
return Op;
1486
}
1487
1488
SDValue LoongArchTargetLowering::lowerWRITE_REGISTER(SDValue Op,
1489
SelectionDAG &DAG) const {
1490
1491
if (Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i32) {
1492
DAG.getContext()->emitError(
1493
"On LA64, only 64-bit registers can be written.");
1494
return Op.getOperand(0);
1495
}
1496
1497
if (!Subtarget.is64Bit() && Op.getOperand(2).getValueType() == MVT::i64) {
1498
DAG.getContext()->emitError(
1499
"On LA32, only 32-bit registers can be written.");
1500
return Op.getOperand(0);
1501
}
1502
1503
return Op;
1504
}
1505
1506
SDValue LoongArchTargetLowering::lowerFRAMEADDR(SDValue Op,
1507
SelectionDAG &DAG) const {
1508
if (!isa<ConstantSDNode>(Op.getOperand(0))) {
1509
DAG.getContext()->emitError("argument to '__builtin_frame_address' must "
1510
"be a constant integer");
1511
return SDValue();
1512
}
1513
1514
MachineFunction &MF = DAG.getMachineFunction();
1515
MF.getFrameInfo().setFrameAddressIsTaken(true);
1516
Register FrameReg = Subtarget.getRegisterInfo()->getFrameRegister(MF);
1517
EVT VT = Op.getValueType();
1518
SDLoc DL(Op);
1519
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
1520
unsigned Depth = Op.getConstantOperandVal(0);
1521
int GRLenInBytes = Subtarget.getGRLen() / 8;
1522
1523
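// Note (this relies on an assumption about the prologue's frame record
// layout): each outer frame's FP is expected at FP - 2 * GRLenInBytes, so a
// nonzero depth walks the chain with one load per level, e.g.
// __builtin_frame_address(2) on LA64 performs two loads at offset -16.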
while (Depth--) {
1524
int Offset = -(GRLenInBytes * 2);
1525
SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1526
DAG.getIntPtrConstant(Offset, DL));
1527
FrameAddr =
1528
DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1529
}
1530
return FrameAddr;
1531
}
1532
1533
SDValue LoongArchTargetLowering::lowerRETURNADDR(SDValue Op,
1534
SelectionDAG &DAG) const {
1535
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1536
return SDValue();
1537
1538
// Currently we only support lowering the return address for the current frame.
1539
if (Op.getConstantOperandVal(0) != 0) {
1540
DAG.getContext()->emitError(
1541
"return address can only be determined for the current frame");
1542
return SDValue();
1543
}
1544
1545
MachineFunction &MF = DAG.getMachineFunction();
1546
MF.getFrameInfo().setReturnAddressIsTaken(true);
1547
MVT GRLenVT = Subtarget.getGRLenVT();
1548
1549
// Return the value of the return address register, marking it an implicit
1550
// live-in.
1551
Register Reg = MF.addLiveIn(Subtarget.getRegisterInfo()->getRARegister(),
1552
getRegClassFor(GRLenVT));
1553
return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), Reg, GRLenVT);
1554
}
1555
1556
SDValue LoongArchTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
1557
SelectionDAG &DAG) const {
1558
MachineFunction &MF = DAG.getMachineFunction();
1559
auto Size = Subtarget.getGRLen() / 8;
1560
auto FI = MF.getFrameInfo().CreateFixedObject(Size, 0, false);
1561
return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1562
}
1563
1564
SDValue LoongArchTargetLowering::lowerVASTART(SDValue Op,
1565
SelectionDAG &DAG) const {
1566
MachineFunction &MF = DAG.getMachineFunction();
1567
auto *FuncInfo = MF.getInfo<LoongArchMachineFunctionInfo>();
1568
1569
SDLoc DL(Op);
1570
SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1571
getPointerTy(MF.getDataLayout()));
1572
1573
// vastart just stores the address of the VarArgsFrameIndex slot into the
1574
// memory location argument.
1575
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1576
return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1577
MachinePointerInfo(SV));
1578
}
1579
1580
SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
1581
SelectionDAG &DAG) const {
1582
assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1583
!Subtarget.hasBasicD() && "unexpected target features");
1584
1585
SDLoc DL(Op);
1586
SDValue Op0 = Op.getOperand(0);
1587
if (Op0->getOpcode() == ISD::AND) {
1588
auto *C = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
1589
if (C && C->getZExtValue() < UINT64_C(0xFFFFFFFF))
1590
return Op;
1591
}
1592
1593
if (Op0->getOpcode() == LoongArchISD::BSTRPICK &&
1594
Op0.getConstantOperandVal(1) < UINT64_C(0x1F) &&
1595
Op0.getConstantOperandVal(2) == UINT64_C(0))
1596
return Op;
1597
1598
if (Op0.getOpcode() == ISD::AssertZext &&
1599
dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLT(MVT::i32))
1600
return Op;
1601
1602
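// Otherwise fall back to a libcall. As a rough example, a plain
// 'uitofp i64 %x to float' that reaches this point is expected to become a
// call to the helper RTLIB::getUINTTOFP selects for i64 -> f32
// (__floatundisf), since none of the narrow-operand cases above apply.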
EVT OpVT = Op0.getValueType();
1603
EVT RetVT = Op.getValueType();
1604
RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
1605
MakeLibCallOptions CallOptions;
1606
CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1607
SDValue Chain = SDValue();
1608
SDValue Result;
1609
std::tie(Result, Chain) =
1610
makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1611
return Result;
1612
}
1613
1614
SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
1615
SelectionDAG &DAG) const {
1616
assert(Subtarget.is64Bit() && Subtarget.hasBasicF() &&
1617
!Subtarget.hasBasicD() && "unexpected target features");
1618
1619
SDLoc DL(Op);
1620
SDValue Op0 = Op.getOperand(0);
1621
1622
if ((Op0.getOpcode() == ISD::AssertSext ||
1623
Op0.getOpcode() == ISD::SIGN_EXTEND_INREG) &&
1624
dyn_cast<VTSDNode>(Op0.getOperand(1))->getVT().bitsLE(MVT::i32))
1625
return Op;
1626
1627
EVT OpVT = Op0.getValueType();
1628
EVT RetVT = Op.getValueType();
1629
RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
1630
MakeLibCallOptions CallOptions;
1631
CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
1632
SDValue Chain = SDValue();
1633
SDValue Result;
1634
std::tie(Result, Chain) =
1635
makeLibCall(DAG, LC, Op.getValueType(), Op0, CallOptions, DL, Chain);
1636
return Result;
1637
}
1638
1639
SDValue LoongArchTargetLowering::lowerBITCAST(SDValue Op,
1640
SelectionDAG &DAG) const {
1641
1642
SDLoc DL(Op);
1643
SDValue Op0 = Op.getOperand(0);
1644
1645
if (Op.getValueType() == MVT::f32 && Op0.getValueType() == MVT::i32 &&
1646
Subtarget.is64Bit() && Subtarget.hasBasicF()) {
1647
SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
1648
return DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, NewOp0);
1649
}
1650
return Op;
1651
}
1652
1653
SDValue LoongArchTargetLowering::lowerFP_TO_SINT(SDValue Op,
1654
SelectionDAG &DAG) const {
1655
1656
SDLoc DL(Op);
1657
1658
if (Op.getValueSizeInBits() > 32 && Subtarget.hasBasicF() &&
1659
!Subtarget.hasBasicD()) {
1660
SDValue Dst =
1661
DAG.getNode(LoongArchISD::FTINT, DL, MVT::f32, Op.getOperand(0));
1662
return DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Dst);
1663
}
1664
1665
EVT FPTy = EVT::getFloatingPointVT(Op.getValueSizeInBits());
1666
SDValue Trunc = DAG.getNode(LoongArchISD::FTINT, DL, FPTy, Op.getOperand(0));
1667
return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Trunc);
1668
}
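// Rough shape of the path above on LA64 with F but without D: an
// 'fptosi float -> i64' becomes (MOVFR2GR_S_LA64 (FTINT f32)), i.e. the
// conversion happens in an FPR and the 32-bit result is then moved (and
// sign-extended) into a GPR; otherwise the generic FTINT + bitcast form
// below is used.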
1669
1670
static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
1671
SelectionDAG &DAG, unsigned Flags) {
1672
return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
1673
}
1674
1675
static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
1676
SelectionDAG &DAG, unsigned Flags) {
1677
return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
1678
Flags);
1679
}
1680
1681
static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
1682
SelectionDAG &DAG, unsigned Flags) {
1683
return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
1684
N->getOffset(), Flags);
1685
}
1686
1687
static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
1688
SelectionDAG &DAG, unsigned Flags) {
1689
return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
1690
}
1691
1692
template <class NodeTy>
1693
SDValue LoongArchTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
1694
CodeModel::Model M,
1695
bool IsLocal) const {
1696
SDLoc DL(N);
1697
EVT Ty = getPointerTy(DAG.getDataLayout());
1698
SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
1699
SDValue Load;
1700
1701
switch (M) {
1702
default:
1703
report_fatal_error("Unsupported code model");
1704
1705
case CodeModel::Large: {
1706
assert(Subtarget.is64Bit() && "Large code model requires LA64");
1707
1708
// This is not actually used, but is necessary for successfully matching
1709
// the PseudoLA_*_LARGE nodes.
1710
SDValue Tmp = DAG.getConstant(0, DL, Ty);
1711
if (IsLocal) {
1712
// This generates the pattern (PseudoLA_PCREL_LARGE tmp sym), which
1713
// eventually becomes the desired 5-insn code sequence.
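// As a hedged illustration (the relocation operators live in the pseudo's
// expansion, not here), the sequence is expected to look roughly like:
//   pcalau12i $rd, %pc_hi20(sym)
//   addi.d    $rt, $zero, %pc_lo12(sym)
//   lu32i.d   $rt, %pc64_lo20(sym)
//   lu52i.d   $rt, $rt, %pc64_hi12(sym)
//   add.d     $rd, $rd, $rt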
1714
Load = SDValue(DAG.getMachineNode(LoongArch::PseudoLA_PCREL_LARGE, DL, Ty,
1715
Tmp, Addr),
1716
0);
1717
} else {
1718
// This generates the pattern (PseudoLA_GOT_LARGE tmp sym), which
1719
// eventually becomes the desired 5-insn code sequence.
1720
Load = SDValue(
1721
DAG.getMachineNode(LoongArch::PseudoLA_GOT_LARGE, DL, Ty, Tmp, Addr),
1722
0);
1723
}
1724
break;
1725
}
1726
1727
case CodeModel::Small:
1728
case CodeModel::Medium:
1729
if (IsLocal) {
1730
// This generates the pattern (PseudoLA_PCREL sym), which expands to
1731
// (addi.w/d (pcalau12i %pc_hi20(sym)) %pc_lo12(sym)).
1732
Load = SDValue(
1733
DAG.getMachineNode(LoongArch::PseudoLA_PCREL, DL, Ty, Addr), 0);
1734
} else {
1735
// This generates the pattern (PseudoLA_GOT sym), which expands to (ld.w/d
1736
// (pcalau12i %got_pc_hi20(sym)) %got_pc_lo12(sym)).
1737
Load =
1738
SDValue(DAG.getMachineNode(LoongArch::PseudoLA_GOT, DL, Ty, Addr), 0);
1739
}
1740
}
1741
1742
if (!IsLocal) {
1743
// Mark the load instruction as invariant to enable hoisting in MachineLICM.
1744
MachineFunction &MF = DAG.getMachineFunction();
1745
MachineMemOperand *MemOp = MF.getMachineMemOperand(
1746
MachinePointerInfo::getGOT(MF),
1747
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
1748
MachineMemOperand::MOInvariant,
1749
LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1750
DAG.setNodeMemRefs(cast<MachineSDNode>(Load.getNode()), {MemOp});
1751
}
1752
1753
return Load;
1754
}
1755
1756
SDValue LoongArchTargetLowering::lowerBlockAddress(SDValue Op,
1757
SelectionDAG &DAG) const {
1758
return getAddr(cast<BlockAddressSDNode>(Op), DAG,
1759
DAG.getTarget().getCodeModel());
1760
}
1761
1762
SDValue LoongArchTargetLowering::lowerJumpTable(SDValue Op,
1763
SelectionDAG &DAG) const {
1764
return getAddr(cast<JumpTableSDNode>(Op), DAG,
1765
DAG.getTarget().getCodeModel());
1766
}
1767
1768
SDValue LoongArchTargetLowering::lowerConstantPool(SDValue Op,
1769
SelectionDAG &DAG) const {
1770
return getAddr(cast<ConstantPoolSDNode>(Op), DAG,
1771
DAG.getTarget().getCodeModel());
1772
}
1773
1774
SDValue LoongArchTargetLowering::lowerGlobalAddress(SDValue Op,
1775
SelectionDAG &DAG) const {
1776
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1777
assert(N->getOffset() == 0 && "unexpected offset in global node");
1778
auto CM = DAG.getTarget().getCodeModel();
1779
const GlobalValue *GV = N->getGlobal();
1780
1781
if (GV->isDSOLocal() && isa<GlobalVariable>(GV)) {
1782
if (auto GCM = dyn_cast<GlobalVariable>(GV)->getCodeModel())
1783
CM = *GCM;
1784
}
1785
1786
return getAddr(N, DAG, CM, GV->isDSOLocal());
1787
}
1788
1789
SDValue LoongArchTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
1790
SelectionDAG &DAG,
1791
unsigned Opc, bool UseGOT,
1792
bool Large) const {
1793
SDLoc DL(N);
1794
EVT Ty = getPointerTy(DAG.getDataLayout());
1795
MVT GRLenVT = Subtarget.getGRLenVT();
1796
1797
// This is not actually used, but is necessary for successfully matching the
1798
// PseudoLA_*_LARGE nodes.
1799
SDValue Tmp = DAG.getConstant(0, DL, Ty);
1800
SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1801
SDValue Offset = Large
1802
? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1803
: SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1804
if (UseGOT) {
1805
// Mark the load instruction as invariant to enable hoisting in MachineLICM.
1806
MachineFunction &MF = DAG.getMachineFunction();
1807
MachineMemOperand *MemOp = MF.getMachineMemOperand(
1808
MachinePointerInfo::getGOT(MF),
1809
MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
1810
MachineMemOperand::MOInvariant,
1811
LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
1812
DAG.setNodeMemRefs(cast<MachineSDNode>(Offset.getNode()), {MemOp});
1813
}
1814
1815
// Add the thread pointer.
1816
return DAG.getNode(ISD::ADD, DL, Ty, Offset,
1817
DAG.getRegister(LoongArch::R2, GRLenVT));
1818
}
1819
1820
SDValue LoongArchTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
1821
SelectionDAG &DAG,
1822
unsigned Opc,
1823
bool Large) const {
1824
SDLoc DL(N);
1825
EVT Ty = getPointerTy(DAG.getDataLayout());
1826
IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
1827
1828
// This is not actually used, but is necessary for successfully matching the
1829
// PseudoLA_*_LARGE nodes.
1830
SDValue Tmp = DAG.getConstant(0, DL, Ty);
1831
1832
// Use a PC-relative addressing mode to access the dynamic GOT address.
1833
SDValue Addr = DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, 0);
1834
SDValue Load = Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1835
: SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1836
1837
// Prepare argument list to generate call.
1838
ArgListTy Args;
1839
ArgListEntry Entry;
1840
Entry.Node = Load;
1841
Entry.Ty = CallTy;
1842
Args.push_back(Entry);
1843
1844
// Setup call to __tls_get_addr.
1845
TargetLowering::CallLoweringInfo CLI(DAG);
1846
CLI.setDebugLoc(DL)
1847
.setChain(DAG.getEntryNode())
1848
.setLibCallee(CallingConv::C, CallTy,
1849
DAG.getExternalSymbol("__tls_get_addr", Ty),
1850
std::move(Args));
1851
1852
return LowerCallTo(CLI).first;
1853
}
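// Rough shape of the lowering above: the PseudoLA_TLS_GD/LD node
// materializes the address of the symbol's GOT entry, and that single
// pointer-sized value is passed as the argument of an ordinary C call to
// __tls_get_addr, whose return value is the thread-local address.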
1854
1855
SDValue LoongArchTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
1856
SelectionDAG &DAG, unsigned Opc,
1857
bool Large) const {
1858
SDLoc DL(N);
1859
EVT Ty = getPointerTy(DAG.getDataLayout());
1860
const GlobalValue *GV = N->getGlobal();
1861
1862
// This is not actually used, but is necessary for successfully matching the
1863
// PseudoLA_*_LARGE nodes.
1864
SDValue Tmp = DAG.getConstant(0, DL, Ty);
1865
1866
// Use a PC-relative addressing mode to access the global dynamic GOT address.
1867
// This generates the pattern (PseudoLA_TLS_DESC_PC{,LARGE} sym).
1868
SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
1869
return Large ? SDValue(DAG.getMachineNode(Opc, DL, Ty, Tmp, Addr), 0)
1870
: SDValue(DAG.getMachineNode(Opc, DL, Ty, Addr), 0);
1871
}
1872
1873
SDValue
1874
LoongArchTargetLowering::lowerGlobalTLSAddress(SDValue Op,
1875
SelectionDAG &DAG) const {
1876
if (DAG.getMachineFunction().getFunction().getCallingConv() ==
1877
CallingConv::GHC)
1878
report_fatal_error("In GHC calling convention TLS is not supported");
1879
1880
bool Large = DAG.getTarget().getCodeModel() == CodeModel::Large;
1881
assert((!Large || Subtarget.is64Bit()) && "Large code model requires LA64");
1882
1883
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
1884
assert(N->getOffset() == 0 && "unexpected offset in global node");
1885
1886
if (DAG.getTarget().useEmulatedTLS())
1887
report_fatal_error("the emulated TLS is prohibited",
1888
/*GenCrashDiag=*/false);
1889
1890
bool IsDesc = DAG.getTarget().useTLSDESC();
1891
1892
switch (getTargetMachine().getTLSModel(N->getGlobal())) {
1893
case TLSModel::GeneralDynamic:
1894
// In this model, application code calls the dynamic linker function
1895
// __tls_get_addr to locate TLS offsets into the dynamic thread vector at
1896
// runtime.
1897
if (!IsDesc)
1898
return getDynamicTLSAddr(N, DAG,
1899
Large ? LoongArch::PseudoLA_TLS_GD_LARGE
1900
: LoongArch::PseudoLA_TLS_GD,
1901
Large);
1902
break;
1903
case TLSModel::LocalDynamic:
1904
// Same as GeneralDynamic, except for assembly modifiers and relocation
1905
// records.
1906
if (!IsDesc)
1907
return getDynamicTLSAddr(N, DAG,
1908
Large ? LoongArch::PseudoLA_TLS_LD_LARGE
1909
: LoongArch::PseudoLA_TLS_LD,
1910
Large);
1911
break;
1912
case TLSModel::InitialExec:
1913
// This model uses the GOT to resolve TLS offsets.
1914
return getStaticTLSAddr(N, DAG,
1915
Large ? LoongArch::PseudoLA_TLS_IE_LARGE
1916
: LoongArch::PseudoLA_TLS_IE,
1917
/*UseGOT=*/true, Large);
1918
case TLSModel::LocalExec:
1919
// This model is used when statically linking, as the TLS offsets are resolved
1920
// during program linking.
1921
//
1922
// This node doesn't need an extra argument for the large code model.
1923
return getStaticTLSAddr(N, DAG, LoongArch::PseudoLA_TLS_LE,
1924
/*UseGOT=*/false);
1925
}
1926
1927
return getTLSDescAddr(N, DAG,
1928
Large ? LoongArch::PseudoLA_TLS_DESC_PC_LARGE
1929
: LoongArch::PseudoLA_TLS_DESC_PC,
1930
Large);
1931
}
1932
1933
template <unsigned N>
1934
static SDValue checkIntrinsicImmArg(SDValue Op, unsigned ImmOp,
1935
SelectionDAG &DAG, bool IsSigned = false) {
1936
auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp));
1937
// Check the ImmArg.
1938
if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
1939
(!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
1940
DAG.getContext()->emitError(Op->getOperationName(0) +
1941
": argument out of range.");
1942
return DAG.getNode(ISD::UNDEF, SDLoc(Op), Op.getValueType());
1943
}
1944
return SDValue();
1945
}
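// Usage sketch for the helper above: checkIntrinsicImmArg<5>(Op, 2, DAG)
// accepts an unsigned immediate in [0, 31] at operand index 2, and the
// IsSigned form accepts [-16, 15]; an out-of-range immediate emits the
// "argument out of range" diagnostic and yields UNDEF of the result type.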
1946
1947
SDValue
1948
LoongArchTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1949
SelectionDAG &DAG) const {
1950
SDLoc DL(Op);
1951
switch (Op.getConstantOperandVal(0)) {
1952
default:
1953
return SDValue(); // Don't custom lower most intrinsics.
1954
case Intrinsic::thread_pointer: {
1955
EVT PtrVT = getPointerTy(DAG.getDataLayout());
1956
return DAG.getRegister(LoongArch::R2, PtrVT);
1957
}
1958
case Intrinsic::loongarch_lsx_vpickve2gr_d:
1959
case Intrinsic::loongarch_lsx_vpickve2gr_du:
1960
case Intrinsic::loongarch_lsx_vreplvei_d:
1961
case Intrinsic::loongarch_lasx_xvrepl128vei_d:
1962
return checkIntrinsicImmArg<1>(Op, 2, DAG);
1963
case Intrinsic::loongarch_lsx_vreplvei_w:
1964
case Intrinsic::loongarch_lasx_xvrepl128vei_w:
1965
case Intrinsic::loongarch_lasx_xvpickve2gr_d:
1966
case Intrinsic::loongarch_lasx_xvpickve2gr_du:
1967
case Intrinsic::loongarch_lasx_xvpickve_d:
1968
case Intrinsic::loongarch_lasx_xvpickve_d_f:
1969
return checkIntrinsicImmArg<2>(Op, 2, DAG);
1970
case Intrinsic::loongarch_lasx_xvinsve0_d:
1971
return checkIntrinsicImmArg<2>(Op, 3, DAG);
1972
case Intrinsic::loongarch_lsx_vsat_b:
1973
case Intrinsic::loongarch_lsx_vsat_bu:
1974
case Intrinsic::loongarch_lsx_vrotri_b:
1975
case Intrinsic::loongarch_lsx_vsllwil_h_b:
1976
case Intrinsic::loongarch_lsx_vsllwil_hu_bu:
1977
case Intrinsic::loongarch_lsx_vsrlri_b:
1978
case Intrinsic::loongarch_lsx_vsrari_b:
1979
case Intrinsic::loongarch_lsx_vreplvei_h:
1980
case Intrinsic::loongarch_lasx_xvsat_b:
1981
case Intrinsic::loongarch_lasx_xvsat_bu:
1982
case Intrinsic::loongarch_lasx_xvrotri_b:
1983
case Intrinsic::loongarch_lasx_xvsllwil_h_b:
1984
case Intrinsic::loongarch_lasx_xvsllwil_hu_bu:
1985
case Intrinsic::loongarch_lasx_xvsrlri_b:
1986
case Intrinsic::loongarch_lasx_xvsrari_b:
1987
case Intrinsic::loongarch_lasx_xvrepl128vei_h:
1988
case Intrinsic::loongarch_lasx_xvpickve_w:
1989
case Intrinsic::loongarch_lasx_xvpickve_w_f:
1990
return checkIntrinsicImmArg<3>(Op, 2, DAG);
1991
case Intrinsic::loongarch_lasx_xvinsve0_w:
1992
return checkIntrinsicImmArg<3>(Op, 3, DAG);
1993
case Intrinsic::loongarch_lsx_vsat_h:
1994
case Intrinsic::loongarch_lsx_vsat_hu:
1995
case Intrinsic::loongarch_lsx_vrotri_h:
1996
case Intrinsic::loongarch_lsx_vsllwil_w_h:
1997
case Intrinsic::loongarch_lsx_vsllwil_wu_hu:
1998
case Intrinsic::loongarch_lsx_vsrlri_h:
1999
case Intrinsic::loongarch_lsx_vsrari_h:
2000
case Intrinsic::loongarch_lsx_vreplvei_b:
2001
case Intrinsic::loongarch_lasx_xvsat_h:
2002
case Intrinsic::loongarch_lasx_xvsat_hu:
2003
case Intrinsic::loongarch_lasx_xvrotri_h:
2004
case Intrinsic::loongarch_lasx_xvsllwil_w_h:
2005
case Intrinsic::loongarch_lasx_xvsllwil_wu_hu:
2006
case Intrinsic::loongarch_lasx_xvsrlri_h:
2007
case Intrinsic::loongarch_lasx_xvsrari_h:
2008
case Intrinsic::loongarch_lasx_xvrepl128vei_b:
2009
return checkIntrinsicImmArg<4>(Op, 2, DAG);
2010
case Intrinsic::loongarch_lsx_vsrlni_b_h:
2011
case Intrinsic::loongarch_lsx_vsrani_b_h:
2012
case Intrinsic::loongarch_lsx_vsrlrni_b_h:
2013
case Intrinsic::loongarch_lsx_vsrarni_b_h:
2014
case Intrinsic::loongarch_lsx_vssrlni_b_h:
2015
case Intrinsic::loongarch_lsx_vssrani_b_h:
2016
case Intrinsic::loongarch_lsx_vssrlni_bu_h:
2017
case Intrinsic::loongarch_lsx_vssrani_bu_h:
2018
case Intrinsic::loongarch_lsx_vssrlrni_b_h:
2019
case Intrinsic::loongarch_lsx_vssrarni_b_h:
2020
case Intrinsic::loongarch_lsx_vssrlrni_bu_h:
2021
case Intrinsic::loongarch_lsx_vssrarni_bu_h:
2022
case Intrinsic::loongarch_lasx_xvsrlni_b_h:
2023
case Intrinsic::loongarch_lasx_xvsrani_b_h:
2024
case Intrinsic::loongarch_lasx_xvsrlrni_b_h:
2025
case Intrinsic::loongarch_lasx_xvsrarni_b_h:
2026
case Intrinsic::loongarch_lasx_xvssrlni_b_h:
2027
case Intrinsic::loongarch_lasx_xvssrani_b_h:
2028
case Intrinsic::loongarch_lasx_xvssrlni_bu_h:
2029
case Intrinsic::loongarch_lasx_xvssrani_bu_h:
2030
case Intrinsic::loongarch_lasx_xvssrlrni_b_h:
2031
case Intrinsic::loongarch_lasx_xvssrarni_b_h:
2032
case Intrinsic::loongarch_lasx_xvssrlrni_bu_h:
2033
case Intrinsic::loongarch_lasx_xvssrarni_bu_h:
2034
return checkIntrinsicImmArg<4>(Op, 3, DAG);
2035
case Intrinsic::loongarch_lsx_vsat_w:
2036
case Intrinsic::loongarch_lsx_vsat_wu:
2037
case Intrinsic::loongarch_lsx_vrotri_w:
2038
case Intrinsic::loongarch_lsx_vsllwil_d_w:
2039
case Intrinsic::loongarch_lsx_vsllwil_du_wu:
2040
case Intrinsic::loongarch_lsx_vsrlri_w:
2041
case Intrinsic::loongarch_lsx_vsrari_w:
2042
case Intrinsic::loongarch_lsx_vslei_bu:
2043
case Intrinsic::loongarch_lsx_vslei_hu:
2044
case Intrinsic::loongarch_lsx_vslei_wu:
2045
case Intrinsic::loongarch_lsx_vslei_du:
2046
case Intrinsic::loongarch_lsx_vslti_bu:
2047
case Intrinsic::loongarch_lsx_vslti_hu:
2048
case Intrinsic::loongarch_lsx_vslti_wu:
2049
case Intrinsic::loongarch_lsx_vslti_du:
2050
case Intrinsic::loongarch_lsx_vbsll_v:
2051
case Intrinsic::loongarch_lsx_vbsrl_v:
2052
case Intrinsic::loongarch_lasx_xvsat_w:
2053
case Intrinsic::loongarch_lasx_xvsat_wu:
2054
case Intrinsic::loongarch_lasx_xvrotri_w:
2055
case Intrinsic::loongarch_lasx_xvsllwil_d_w:
2056
case Intrinsic::loongarch_lasx_xvsllwil_du_wu:
2057
case Intrinsic::loongarch_lasx_xvsrlri_w:
2058
case Intrinsic::loongarch_lasx_xvsrari_w:
2059
case Intrinsic::loongarch_lasx_xvslei_bu:
2060
case Intrinsic::loongarch_lasx_xvslei_hu:
2061
case Intrinsic::loongarch_lasx_xvslei_wu:
2062
case Intrinsic::loongarch_lasx_xvslei_du:
2063
case Intrinsic::loongarch_lasx_xvslti_bu:
2064
case Intrinsic::loongarch_lasx_xvslti_hu:
2065
case Intrinsic::loongarch_lasx_xvslti_wu:
2066
case Intrinsic::loongarch_lasx_xvslti_du:
2067
case Intrinsic::loongarch_lasx_xvbsll_v:
2068
case Intrinsic::loongarch_lasx_xvbsrl_v:
2069
return checkIntrinsicImmArg<5>(Op, 2, DAG);
2070
case Intrinsic::loongarch_lsx_vseqi_b:
2071
case Intrinsic::loongarch_lsx_vseqi_h:
2072
case Intrinsic::loongarch_lsx_vseqi_w:
2073
case Intrinsic::loongarch_lsx_vseqi_d:
2074
case Intrinsic::loongarch_lsx_vslei_b:
2075
case Intrinsic::loongarch_lsx_vslei_h:
2076
case Intrinsic::loongarch_lsx_vslei_w:
2077
case Intrinsic::loongarch_lsx_vslei_d:
2078
case Intrinsic::loongarch_lsx_vslti_b:
2079
case Intrinsic::loongarch_lsx_vslti_h:
2080
case Intrinsic::loongarch_lsx_vslti_w:
2081
case Intrinsic::loongarch_lsx_vslti_d:
2082
case Intrinsic::loongarch_lasx_xvseqi_b:
2083
case Intrinsic::loongarch_lasx_xvseqi_h:
2084
case Intrinsic::loongarch_lasx_xvseqi_w:
2085
case Intrinsic::loongarch_lasx_xvseqi_d:
2086
case Intrinsic::loongarch_lasx_xvslei_b:
2087
case Intrinsic::loongarch_lasx_xvslei_h:
2088
case Intrinsic::loongarch_lasx_xvslei_w:
2089
case Intrinsic::loongarch_lasx_xvslei_d:
2090
case Intrinsic::loongarch_lasx_xvslti_b:
2091
case Intrinsic::loongarch_lasx_xvslti_h:
2092
case Intrinsic::loongarch_lasx_xvslti_w:
2093
case Intrinsic::loongarch_lasx_xvslti_d:
2094
return checkIntrinsicImmArg<5>(Op, 2, DAG, /*IsSigned=*/true);
2095
case Intrinsic::loongarch_lsx_vsrlni_h_w:
2096
case Intrinsic::loongarch_lsx_vsrani_h_w:
2097
case Intrinsic::loongarch_lsx_vsrlrni_h_w:
2098
case Intrinsic::loongarch_lsx_vsrarni_h_w:
2099
case Intrinsic::loongarch_lsx_vssrlni_h_w:
2100
case Intrinsic::loongarch_lsx_vssrani_h_w:
2101
case Intrinsic::loongarch_lsx_vssrlni_hu_w:
2102
case Intrinsic::loongarch_lsx_vssrani_hu_w:
2103
case Intrinsic::loongarch_lsx_vssrlrni_h_w:
2104
case Intrinsic::loongarch_lsx_vssrarni_h_w:
2105
case Intrinsic::loongarch_lsx_vssrlrni_hu_w:
2106
case Intrinsic::loongarch_lsx_vssrarni_hu_w:
2107
case Intrinsic::loongarch_lsx_vfrstpi_b:
2108
case Intrinsic::loongarch_lsx_vfrstpi_h:
2109
case Intrinsic::loongarch_lasx_xvsrlni_h_w:
2110
case Intrinsic::loongarch_lasx_xvsrani_h_w:
2111
case Intrinsic::loongarch_lasx_xvsrlrni_h_w:
2112
case Intrinsic::loongarch_lasx_xvsrarni_h_w:
2113
case Intrinsic::loongarch_lasx_xvssrlni_h_w:
2114
case Intrinsic::loongarch_lasx_xvssrani_h_w:
2115
case Intrinsic::loongarch_lasx_xvssrlni_hu_w:
2116
case Intrinsic::loongarch_lasx_xvssrani_hu_w:
2117
case Intrinsic::loongarch_lasx_xvssrlrni_h_w:
2118
case Intrinsic::loongarch_lasx_xvssrarni_h_w:
2119
case Intrinsic::loongarch_lasx_xvssrlrni_hu_w:
2120
case Intrinsic::loongarch_lasx_xvssrarni_hu_w:
2121
case Intrinsic::loongarch_lasx_xvfrstpi_b:
2122
case Intrinsic::loongarch_lasx_xvfrstpi_h:
2123
return checkIntrinsicImmArg<5>(Op, 3, DAG);
2124
case Intrinsic::loongarch_lsx_vsat_d:
2125
case Intrinsic::loongarch_lsx_vsat_du:
2126
case Intrinsic::loongarch_lsx_vrotri_d:
2127
case Intrinsic::loongarch_lsx_vsrlri_d:
2128
case Intrinsic::loongarch_lsx_vsrari_d:
2129
case Intrinsic::loongarch_lasx_xvsat_d:
2130
case Intrinsic::loongarch_lasx_xvsat_du:
2131
case Intrinsic::loongarch_lasx_xvrotri_d:
2132
case Intrinsic::loongarch_lasx_xvsrlri_d:
2133
case Intrinsic::loongarch_lasx_xvsrari_d:
2134
return checkIntrinsicImmArg<6>(Op, 2, DAG);
2135
case Intrinsic::loongarch_lsx_vsrlni_w_d:
2136
case Intrinsic::loongarch_lsx_vsrani_w_d:
2137
case Intrinsic::loongarch_lsx_vsrlrni_w_d:
2138
case Intrinsic::loongarch_lsx_vsrarni_w_d:
2139
case Intrinsic::loongarch_lsx_vssrlni_w_d:
2140
case Intrinsic::loongarch_lsx_vssrani_w_d:
2141
case Intrinsic::loongarch_lsx_vssrlni_wu_d:
2142
case Intrinsic::loongarch_lsx_vssrani_wu_d:
2143
case Intrinsic::loongarch_lsx_vssrlrni_w_d:
2144
case Intrinsic::loongarch_lsx_vssrarni_w_d:
2145
case Intrinsic::loongarch_lsx_vssrlrni_wu_d:
2146
case Intrinsic::loongarch_lsx_vssrarni_wu_d:
2147
case Intrinsic::loongarch_lasx_xvsrlni_w_d:
2148
case Intrinsic::loongarch_lasx_xvsrani_w_d:
2149
case Intrinsic::loongarch_lasx_xvsrlrni_w_d:
2150
case Intrinsic::loongarch_lasx_xvsrarni_w_d:
2151
case Intrinsic::loongarch_lasx_xvssrlni_w_d:
2152
case Intrinsic::loongarch_lasx_xvssrani_w_d:
2153
case Intrinsic::loongarch_lasx_xvssrlni_wu_d:
2154
case Intrinsic::loongarch_lasx_xvssrani_wu_d:
2155
case Intrinsic::loongarch_lasx_xvssrlrni_w_d:
2156
case Intrinsic::loongarch_lasx_xvssrarni_w_d:
2157
case Intrinsic::loongarch_lasx_xvssrlrni_wu_d:
2158
case Intrinsic::loongarch_lasx_xvssrarni_wu_d:
2159
return checkIntrinsicImmArg<6>(Op, 3, DAG);
2160
case Intrinsic::loongarch_lsx_vsrlni_d_q:
2161
case Intrinsic::loongarch_lsx_vsrani_d_q:
2162
case Intrinsic::loongarch_lsx_vsrlrni_d_q:
2163
case Intrinsic::loongarch_lsx_vsrarni_d_q:
2164
case Intrinsic::loongarch_lsx_vssrlni_d_q:
2165
case Intrinsic::loongarch_lsx_vssrani_d_q:
2166
case Intrinsic::loongarch_lsx_vssrlni_du_q:
2167
case Intrinsic::loongarch_lsx_vssrani_du_q:
2168
case Intrinsic::loongarch_lsx_vssrlrni_d_q:
2169
case Intrinsic::loongarch_lsx_vssrarni_d_q:
2170
case Intrinsic::loongarch_lsx_vssrlrni_du_q:
2171
case Intrinsic::loongarch_lsx_vssrarni_du_q:
2172
case Intrinsic::loongarch_lasx_xvsrlni_d_q:
2173
case Intrinsic::loongarch_lasx_xvsrani_d_q:
2174
case Intrinsic::loongarch_lasx_xvsrlrni_d_q:
2175
case Intrinsic::loongarch_lasx_xvsrarni_d_q:
2176
case Intrinsic::loongarch_lasx_xvssrlni_d_q:
2177
case Intrinsic::loongarch_lasx_xvssrani_d_q:
2178
case Intrinsic::loongarch_lasx_xvssrlni_du_q:
2179
case Intrinsic::loongarch_lasx_xvssrani_du_q:
2180
case Intrinsic::loongarch_lasx_xvssrlrni_d_q:
2181
case Intrinsic::loongarch_lasx_xvssrarni_d_q:
2182
case Intrinsic::loongarch_lasx_xvssrlrni_du_q:
2183
case Intrinsic::loongarch_lasx_xvssrarni_du_q:
2184
return checkIntrinsicImmArg<7>(Op, 3, DAG);
2185
case Intrinsic::loongarch_lsx_vnori_b:
2186
case Intrinsic::loongarch_lsx_vshuf4i_b:
2187
case Intrinsic::loongarch_lsx_vshuf4i_h:
2188
case Intrinsic::loongarch_lsx_vshuf4i_w:
2189
case Intrinsic::loongarch_lasx_xvnori_b:
2190
case Intrinsic::loongarch_lasx_xvshuf4i_b:
2191
case Intrinsic::loongarch_lasx_xvshuf4i_h:
2192
case Intrinsic::loongarch_lasx_xvshuf4i_w:
2193
case Intrinsic::loongarch_lasx_xvpermi_d:
2194
return checkIntrinsicImmArg<8>(Op, 2, DAG);
2195
case Intrinsic::loongarch_lsx_vshuf4i_d:
2196
case Intrinsic::loongarch_lsx_vpermi_w:
2197
case Intrinsic::loongarch_lsx_vbitseli_b:
2198
case Intrinsic::loongarch_lsx_vextrins_b:
2199
case Intrinsic::loongarch_lsx_vextrins_h:
2200
case Intrinsic::loongarch_lsx_vextrins_w:
2201
case Intrinsic::loongarch_lsx_vextrins_d:
2202
case Intrinsic::loongarch_lasx_xvshuf4i_d:
2203
case Intrinsic::loongarch_lasx_xvpermi_w:
2204
case Intrinsic::loongarch_lasx_xvpermi_q:
2205
case Intrinsic::loongarch_lasx_xvbitseli_b:
2206
case Intrinsic::loongarch_lasx_xvextrins_b:
2207
case Intrinsic::loongarch_lasx_xvextrins_h:
2208
case Intrinsic::loongarch_lasx_xvextrins_w:
2209
case Intrinsic::loongarch_lasx_xvextrins_d:
2210
return checkIntrinsicImmArg<8>(Op, 3, DAG);
2211
case Intrinsic::loongarch_lsx_vrepli_b:
2212
case Intrinsic::loongarch_lsx_vrepli_h:
2213
case Intrinsic::loongarch_lsx_vrepli_w:
2214
case Intrinsic::loongarch_lsx_vrepli_d:
2215
case Intrinsic::loongarch_lasx_xvrepli_b:
2216
case Intrinsic::loongarch_lasx_xvrepli_h:
2217
case Intrinsic::loongarch_lasx_xvrepli_w:
2218
case Intrinsic::loongarch_lasx_xvrepli_d:
2219
return checkIntrinsicImmArg<10>(Op, 1, DAG, /*IsSigned=*/true);
2220
case Intrinsic::loongarch_lsx_vldi:
2221
case Intrinsic::loongarch_lasx_xvldi:
2222
return checkIntrinsicImmArg<13>(Op, 1, DAG, /*IsSigned=*/true);
2223
}
2224
}
2225
2226
// Helper function that emits an error message for intrinsics with a chain and
2227
// returns the merged values of an UNDEF and the chain.
2228
static SDValue emitIntrinsicWithChainErrorMessage(SDValue Op,
2229
StringRef ErrorMsg,
2230
SelectionDAG &DAG) {
2231
DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2232
return DAG.getMergeValues({DAG.getUNDEF(Op.getValueType()), Op.getOperand(0)},
2233
SDLoc(Op));
2234
}
2235
2236
SDValue
2237
LoongArchTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
2238
SelectionDAG &DAG) const {
2239
SDLoc DL(Op);
2240
MVT GRLenVT = Subtarget.getGRLenVT();
2241
EVT VT = Op.getValueType();
2242
SDValue Chain = Op.getOperand(0);
2243
const StringRef ErrorMsgOOR = "argument out of range";
2244
const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2245
const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2246
2247
switch (Op.getConstantOperandVal(1)) {
2248
default:
2249
return Op;
2250
case Intrinsic::loongarch_crc_w_b_w:
2251
case Intrinsic::loongarch_crc_w_h_w:
2252
case Intrinsic::loongarch_crc_w_w_w:
2253
case Intrinsic::loongarch_crc_w_d_w:
2254
case Intrinsic::loongarch_crcc_w_b_w:
2255
case Intrinsic::loongarch_crcc_w_h_w:
2256
case Intrinsic::loongarch_crcc_w_w_w:
2257
case Intrinsic::loongarch_crcc_w_d_w:
2258
return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqLA64, DAG);
2259
case Intrinsic::loongarch_csrrd_w:
2260
case Intrinsic::loongarch_csrrd_d: {
2261
unsigned Imm = Op.getConstantOperandVal(2);
2262
return !isUInt<14>(Imm)
2263
? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2264
: DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
2265
{Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2266
}
2267
case Intrinsic::loongarch_csrwr_w:
2268
case Intrinsic::loongarch_csrwr_d: {
2269
unsigned Imm = Op.getConstantOperandVal(3);
2270
return !isUInt<14>(Imm)
2271
? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2272
: DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
2273
{Chain, Op.getOperand(2),
2274
DAG.getConstant(Imm, DL, GRLenVT)});
2275
}
2276
case Intrinsic::loongarch_csrxchg_w:
2277
case Intrinsic::loongarch_csrxchg_d: {
2278
unsigned Imm = Op.getConstantOperandVal(4);
2279
return !isUInt<14>(Imm)
2280
? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2281
: DAG.getNode(LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
2282
{Chain, Op.getOperand(2), Op.getOperand(3),
2283
DAG.getConstant(Imm, DL, GRLenVT)});
2284
}
2285
case Intrinsic::loongarch_iocsrrd_d: {
2286
return DAG.getNode(
2287
LoongArchISD::IOCSRRD_D, DL, {GRLenVT, MVT::Other},
2288
{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op.getOperand(2))});
2289
}
2290
#define IOCSRRD_CASE(NAME, NODE) \
2291
case Intrinsic::loongarch_##NAME: { \
2292
return DAG.getNode(LoongArchISD::NODE, DL, {GRLenVT, MVT::Other}, \
2293
{Chain, Op.getOperand(2)}); \
2294
}
2295
IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
2296
IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
2297
IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
2298
#undef IOCSRRD_CASE
2299
case Intrinsic::loongarch_cpucfg: {
2300
return DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
2301
{Chain, Op.getOperand(2)});
2302
}
2303
case Intrinsic::loongarch_lddir_d: {
2304
unsigned Imm = Op.getConstantOperandVal(3);
2305
return !isUInt<8>(Imm)
2306
? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2307
: Op;
2308
}
2309
case Intrinsic::loongarch_movfcsr2gr: {
2310
if (!Subtarget.hasBasicF())
2311
return emitIntrinsicWithChainErrorMessage(Op, ErrorMsgReqF, DAG);
2312
unsigned Imm = Op.getConstantOperandVal(2);
2313
return !isUInt<2>(Imm)
2314
? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2315
: DAG.getNode(LoongArchISD::MOVFCSR2GR, DL, {VT, MVT::Other},
2316
{Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2317
}
2318
case Intrinsic::loongarch_lsx_vld:
2319
case Intrinsic::loongarch_lsx_vldrepl_b:
2320
case Intrinsic::loongarch_lasx_xvld:
2321
case Intrinsic::loongarch_lasx_xvldrepl_b:
2322
return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2323
? emitIntrinsicWithChainErrorMessage(Op, ErrorMsgOOR, DAG)
2324
: SDValue();
2325
case Intrinsic::loongarch_lsx_vldrepl_h:
2326
case Intrinsic::loongarch_lasx_xvldrepl_h:
2327
return !isShiftedInt<11, 1>(
2328
cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2329
? emitIntrinsicWithChainErrorMessage(
2330
Op, "argument out of range or not a multiple of 2", DAG)
2331
: SDValue();
2332
case Intrinsic::loongarch_lsx_vldrepl_w:
2333
case Intrinsic::loongarch_lasx_xvldrepl_w:
2334
return !isShiftedInt<10, 2>(
2335
cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2336
? emitIntrinsicWithChainErrorMessage(
2337
Op, "argument out of range or not a multiple of 4", DAG)
2338
: SDValue();
2339
case Intrinsic::loongarch_lsx_vldrepl_d:
2340
case Intrinsic::loongarch_lasx_xvldrepl_d:
2341
return !isShiftedInt<9, 3>(
2342
cast<ConstantSDNode>(Op.getOperand(3))->getSExtValue())
2343
? emitIntrinsicWithChainErrorMessage(
2344
Op, "argument out of range or not a multiple of 8", DAG)
2345
: SDValue();
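// Worked ranges for the isShiftedInt checks above: <11, 1> accepts even
// offsets in [-2048, 2046], <10, 2> accepts multiples of 4 in
// [-2048, 2044], and <9, 3> accepts multiples of 8 in [-2048, 2040],
// presumably mirroring the vldrepl.{h,w,d} immediate encodings.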
2346
}
2347
}
2348
2349
// Helper function that emits an error message for intrinsics with a void
2350
// return value and returns the chain.
2351
static SDValue emitIntrinsicErrorMessage(SDValue Op, StringRef ErrorMsg,
2352
SelectionDAG &DAG) {
2353
2354
DAG.getContext()->emitError(Op->getOperationName(0) + ": " + ErrorMsg + ".");
2355
return Op.getOperand(0);
2356
}
2357
2358
SDValue LoongArchTargetLowering::lowerINTRINSIC_VOID(SDValue Op,
2359
SelectionDAG &DAG) const {
2360
SDLoc DL(Op);
2361
MVT GRLenVT = Subtarget.getGRLenVT();
2362
SDValue Chain = Op.getOperand(0);
2363
uint64_t IntrinsicEnum = Op.getConstantOperandVal(1);
2364
SDValue Op2 = Op.getOperand(2);
2365
const StringRef ErrorMsgOOR = "argument out of range";
2366
const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2367
const StringRef ErrorMsgReqLA32 = "requires loongarch32";
2368
const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2369
2370
switch (IntrinsicEnum) {
2371
default:
2372
// TODO: Add more Intrinsics.
2373
return SDValue();
2374
case Intrinsic::loongarch_cacop_d:
2375
case Intrinsic::loongarch_cacop_w: {
2376
if (IntrinsicEnum == Intrinsic::loongarch_cacop_d && !Subtarget.is64Bit())
2377
return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG);
2378
if (IntrinsicEnum == Intrinsic::loongarch_cacop_w && Subtarget.is64Bit())
2379
return emitIntrinsicErrorMessage(Op, ErrorMsgReqLA32, DAG);
2380
// call void @llvm.loongarch.cacop.[d/w](uimm5, rj, simm12)
2381
unsigned Imm1 = Op2->getAsZExtVal();
2382
int Imm2 = cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue();
2383
if (!isUInt<5>(Imm1) || !isInt<12>(Imm2))
2384
return emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG);
2385
return Op;
2386
}
2387
case Intrinsic::loongarch_dbar: {
2388
unsigned Imm = Op2->getAsZExtVal();
2389
return !isUInt<15>(Imm)
2390
? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2391
: DAG.getNode(LoongArchISD::DBAR, DL, MVT::Other, Chain,
2392
DAG.getConstant(Imm, DL, GRLenVT));
2393
}
2394
case Intrinsic::loongarch_ibar: {
2395
unsigned Imm = Op2->getAsZExtVal();
2396
return !isUInt<15>(Imm)
2397
? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2398
: DAG.getNode(LoongArchISD::IBAR, DL, MVT::Other, Chain,
2399
DAG.getConstant(Imm, DL, GRLenVT));
2400
}
2401
case Intrinsic::loongarch_break: {
2402
unsigned Imm = Op2->getAsZExtVal();
2403
return !isUInt<15>(Imm)
2404
? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2405
: DAG.getNode(LoongArchISD::BREAK, DL, MVT::Other, Chain,
2406
DAG.getConstant(Imm, DL, GRLenVT));
2407
}
2408
case Intrinsic::loongarch_movgr2fcsr: {
2409
if (!Subtarget.hasBasicF())
2410
return emitIntrinsicErrorMessage(Op, ErrorMsgReqF, DAG);
2411
unsigned Imm = Op2->getAsZExtVal();
2412
return !isUInt<2>(Imm)
2413
? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2414
: DAG.getNode(LoongArchISD::MOVGR2FCSR, DL, MVT::Other, Chain,
2415
DAG.getConstant(Imm, DL, GRLenVT),
2416
DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT,
2417
Op.getOperand(3)));
2418
}
2419
case Intrinsic::loongarch_syscall: {
2420
unsigned Imm = Op2->getAsZExtVal();
2421
return !isUInt<15>(Imm)
2422
? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2423
: DAG.getNode(LoongArchISD::SYSCALL, DL, MVT::Other, Chain,
2424
DAG.getConstant(Imm, DL, GRLenVT));
2425
}
2426
#define IOCSRWR_CASE(NAME, NODE) \
2427
case Intrinsic::loongarch_##NAME: { \
2428
SDValue Op3 = Op.getOperand(3); \
2429
return Subtarget.is64Bit() \
2430
? DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, \
2431
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2432
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op3)) \
2433
: DAG.getNode(LoongArchISD::NODE, DL, MVT::Other, Chain, Op2, \
2434
Op3); \
2435
}
2436
IOCSRWR_CASE(iocsrwr_b, IOCSRWR_B);
2437
IOCSRWR_CASE(iocsrwr_h, IOCSRWR_H);
2438
IOCSRWR_CASE(iocsrwr_w, IOCSRWR_W);
2439
#undef IOCSRWR_CASE
2440
case Intrinsic::loongarch_iocsrwr_d: {
2441
return !Subtarget.is64Bit()
2442
? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2443
: DAG.getNode(LoongArchISD::IOCSRWR_D, DL, MVT::Other, Chain,
2444
Op2,
2445
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
2446
Op.getOperand(3)));
2447
}
2448
#define ASRT_LE_GT_CASE(NAME) \
2449
case Intrinsic::loongarch_##NAME: { \
2450
return !Subtarget.is64Bit() \
2451
? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG) \
2452
: Op; \
2453
}
2454
ASRT_LE_GT_CASE(asrtle_d)
2455
ASRT_LE_GT_CASE(asrtgt_d)
2456
#undef ASRT_LE_GT_CASE
2457
case Intrinsic::loongarch_ldpte_d: {
2458
unsigned Imm = Op.getConstantOperandVal(3);
2459
return !Subtarget.is64Bit()
2460
? emitIntrinsicErrorMessage(Op, ErrorMsgReqLA64, DAG)
2461
: !isUInt<8>(Imm) ? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2462
: Op;
2463
}
2464
case Intrinsic::loongarch_lsx_vst:
2465
case Intrinsic::loongarch_lasx_xvst:
2466
return !isInt<12>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue())
2467
? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2468
: SDValue();
2469
case Intrinsic::loongarch_lasx_xvstelm_b:
2470
return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2471
!isUInt<5>(Op.getConstantOperandVal(5)))
2472
? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2473
: SDValue();
2474
case Intrinsic::loongarch_lsx_vstelm_b:
2475
return (!isInt<8>(cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2476
!isUInt<4>(Op.getConstantOperandVal(5)))
2477
? emitIntrinsicErrorMessage(Op, ErrorMsgOOR, DAG)
2478
: SDValue();
2479
case Intrinsic::loongarch_lasx_xvstelm_h:
2480
return (!isShiftedInt<8, 1>(
2481
cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2482
!isUInt<4>(Op.getConstantOperandVal(5)))
2483
? emitIntrinsicErrorMessage(
2484
Op, "argument out of range or not a multiple of 2", DAG)
2485
: SDValue();
2486
case Intrinsic::loongarch_lsx_vstelm_h:
2487
return (!isShiftedInt<8, 1>(
2488
cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2489
!isUInt<3>(Op.getConstantOperandVal(5)))
2490
? emitIntrinsicErrorMessage(
2491
Op, "argument out of range or not a multiple of 2", DAG)
2492
: SDValue();
2493
case Intrinsic::loongarch_lasx_xvstelm_w:
2494
return (!isShiftedInt<8, 2>(
2495
cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2496
!isUInt<3>(Op.getConstantOperandVal(5)))
2497
? emitIntrinsicErrorMessage(
2498
Op, "argument out of range or not a multiple of 4", DAG)
2499
: SDValue();
2500
case Intrinsic::loongarch_lsx_vstelm_w:
2501
return (!isShiftedInt<8, 2>(
2502
cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2503
!isUInt<2>(Op.getConstantOperandVal(5)))
2504
? emitIntrinsicErrorMessage(
2505
Op, "argument out of range or not a multiple of 4", DAG)
2506
: SDValue();
2507
case Intrinsic::loongarch_lasx_xvstelm_d:
2508
return (!isShiftedInt<8, 3>(
2509
cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2510
!isUInt<2>(Op.getConstantOperandVal(5)))
2511
? emitIntrinsicErrorMessage(
2512
Op, "argument out of range or not a multiple of 8", DAG)
2513
: SDValue();
2514
case Intrinsic::loongarch_lsx_vstelm_d:
2515
return (!isShiftedInt<8, 3>(
2516
cast<ConstantSDNode>(Op.getOperand(4))->getSExtValue()) ||
2517
!isUInt<1>(Op.getConstantOperandVal(5)))
2518
? emitIntrinsicErrorMessage(
2519
Op, "argument out of range or not a multiple of 8", DAG)
2520
: SDValue();
2521
}
2522
}
2523
2524
SDValue LoongArchTargetLowering::lowerShiftLeftParts(SDValue Op,
2525
SelectionDAG &DAG) const {
2526
SDLoc DL(Op);
2527
SDValue Lo = Op.getOperand(0);
2528
SDValue Hi = Op.getOperand(1);
2529
SDValue Shamt = Op.getOperand(2);
2530
EVT VT = Lo.getValueType();
2531
2532
// if Shamt-GRLen < 0: // Shamt < GRLen
2533
// Lo = Lo << Shamt
2534
// Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (GRLen-1 ^ Shamt))
2535
// else:
2536
// Lo = 0
2537
// Hi = Lo << (Shamt-GRLen)
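// Worked example with GRLen == 32: for Shamt == 4, Lo becomes Lo << 4 and
// Hi becomes (Hi << 4) | ((Lo >>u 1) >>u 27), i.e. the top 4 bits of Lo
// move into Hi; for Shamt == 40, Shamt - GRLen == 8 is non-negative, so Lo
// becomes 0 and Hi becomes Lo << 8.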
2538
2539
SDValue Zero = DAG.getConstant(0, DL, VT);
2540
SDValue One = DAG.getConstant(1, DL, VT);
2541
SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
2542
SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2543
SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2544
SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2545
2546
SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
2547
SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
2548
SDValue ShiftRightLo =
2549
DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, GRLenMinus1Shamt);
2550
SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
2551
SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
2552
SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusGRLen);
2553
2554
SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2555
2556
Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
2557
Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2558
2559
SDValue Parts[2] = {Lo, Hi};
2560
return DAG.getMergeValues(Parts, DL);
2561
}
2562
2563
SDValue LoongArchTargetLowering::lowerShiftRightParts(SDValue Op,
2564
SelectionDAG &DAG,
2565
bool IsSRA) const {
2566
SDLoc DL(Op);
2567
SDValue Lo = Op.getOperand(0);
2568
SDValue Hi = Op.getOperand(1);
2569
SDValue Shamt = Op.getOperand(2);
2570
EVT VT = Lo.getValueType();
2571
2572
// SRA expansion:
2573
// if Shamt-GRLen < 0: // Shamt < GRLen
2574
// Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2575
// Hi = Hi >>s Shamt
2576
// else:
2577
// Lo = Hi >>s (Shamt-GRLen);
2578
// Hi = Hi >>s (GRLen-1)
2579
//
2580
// SRL expansion:
2581
// if Shamt-GRLen < 0: // Shamt < GRLen
2582
// Lo = (Lo >>u Shamt) | ((Hi << 1) << (ShAmt ^ GRLen-1))
2583
// Hi = Hi >>u Shamt
2584
// else:
2585
// Lo = Hi >>u (Shamt-GRLen);
2586
// Hi = 0;
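// Worked example with GRLen == 32 and SRA: for Shamt == 4, Lo becomes
// (Lo >>u 4) | ((Hi << 1) << 27), pulling the low 4 bits of Hi into the top
// of Lo, and Hi becomes Hi >>s 4; for Shamt == 40, Lo becomes Hi >>s 8 and
// Hi is filled with the sign bit (Hi >>s 31).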
2587
2588
unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
2589
2590
SDValue Zero = DAG.getConstant(0, DL, VT);
2591
SDValue One = DAG.getConstant(1, DL, VT);
2592
SDValue MinusGRLen = DAG.getConstant(-(int)Subtarget.getGRLen(), DL, VT);
2593
SDValue GRLenMinus1 = DAG.getConstant(Subtarget.getGRLen() - 1, DL, VT);
2594
SDValue ShamtMinusGRLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusGRLen);
2595
SDValue GRLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, GRLenMinus1);
2596
2597
SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
2598
SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
2599
SDValue ShiftLeftHi =
2600
DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, GRLenMinus1Shamt);
2601
SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
2602
SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
2603
SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusGRLen);
2604
SDValue HiFalse =
2605
IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, GRLenMinus1) : Zero;
2606
2607
SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusGRLen, Zero, ISD::SETLT);
2608
2609
Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
2610
Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
2611
2612
SDValue Parts[2] = {Lo, Hi};
2613
return DAG.getMergeValues(Parts, DL);
2614
}
2615
2616
// Returns the opcode of the target-specific SDNode that implements the 32-bit
2617
// form of the given Opcode.
2618
static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
2619
switch (Opcode) {
2620
default:
2621
llvm_unreachable("Unexpected opcode");
2622
case ISD::UDIV:
2623
return LoongArchISD::DIV_WU;
2624
case ISD::UREM:
2625
return LoongArchISD::MOD_WU;
2626
case ISD::SHL:
2627
return LoongArchISD::SLL_W;
2628
case ISD::SRA:
2629
return LoongArchISD::SRA_W;
2630
case ISD::SRL:
2631
return LoongArchISD::SRL_W;
2632
case ISD::ROTL:
2633
case ISD::ROTR:
2634
return LoongArchISD::ROTR_W;
2635
case ISD::CTTZ:
2636
return LoongArchISD::CTZ_W;
2637
case ISD::CTLZ:
2638
return LoongArchISD::CLZ_W;
2639
}
2640
}
2641
2642
// Converts the given i8/i16/i32 operation to a target-specific SelectionDAG
2643
// node. Because i8/i16/i32 isn't a legal type for LA64, these operations would
2644
// otherwise be promoted to i64, making it difficult to select the
2645
// SLL_W/.../*W node later on, because the fact that the operation was
2646
// originally of type i8/i16/i32 is lost.
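// Hedged example: an i32 'sra' with a non-constant amount on LA64 is
// rebuilt here as (trunc (SRA_W (any_ext x), (any_ext amt))) on i64, so
// instruction selection can still pick sra.w rather than the 64-bit form.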
2647
static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG, int NumOp,
2648
unsigned ExtOpc = ISD::ANY_EXTEND) {
2649
SDLoc DL(N);
2650
LoongArchISD::NodeType WOpcode = getLoongArchWOpcode(N->getOpcode());
2651
SDValue NewOp0, NewRes;
2652
2653
switch (NumOp) {
2654
default:
2655
llvm_unreachable("Unexpected NumOp");
2656
case 1: {
2657
NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2658
NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0);
2659
break;
2660
}
2661
case 2: {
2662
NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
2663
SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
2664
if (N->getOpcode() == ISD::ROTL) {
2665
SDValue TmpOp = DAG.getConstant(32, DL, MVT::i64);
2666
NewOp1 = DAG.getNode(ISD::SUB, DL, MVT::i64, TmpOp, NewOp1);
2667
}
2668
NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
2669
break;
2670
}
2671
// TODO: Handle more NumOp.
2672
}
2673
2674
// ReplaceNodeResults requires we maintain the same type for the return
2675
// value.
2676
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
2677
}
2678
2679
// Converts the given 32-bit operation to an i64 operation with sign extension
2680
// semantics, to reduce the number of sign extension instructions.
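// Sketch: an i32 'add' on LA64 becomes
//   (trunc (sext_inreg (add (any_ext a), (any_ext b)), i32))
// which can be selected as add.w and, presumably, keeps the value in the
// sign-extended form expected of 32-bit results held in 64-bit registers.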
2681
static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
2682
SDLoc DL(N);
2683
SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
2684
SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
2685
SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
2686
SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
2687
DAG.getValueType(MVT::i32));
2688
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
2689
}
2690
2691
// Helper function that emits an error message for intrinsics with or without a
2692
// chain, and returns an UNDEF and, if present, the chain as the results.
2693
static void emitErrorAndReplaceIntrinsicResults(
2694
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG,
2695
StringRef ErrorMsg, bool WithChain = true) {
2696
DAG.getContext()->emitError(N->getOperationName(0) + ": " + ErrorMsg + ".");
2697
Results.push_back(DAG.getUNDEF(N->getValueType(0)));
2698
if (!WithChain)
2699
return;
2700
Results.push_back(N->getOperand(0));
2701
}
2702
2703
template <unsigned N>
2704
static void
2705
replaceVPICKVE2GRResults(SDNode *Node, SmallVectorImpl<SDValue> &Results,
2706
SelectionDAG &DAG, const LoongArchSubtarget &Subtarget,
2707
unsigned ResOp) {
2708
const StringRef ErrorMsgOOR = "argument out of range";
2709
unsigned Imm = Node->getConstantOperandVal(2);
2710
if (!isUInt<N>(Imm)) {
2711
emitErrorAndReplaceIntrinsicResults(Node, Results, DAG, ErrorMsgOOR,
2712
/*WithChain=*/false);
2713
return;
2714
}
2715
SDLoc DL(Node);
2716
SDValue Vec = Node->getOperand(1);
2717
2718
SDValue PickElt =
2719
DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec,
2720
DAG.getConstant(Imm, DL, Subtarget.getGRLenVT()),
2721
DAG.getValueType(Vec.getValueType().getVectorElementType()));
2722
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, Node->getValueType(0),
2723
PickElt.getValue(0)));
2724
}
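// Usage sketch for the helper above: replaceVPICKVE2GRResults<4>(...) is
// used for vpickve2gr.b, whose lane index must fit in 4 bits (v16i8 has 16
// lanes); the element is read on GRLenVT via VPICK_SEXT_ELT/VPICK_ZEXT_ELT
// and then truncated back to the node's original, illegal result type.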
2725
2726
static void replaceVecCondBranchResults(SDNode *N,
2727
SmallVectorImpl<SDValue> &Results,
2728
SelectionDAG &DAG,
2729
const LoongArchSubtarget &Subtarget,
2730
unsigned ResOp) {
2731
SDLoc DL(N);
2732
SDValue Vec = N->getOperand(1);
2733
2734
SDValue CB = DAG.getNode(ResOp, DL, Subtarget.getGRLenVT(), Vec);
2735
Results.push_back(
2736
DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), CB.getValue(0)));
2737
}
2738
2739
static void
2740
replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
2741
SelectionDAG &DAG,
2742
const LoongArchSubtarget &Subtarget) {
2743
switch (N->getConstantOperandVal(0)) {
2744
default:
2745
llvm_unreachable("Unexpected Intrinsic.");
2746
case Intrinsic::loongarch_lsx_vpickve2gr_b:
2747
replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2748
LoongArchISD::VPICK_SEXT_ELT);
2749
break;
2750
case Intrinsic::loongarch_lsx_vpickve2gr_h:
2751
case Intrinsic::loongarch_lasx_xvpickve2gr_w:
2752
replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2753
LoongArchISD::VPICK_SEXT_ELT);
2754
break;
2755
case Intrinsic::loongarch_lsx_vpickve2gr_w:
2756
replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2757
LoongArchISD::VPICK_SEXT_ELT);
2758
break;
2759
case Intrinsic::loongarch_lsx_vpickve2gr_bu:
2760
replaceVPICKVE2GRResults<4>(N, Results, DAG, Subtarget,
2761
LoongArchISD::VPICK_ZEXT_ELT);
2762
break;
2763
case Intrinsic::loongarch_lsx_vpickve2gr_hu:
2764
case Intrinsic::loongarch_lasx_xvpickve2gr_wu:
2765
replaceVPICKVE2GRResults<3>(N, Results, DAG, Subtarget,
2766
LoongArchISD::VPICK_ZEXT_ELT);
2767
break;
2768
case Intrinsic::loongarch_lsx_vpickve2gr_wu:
2769
replaceVPICKVE2GRResults<2>(N, Results, DAG, Subtarget,
2770
LoongArchISD::VPICK_ZEXT_ELT);
2771
break;
2772
case Intrinsic::loongarch_lsx_bz_b:
2773
case Intrinsic::loongarch_lsx_bz_h:
2774
case Intrinsic::loongarch_lsx_bz_w:
2775
case Intrinsic::loongarch_lsx_bz_d:
2776
case Intrinsic::loongarch_lasx_xbz_b:
2777
case Intrinsic::loongarch_lasx_xbz_h:
2778
case Intrinsic::loongarch_lasx_xbz_w:
2779
case Intrinsic::loongarch_lasx_xbz_d:
2780
replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2781
LoongArchISD::VALL_ZERO);
2782
break;
2783
case Intrinsic::loongarch_lsx_bz_v:
2784
case Intrinsic::loongarch_lasx_xbz_v:
2785
replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2786
LoongArchISD::VANY_ZERO);
2787
break;
2788
case Intrinsic::loongarch_lsx_bnz_b:
2789
case Intrinsic::loongarch_lsx_bnz_h:
2790
case Intrinsic::loongarch_lsx_bnz_w:
2791
case Intrinsic::loongarch_lsx_bnz_d:
2792
case Intrinsic::loongarch_lasx_xbnz_b:
2793
case Intrinsic::loongarch_lasx_xbnz_h:
2794
case Intrinsic::loongarch_lasx_xbnz_w:
2795
case Intrinsic::loongarch_lasx_xbnz_d:
2796
replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2797
LoongArchISD::VALL_NONZERO);
2798
break;
2799
case Intrinsic::loongarch_lsx_bnz_v:
2800
case Intrinsic::loongarch_lasx_xbnz_v:
2801
replaceVecCondBranchResults(N, Results, DAG, Subtarget,
2802
LoongArchISD::VANY_NONZERO);
2803
break;
2804
}
2805
}
2806
2807
void LoongArchTargetLowering::ReplaceNodeResults(
2808
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
2809
SDLoc DL(N);
2810
EVT VT = N->getValueType(0);
2811
switch (N->getOpcode()) {
2812
default:
2813
llvm_unreachable("Don't know how to legalize this operation");
2814
case ISD::ADD:
2815
case ISD::SUB:
2816
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
2817
"Unexpected custom legalisation");
2818
Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
2819
break;
2820
case ISD::UDIV:
2821
case ISD::UREM:
2822
assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2823
"Unexpected custom legalisation");
2824
Results.push_back(customLegalizeToWOp(N, DAG, 2, ISD::SIGN_EXTEND));
2825
break;
2826
case ISD::SHL:
2827
case ISD::SRA:
2828
case ISD::SRL:
2829
assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2830
"Unexpected custom legalisation");
2831
if (N->getOperand(1).getOpcode() != ISD::Constant) {
2832
Results.push_back(customLegalizeToWOp(N, DAG, 2));
2833
break;
2834
}
2835
break;
2836
case ISD::ROTL:
2837
case ISD::ROTR:
2838
assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2839
"Unexpected custom legalisation");
2840
Results.push_back(customLegalizeToWOp(N, DAG, 2));
2841
break;
2842
case ISD::FP_TO_SINT: {
2843
assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2844
"Unexpected custom legalisation");
2845
SDValue Src = N->getOperand(0);
2846
EVT FVT = EVT::getFloatingPointVT(N->getValueSizeInBits(0));
2847
if (getTypeAction(*DAG.getContext(), Src.getValueType()) !=
2848
TargetLowering::TypeSoftenFloat) {
2849
SDValue Dst = DAG.getNode(LoongArchISD::FTINT, DL, FVT, Src);
2850
Results.push_back(DAG.getNode(ISD::BITCAST, DL, VT, Dst));
2851
return;
2852
}
2853
// If the FP type needs to be softened, emit a library call using the 'si'
2854
// version. If we left it to default legalization we'd end up with 'di'.
2855
RTLIB::Libcall LC;
2856
LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
2857
MakeLibCallOptions CallOptions;
2858
EVT OpVT = Src.getValueType();
2859
CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
2860
SDValue Chain = SDValue();
2861
SDValue Result;
2862
std::tie(Result, Chain) =
2863
makeLibCall(DAG, LC, VT, Src, CallOptions, DL, Chain);
2864
Results.push_back(Result);
2865
break;
2866
}
2867
case ISD::BITCAST: {
2868
SDValue Src = N->getOperand(0);
2869
EVT SrcVT = Src.getValueType();
2870
if (VT == MVT::i32 && SrcVT == MVT::f32 && Subtarget.is64Bit() &&
2871
Subtarget.hasBasicF()) {
2872
SDValue Dst =
2873
DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Src);
2874
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Dst));
2875
}
2876
break;
2877
}
2878
case ISD::FP_TO_UINT: {
2879
assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2880
"Unexpected custom legalisation");
2881
auto &TLI = DAG.getTargetLoweringInfo();
2882
SDValue Tmp1, Tmp2;
2883
TLI.expandFP_TO_UINT(N, Tmp1, Tmp2, DAG);
2884
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Tmp1));
2885
break;
2886
}
2887
case ISD::BSWAP: {
2888
SDValue Src = N->getOperand(0);
2889
assert((VT == MVT::i16 || VT == MVT::i32) &&
2890
"Unexpected custom legalization");
2891
MVT GRLenVT = Subtarget.getGRLenVT();
2892
SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2893
SDValue Tmp;
2894
switch (VT.getSizeInBits()) {
2895
default:
2896
llvm_unreachable("Unexpected operand width");
2897
case 16:
2898
Tmp = DAG.getNode(LoongArchISD::REVB_2H, DL, GRLenVT, NewSrc);
2899
break;
2900
case 32:
2901
// Only LA64 will get here, due to the size mismatch between VT and
2902
// GRLenVT; LA32 lowering is defined directly in LoongArchInstrInfo.
2903
Tmp = DAG.getNode(LoongArchISD::REVB_2W, DL, GRLenVT, NewSrc);
2904
break;
2905
}
2906
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
2907
break;
2908
}
2909
case ISD::BITREVERSE: {
2910
SDValue Src = N->getOperand(0);
2911
assert((VT == MVT::i8 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
2912
"Unexpected custom legalization");
2913
MVT GRLenVT = Subtarget.getGRLenVT();
2914
SDValue NewSrc = DAG.getNode(ISD::ANY_EXTEND, DL, GRLenVT, Src);
2915
SDValue Tmp;
2916
switch (VT.getSizeInBits()) {
2917
default:
2918
llvm_unreachable("Unexpected operand width");
2919
case 8:
2920
Tmp = DAG.getNode(LoongArchISD::BITREV_4B, DL, GRLenVT, NewSrc);
2921
break;
2922
case 32:
2923
Tmp = DAG.getNode(LoongArchISD::BITREV_W, DL, GRLenVT, NewSrc);
2924
break;
2925
}
2926
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Tmp));
2927
break;
2928
}
2929
case ISD::CTLZ:
2930
case ISD::CTTZ: {
2931
assert(VT == MVT::i32 && Subtarget.is64Bit() &&
2932
"Unexpected custom legalisation");
2933
Results.push_back(customLegalizeToWOp(N, DAG, 1));
2934
break;
2935
}
2936
case ISD::INTRINSIC_W_CHAIN: {
2937
SDValue Chain = N->getOperand(0);
2938
SDValue Op2 = N->getOperand(2);
2939
MVT GRLenVT = Subtarget.getGRLenVT();
2940
const StringRef ErrorMsgOOR = "argument out of range";
2941
const StringRef ErrorMsgReqLA64 = "requires loongarch64";
2942
const StringRef ErrorMsgReqF = "requires basic 'f' target feature";
2943
2944
switch (N->getConstantOperandVal(1)) {
2945
default:
2946
llvm_unreachable("Unexpected Intrinsic.");
2947
case Intrinsic::loongarch_movfcsr2gr: {
2948
if (!Subtarget.hasBasicF()) {
2949
emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqF);
2950
return;
2951
}
2952
unsigned Imm = Op2->getAsZExtVal();
2953
if (!isUInt<2>(Imm)) {
2954
emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
2955
return;
2956
}
2957
SDValue MOVFCSR2GRResults = DAG.getNode(
2958
LoongArchISD::MOVFCSR2GR, SDLoc(N), {MVT::i64, MVT::Other},
2959
{Chain, DAG.getConstant(Imm, DL, GRLenVT)});
2960
Results.push_back(
2961
DAG.getNode(ISD::TRUNCATE, DL, VT, MOVFCSR2GRResults.getValue(0)));
2962
Results.push_back(MOVFCSR2GRResults.getValue(1));
2963
break;
2964
}
2965
#define CRC_CASE_EXT_BINARYOP(NAME, NODE) \
2966
case Intrinsic::loongarch_##NAME: { \
2967
SDValue NODE = DAG.getNode( \
2968
LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2969
{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2), \
2970
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
2971
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
2972
Results.push_back(NODE.getValue(1)); \
2973
break; \
2974
}
2975
CRC_CASE_EXT_BINARYOP(crc_w_b_w, CRC_W_B_W)
2976
CRC_CASE_EXT_BINARYOP(crc_w_h_w, CRC_W_H_W)
2977
CRC_CASE_EXT_BINARYOP(crc_w_w_w, CRC_W_W_W)
2978
CRC_CASE_EXT_BINARYOP(crcc_w_b_w, CRCC_W_B_W)
2979
CRC_CASE_EXT_BINARYOP(crcc_w_h_w, CRCC_W_H_W)
2980
CRC_CASE_EXT_BINARYOP(crcc_w_w_w, CRCC_W_W_W)
2981
#undef CRC_CASE_EXT_BINARYOP
2982
2983
#define CRC_CASE_EXT_UNARYOP(NAME, NODE) \
2984
case Intrinsic::loongarch_##NAME: { \
2985
SDValue NODE = DAG.getNode( \
2986
LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
2987
{Chain, Op2, \
2988
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3))}); \
2989
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NODE.getValue(0))); \
2990
Results.push_back(NODE.getValue(1)); \
2991
break; \
2992
}
2993
CRC_CASE_EXT_UNARYOP(crc_w_d_w, CRC_W_D_W)
2994
CRC_CASE_EXT_UNARYOP(crcc_w_d_w, CRCC_W_D_W)
2995
#undef CRC_CASE_EXT_UNARYOP
2996
#define CSR_CASE(ID) \
2997
case Intrinsic::loongarch_##ID: { \
2998
if (!Subtarget.is64Bit()) \
2999
emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64); \
3000
break; \
3001
}
3002
CSR_CASE(csrrd_d);
3003
CSR_CASE(csrwr_d);
3004
CSR_CASE(csrxchg_d);
3005
CSR_CASE(iocsrrd_d);
3006
#undef CSR_CASE
3007
case Intrinsic::loongarch_csrrd_w: {
3008
unsigned Imm = Op2->getAsZExtVal();
3009
if (!isUInt<14>(Imm)) {
3010
emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3011
return;
3012
}
3013
SDValue CSRRDResults =
3014
DAG.getNode(LoongArchISD::CSRRD, DL, {GRLenVT, MVT::Other},
3015
{Chain, DAG.getConstant(Imm, DL, GRLenVT)});
3016
Results.push_back(
3017
DAG.getNode(ISD::TRUNCATE, DL, VT, CSRRDResults.getValue(0)));
3018
Results.push_back(CSRRDResults.getValue(1));
3019
break;
3020
}
3021
case Intrinsic::loongarch_csrwr_w: {
3022
unsigned Imm = N->getConstantOperandVal(3);
3023
if (!isUInt<14>(Imm)) {
3024
emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3025
return;
3026
}
3027
SDValue CSRWRResults =
3028
DAG.getNode(LoongArchISD::CSRWR, DL, {GRLenVT, MVT::Other},
3029
{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3030
DAG.getConstant(Imm, DL, GRLenVT)});
3031
Results.push_back(
3032
DAG.getNode(ISD::TRUNCATE, DL, VT, CSRWRResults.getValue(0)));
3033
Results.push_back(CSRWRResults.getValue(1));
3034
break;
3035
}
3036
case Intrinsic::loongarch_csrxchg_w: {
3037
unsigned Imm = N->getConstantOperandVal(4);
3038
if (!isUInt<14>(Imm)) {
3039
emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgOOR);
3040
return;
3041
}
3042
SDValue CSRXCHGResults = DAG.getNode(
3043
LoongArchISD::CSRXCHG, DL, {GRLenVT, MVT::Other},
3044
{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2),
3045
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3)),
3046
DAG.getConstant(Imm, DL, GRLenVT)});
3047
Results.push_back(
3048
DAG.getNode(ISD::TRUNCATE, DL, VT, CSRXCHGResults.getValue(0)));
3049
Results.push_back(CSRXCHGResults.getValue(1));
3050
break;
3051
}
3052
#define IOCSRRD_CASE(NAME, NODE) \
3053
case Intrinsic::loongarch_##NAME: { \
3054
SDValue IOCSRRDResults = \
3055
DAG.getNode(LoongArchISD::NODE, DL, {MVT::i64, MVT::Other}, \
3056
{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)}); \
3057
Results.push_back( \
3058
DAG.getNode(ISD::TRUNCATE, DL, VT, IOCSRRDResults.getValue(0))); \
3059
Results.push_back(IOCSRRDResults.getValue(1)); \
3060
break; \
3061
}
3062
IOCSRRD_CASE(iocsrrd_b, IOCSRRD_B);
3063
IOCSRRD_CASE(iocsrrd_h, IOCSRRD_H);
3064
IOCSRRD_CASE(iocsrrd_w, IOCSRRD_W);
3065
#undef IOCSRRD_CASE
3066
case Intrinsic::loongarch_cpucfg: {
3067
SDValue CPUCFGResults =
3068
DAG.getNode(LoongArchISD::CPUCFG, DL, {GRLenVT, MVT::Other},
3069
{Chain, DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op2)});
3070
Results.push_back(
3071
DAG.getNode(ISD::TRUNCATE, DL, VT, CPUCFGResults.getValue(0)));
3072
Results.push_back(CPUCFGResults.getValue(1));
3073
break;
3074
}
3075
case Intrinsic::loongarch_lddir_d: {
3076
if (!Subtarget.is64Bit()) {
3077
emitErrorAndReplaceIntrinsicResults(N, Results, DAG, ErrorMsgReqLA64);
3078
return;
3079
}
3080
break;
3081
}
3082
}
3083
break;
3084
}
3085
case ISD::READ_REGISTER: {
3086
if (Subtarget.is64Bit())
3087
DAG.getContext()->emitError(
3088
"On LA64, only 64-bit registers can be read.");
3089
else
3090
DAG.getContext()->emitError(
3091
"On LA32, only 32-bit registers can be read.");
3092
Results.push_back(DAG.getUNDEF(VT));
3093
Results.push_back(N->getOperand(0));
3094
break;
3095
}
3096
case ISD::INTRINSIC_WO_CHAIN: {
3097
replaceINTRINSIC_WO_CHAINResults(N, Results, DAG, Subtarget);
3098
break;
3099
}
3100
}
3101
}
3102
3103
static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
3104
TargetLowering::DAGCombinerInfo &DCI,
3105
const LoongArchSubtarget &Subtarget) {
3106
if (DCI.isBeforeLegalizeOps())
3107
return SDValue();
3108
3109
SDValue FirstOperand = N->getOperand(0);
3110
SDValue SecondOperand = N->getOperand(1);
3111
unsigned FirstOperandOpc = FirstOperand.getOpcode();
3112
EVT ValTy = N->getValueType(0);
3113
SDLoc DL(N);
3114
uint64_t lsb, msb;
3115
unsigned SMIdx, SMLen;
3116
ConstantSDNode *CN;
3117
SDValue NewOperand;
3118
MVT GRLenVT = Subtarget.getGRLenVT();
3119
3120
// Op's second operand must be a shifted mask.
3121
if (!(CN = dyn_cast<ConstantSDNode>(SecondOperand)) ||
3122
!isShiftedMask_64(CN->getZExtValue(), SMIdx, SMLen))
3123
return SDValue();
3124
3125
if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL) {
3126
// Pattern match BSTRPICK.
3127
// $dst = and ((sra or srl) $src , lsb), (2**len - 1)
3128
// => BSTRPICK $dst, $src, msb, lsb
3129
// where msb = lsb + len - 1
3130
3131
// The second operand of the shift must be an immediate.
3132
if (!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))))
3133
return SDValue();
3134
3135
lsb = CN->getZExtValue();
3136
3137
// Return if the shifted mask does not start at bit 0 or the sum of its
3138
// length and lsb exceeds the word's size.
3139
if (SMIdx != 0 || lsb + SMLen > ValTy.getSizeInBits())
3140
return SDValue();
3141
3142
NewOperand = FirstOperand.getOperand(0);
3143
} else {
3144
// Pattern match BSTRPICK.
3145
// $dst = and $src, (2**len- 1) , if len > 12
3146
// => BSTRPICK $dst, $src, msb, lsb
3147
// where lsb = 0 and msb = len - 1
3148
3149
// If the mask is <= 0xfff, andi can be used instead.
3150
if (CN->getZExtValue() <= 0xfff)
3151
return SDValue();
3152
3153
// Return if the MSB exceeds.
3154
if (SMIdx + SMLen > ValTy.getSizeInBits())
3155
return SDValue();
3156
3157
if (SMIdx > 0) {
3158
// Omit if the constant has more than 2 uses. This a conservative
3159
// decision. Whether it is a win depends on the HW microarchitecture.
3160
// However it should always be better for 1 and 2 uses.
3161
if (CN->use_size() > 2)
3162
return SDValue();
3163
// Return if the constant can be composed by a single LU12I.W.
3164
if ((CN->getZExtValue() & 0xfff) == 0)
3165
return SDValue();
3166
// Return if the constand can be composed by a single ADDI with
3167
// the zero register.
3168
if (CN->getSExtValue() >= -2048 && CN->getSExtValue() < 0)
3169
return SDValue();
3170
}
3171
3172
lsb = SMIdx;
3173
NewOperand = FirstOperand;
3174
}
3175
3176
msb = lsb + SMLen - 1;
3177
SDValue NR0 = DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy, NewOperand,
3178
DAG.getConstant(msb, DL, GRLenVT),
3179
DAG.getConstant(lsb, DL, GRLenVT));
3180
if (FirstOperandOpc == ISD::SRA || FirstOperandOpc == ISD::SRL || lsb == 0)
3181
return NR0;
3182
// Try to optimize to
3183
// bstrpick $Rd, $Rs, msb, lsb
3184
// slli $Rd, $Rd, lsb
3185
return DAG.getNode(ISD::SHL, DL, ValTy, NR0,
3186
DAG.getConstant(lsb, DL, GRLenVT));
3187
}
3188
3189
static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG,
3190
TargetLowering::DAGCombinerInfo &DCI,
3191
const LoongArchSubtarget &Subtarget) {
3192
if (DCI.isBeforeLegalizeOps())
3193
return SDValue();
3194
3195
// $dst = srl (and $src, Mask), Shamt
3196
// =>
3197
// BSTRPICK $dst, $src, MaskIdx+MaskLen-1, Shamt
3198
// when Mask is a shifted mask, and MaskIdx <= Shamt <= MaskIdx+MaskLen-1
3199
//
3200
3201
SDValue FirstOperand = N->getOperand(0);
3202
ConstantSDNode *CN;
3203
EVT ValTy = N->getValueType(0);
3204
SDLoc DL(N);
3205
MVT GRLenVT = Subtarget.getGRLenVT();
3206
unsigned MaskIdx, MaskLen;
3207
uint64_t Shamt;
3208
3209
// The first operand must be an AND and the second operand of the AND must be
3210
// a shifted mask.
3211
if (FirstOperand.getOpcode() != ISD::AND ||
3212
!(CN = dyn_cast<ConstantSDNode>(FirstOperand.getOperand(1))) ||
3213
!isShiftedMask_64(CN->getZExtValue(), MaskIdx, MaskLen))
3214
return SDValue();
3215
3216
// The second operand (shift amount) must be an immediate.
3217
if (!(CN = dyn_cast<ConstantSDNode>(N->getOperand(1))))
3218
return SDValue();
3219
3220
Shamt = CN->getZExtValue();
3221
if (MaskIdx <= Shamt && Shamt <= MaskIdx + MaskLen - 1)
3222
return DAG.getNode(LoongArchISD::BSTRPICK, DL, ValTy,
3223
FirstOperand->getOperand(0),
3224
DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3225
DAG.getConstant(Shamt, DL, GRLenVT));
3226
3227
return SDValue();
3228
}
3229
3230
static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
3231
TargetLowering::DAGCombinerInfo &DCI,
3232
const LoongArchSubtarget &Subtarget) {
3233
MVT GRLenVT = Subtarget.getGRLenVT();
3234
EVT ValTy = N->getValueType(0);
3235
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3236
ConstantSDNode *CN0, *CN1;
3237
SDLoc DL(N);
3238
unsigned ValBits = ValTy.getSizeInBits();
3239
unsigned MaskIdx0, MaskLen0, MaskIdx1, MaskLen1;
3240
unsigned Shamt;
3241
bool SwapAndRetried = false;
3242
3243
if (DCI.isBeforeLegalizeOps())
3244
return SDValue();
3245
3246
if (ValBits != 32 && ValBits != 64)
3247
return SDValue();
3248
3249
Retry:
3250
// 1st pattern to match BSTRINS:
3251
// R = or (and X, mask0), (and (shl Y, lsb), mask1)
3252
// where mask1 = (2**size - 1) << lsb, mask0 = ~mask1
3253
// =>
3254
// R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
3255
if (N0.getOpcode() == ISD::AND &&
3256
(CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3257
isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3258
N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL &&
3259
(CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3260
isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3261
MaskIdx0 == MaskIdx1 && MaskLen0 == MaskLen1 &&
3262
(CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3263
(Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3264
(MaskIdx0 + MaskLen0 <= ValBits)) {
3265
LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 1\n");
3266
return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3267
N1.getOperand(0).getOperand(0),
3268
DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3269
DAG.getConstant(MaskIdx0, DL, GRLenVT));
3270
}
3271
3272
// 2nd pattern to match BSTRINS:
3273
// R = or (and X, mask0), (shl (and Y, mask1), lsb)
3274
// where mask1 = (2**size - 1), mask0 = ~(mask1 << lsb)
3275
// =>
3276
// R = BSTRINS X, Y, msb, lsb (where msb = lsb + size - 1)
3277
if (N0.getOpcode() == ISD::AND &&
3278
(CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3279
isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3280
N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3281
(CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3282
(Shamt = CN1->getZExtValue()) == MaskIdx0 &&
3283
(CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3284
isShiftedMask_64(CN1->getZExtValue(), MaskIdx1, MaskLen1) &&
3285
MaskLen0 == MaskLen1 && MaskIdx1 == 0 &&
3286
(MaskIdx0 + MaskLen0 <= ValBits)) {
3287
LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 2\n");
3288
return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3289
N1.getOperand(0).getOperand(0),
3290
DAG.getConstant((MaskIdx0 + MaskLen0 - 1), DL, GRLenVT),
3291
DAG.getConstant(MaskIdx0, DL, GRLenVT));
3292
}
3293
3294
// 3rd pattern to match BSTRINS:
3295
// R = or (and X, mask0), (and Y, mask1)
3296
// where ~mask0 = (2**size - 1) << lsb, mask0 & mask1 = 0
3297
// =>
3298
// R = BSTRINS X, (shr (and Y, mask1), lsb), msb, lsb
3299
// where msb = lsb + size - 1
3300
if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
3301
(CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3302
isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3303
(MaskIdx0 + MaskLen0 <= 64) &&
3304
(CN1 = dyn_cast<ConstantSDNode>(N1->getOperand(1))) &&
3305
(CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3306
LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 3\n");
3307
return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3308
DAG.getNode(ISD::SRL, DL, N1->getValueType(0), N1,
3309
DAG.getConstant(MaskIdx0, DL, GRLenVT)),
3310
DAG.getConstant(ValBits == 32
3311
? (MaskIdx0 + (MaskLen0 & 31) - 1)
3312
: (MaskIdx0 + MaskLen0 - 1),
3313
DL, GRLenVT),
3314
DAG.getConstant(MaskIdx0, DL, GRLenVT));
3315
}
3316
3317
// 4th pattern to match BSTRINS:
3318
// R = or (and X, mask), (shl Y, shamt)
3319
// where mask = (2**shamt - 1)
3320
// =>
3321
// R = BSTRINS X, Y, ValBits - 1, shamt
3322
// where ValBits = 32 or 64
3323
if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::SHL &&
3324
(CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3325
isShiftedMask_64(CN0->getZExtValue(), MaskIdx0, MaskLen0) &&
3326
MaskIdx0 == 0 && (CN1 = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3327
(Shamt = CN1->getZExtValue()) == MaskLen0 &&
3328
(MaskIdx0 + MaskLen0 <= ValBits)) {
3329
LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 4\n");
3330
return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3331
N1.getOperand(0),
3332
DAG.getConstant((ValBits - 1), DL, GRLenVT),
3333
DAG.getConstant(Shamt, DL, GRLenVT));
3334
}
3335
3336
// 5th pattern to match BSTRINS:
3337
// R = or (and X, mask), const
3338
// where ~mask = (2**size - 1) << lsb, mask & const = 0
3339
// =>
3340
// R = BSTRINS X, (const >> lsb), msb, lsb
3341
// where msb = lsb + size - 1
3342
if (N0.getOpcode() == ISD::AND &&
3343
(CN0 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) &&
3344
isShiftedMask_64(~CN0->getSExtValue(), MaskIdx0, MaskLen0) &&
3345
(CN1 = dyn_cast<ConstantSDNode>(N1)) &&
3346
(CN1->getSExtValue() & CN0->getSExtValue()) == 0) {
3347
LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 5\n");
3348
return DAG.getNode(
3349
LoongArchISD::BSTRINS, DL, ValTy, N0.getOperand(0),
3350
DAG.getConstant(CN1->getSExtValue() >> MaskIdx0, DL, ValTy),
3351
DAG.getConstant(ValBits == 32 ? (MaskIdx0 + (MaskLen0 & 31) - 1)
3352
: (MaskIdx0 + MaskLen0 - 1),
3353
DL, GRLenVT),
3354
DAG.getConstant(MaskIdx0, DL, GRLenVT));
3355
}
3356
3357
// 6th pattern.
3358
// a = b | ((c & mask) << shamt), where all positions in b to be overwritten
3359
// by the incoming bits are known to be zero.
3360
// =>
3361
// a = BSTRINS b, c, shamt + MaskLen - 1, shamt
3362
//
3363
// Note that the 1st pattern is a special situation of the 6th, i.e. the 6th
3364
// pattern is more common than the 1st. So we put the 1st before the 6th in
3365
// order to match as many nodes as possible.
3366
ConstantSDNode *CNMask, *CNShamt;
3367
unsigned MaskIdx, MaskLen;
3368
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::AND &&
3369
(CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3370
isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3371
MaskIdx == 0 && (CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3372
CNShamt->getZExtValue() + MaskLen <= ValBits) {
3373
Shamt = CNShamt->getZExtValue();
3374
APInt ShMask(ValBits, CNMask->getZExtValue() << Shamt);
3375
if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3376
LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 6\n");
3377
return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3378
N1.getOperand(0).getOperand(0),
3379
DAG.getConstant(Shamt + MaskLen - 1, DL, GRLenVT),
3380
DAG.getConstant(Shamt, DL, GRLenVT));
3381
}
3382
}
3383
3384
// 7th pattern.
3385
// a = b | ((c << shamt) & shifted_mask), where all positions in b to be
3386
// overwritten by the incoming bits are known to be zero.
3387
// =>
3388
// a = BSTRINS b, c, MaskIdx + MaskLen - 1, MaskIdx
3389
//
3390
// Similarly, the 7th pattern is more common than the 2nd. So we put the 2nd
3391
// before the 7th in order to match as many nodes as possible.
3392
if (N1.getOpcode() == ISD::AND &&
3393
(CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3394
isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen) &&
3395
N1.getOperand(0).getOpcode() == ISD::SHL &&
3396
(CNShamt = dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(1))) &&
3397
CNShamt->getZExtValue() == MaskIdx) {
3398
APInt ShMask(ValBits, CNMask->getZExtValue());
3399
if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3400
LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 7\n");
3401
return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3402
N1.getOperand(0).getOperand(0),
3403
DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3404
DAG.getConstant(MaskIdx, DL, GRLenVT));
3405
}
3406
}
3407
3408
// (or a, b) and (or b, a) are equivalent, so swap the operands and retry.
3409
if (!SwapAndRetried) {
3410
std::swap(N0, N1);
3411
SwapAndRetried = true;
3412
goto Retry;
3413
}
3414
3415
SwapAndRetried = false;
3416
Retry2:
3417
// 8th pattern.
3418
// a = b | (c & shifted_mask), where all positions in b to be overwritten by
3419
// the incoming bits are known to be zero.
3420
// =>
3421
// a = BSTRINS b, c >> MaskIdx, MaskIdx + MaskLen - 1, MaskIdx
3422
//
3423
// Similarly, the 8th pattern is more common than the 4th and 5th patterns. So
3424
// we put it here in order to match as many nodes as possible or generate less
3425
// instructions.
3426
if (N1.getOpcode() == ISD::AND &&
3427
(CNMask = dyn_cast<ConstantSDNode>(N1.getOperand(1))) &&
3428
isShiftedMask_64(CNMask->getZExtValue(), MaskIdx, MaskLen)) {
3429
APInt ShMask(ValBits, CNMask->getZExtValue());
3430
if (ShMask.isSubsetOf(DAG.computeKnownBits(N0).Zero)) {
3431
LLVM_DEBUG(dbgs() << "Perform OR combine: match pattern 8\n");
3432
return DAG.getNode(LoongArchISD::BSTRINS, DL, ValTy, N0,
3433
DAG.getNode(ISD::SRL, DL, N1->getValueType(0),
3434
N1->getOperand(0),
3435
DAG.getConstant(MaskIdx, DL, GRLenVT)),
3436
DAG.getConstant(MaskIdx + MaskLen - 1, DL, GRLenVT),
3437
DAG.getConstant(MaskIdx, DL, GRLenVT));
3438
}
3439
}
3440
// Swap N0/N1 and retry.
3441
if (!SwapAndRetried) {
3442
std::swap(N0, N1);
3443
SwapAndRetried = true;
3444
goto Retry2;
3445
}
3446
3447
return SDValue();
3448
}
3449
3450
static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
3451
ExtType = ISD::NON_EXTLOAD;
3452
3453
switch (V.getNode()->getOpcode()) {
3454
case ISD::LOAD: {
3455
LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
3456
if ((LoadNode->getMemoryVT() == MVT::i8) ||
3457
(LoadNode->getMemoryVT() == MVT::i16)) {
3458
ExtType = LoadNode->getExtensionType();
3459
return true;
3460
}
3461
return false;
3462
}
3463
case ISD::AssertSext: {
3464
VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3465
if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3466
ExtType = ISD::SEXTLOAD;
3467
return true;
3468
}
3469
return false;
3470
}
3471
case ISD::AssertZext: {
3472
VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
3473
if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
3474
ExtType = ISD::ZEXTLOAD;
3475
return true;
3476
}
3477
return false;
3478
}
3479
default:
3480
return false;
3481
}
3482
3483
return false;
3484
}
3485
3486
// Eliminate redundant truncation and zero-extension nodes.
3487
// * Case 1:
3488
// +------------+ +------------+ +------------+
3489
// | Input1 | | Input2 | | CC |
3490
// +------------+ +------------+ +------------+
3491
// | | |
3492
// V V +----+
3493
// +------------+ +------------+ |
3494
// | TRUNCATE | | TRUNCATE | |
3495
// +------------+ +------------+ |
3496
// | | |
3497
// V V |
3498
// +------------+ +------------+ |
3499
// | ZERO_EXT | | ZERO_EXT | |
3500
// +------------+ +------------+ |
3501
// | | |
3502
// | +-------------+ |
3503
// V V | |
3504
// +----------------+ | |
3505
// | AND | | |
3506
// +----------------+ | |
3507
// | | |
3508
// +---------------+ | |
3509
// | | |
3510
// V V V
3511
// +-------------+
3512
// | CMP |
3513
// +-------------+
3514
// * Case 2:
3515
// +------------+ +------------+ +-------------+ +------------+ +------------+
3516
// | Input1 | | Input2 | | Constant -1 | | Constant 0 | | CC |
3517
// +------------+ +------------+ +-------------+ +------------+ +------------+
3518
// | | | | |
3519
// V | | | |
3520
// +------------+ | | | |
3521
// | XOR |<---------------------+ | |
3522
// +------------+ | | |
3523
// | | | |
3524
// V V +---------------+ |
3525
// +------------+ +------------+ | |
3526
// | TRUNCATE | | TRUNCATE | | +-------------------------+
3527
// +------------+ +------------+ | |
3528
// | | | |
3529
// V V | |
3530
// +------------+ +------------+ | |
3531
// | ZERO_EXT | | ZERO_EXT | | |
3532
// +------------+ +------------+ | |
3533
// | | | |
3534
// V V | |
3535
// +----------------+ | |
3536
// | AND | | |
3537
// +----------------+ | |
3538
// | | |
3539
// +---------------+ | |
3540
// | | |
3541
// V V V
3542
// +-------------+
3543
// | CMP |
3544
// +-------------+
3545
static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
3546
TargetLowering::DAGCombinerInfo &DCI,
3547
const LoongArchSubtarget &Subtarget) {
3548
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3549
3550
SDNode *AndNode = N->getOperand(0).getNode();
3551
if (AndNode->getOpcode() != ISD::AND)
3552
return SDValue();
3553
3554
SDValue AndInputValue2 = AndNode->getOperand(1);
3555
if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
3556
return SDValue();
3557
3558
SDValue CmpInputValue = N->getOperand(1);
3559
SDValue AndInputValue1 = AndNode->getOperand(0);
3560
if (AndInputValue1.getOpcode() == ISD::XOR) {
3561
if (CC != ISD::SETEQ && CC != ISD::SETNE)
3562
return SDValue();
3563
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
3564
if (!CN || CN->getSExtValue() != -1)
3565
return SDValue();
3566
CN = dyn_cast<ConstantSDNode>(CmpInputValue);
3567
if (!CN || CN->getSExtValue() != 0)
3568
return SDValue();
3569
AndInputValue1 = AndInputValue1.getOperand(0);
3570
if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
3571
return SDValue();
3572
} else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
3573
if (AndInputValue2 != CmpInputValue)
3574
return SDValue();
3575
} else {
3576
return SDValue();
3577
}
3578
3579
SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
3580
if (TruncValue1.getOpcode() != ISD::TRUNCATE)
3581
return SDValue();
3582
3583
SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
3584
if (TruncValue2.getOpcode() != ISD::TRUNCATE)
3585
return SDValue();
3586
3587
SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
3588
SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
3589
ISD::LoadExtType ExtType1;
3590
ISD::LoadExtType ExtType2;
3591
3592
if (!checkValueWidth(TruncInputValue1, ExtType1) ||
3593
!checkValueWidth(TruncInputValue2, ExtType2))
3594
return SDValue();
3595
3596
if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
3597
AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
3598
return SDValue();
3599
3600
if ((ExtType2 != ISD::ZEXTLOAD) &&
3601
((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
3602
return SDValue();
3603
3604
// These truncation and zero-extension nodes are not necessary, remove them.
3605
SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
3606
TruncInputValue1, TruncInputValue2);
3607
SDValue NewSetCC =
3608
DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
3609
DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
3610
return SDValue(N, 0);
3611
}
3612
3613
// Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
3614
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
3615
TargetLowering::DAGCombinerInfo &DCI,
3616
const LoongArchSubtarget &Subtarget) {
3617
if (DCI.isBeforeLegalizeOps())
3618
return SDValue();
3619
3620
SDValue Src = N->getOperand(0);
3621
if (Src.getOpcode() != LoongArchISD::REVB_2W)
3622
return SDValue();
3623
3624
return DAG.getNode(LoongArchISD::BITREV_4B, SDLoc(N), N->getValueType(0),
3625
Src.getOperand(0));
3626
}
3627
3628
template <unsigned N>
3629
static SDValue legalizeIntrinsicImmArg(SDNode *Node, unsigned ImmOp,
3630
SelectionDAG &DAG,
3631
const LoongArchSubtarget &Subtarget,
3632
bool IsSigned = false) {
3633
SDLoc DL(Node);
3634
auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3635
// Check the ImmArg.
3636
if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3637
(!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3638
DAG.getContext()->emitError(Node->getOperationName(0) +
3639
": argument out of range.");
3640
return DAG.getNode(ISD::UNDEF, DL, Subtarget.getGRLenVT());
3641
}
3642
return DAG.getConstant(CImm->getZExtValue(), DL, Subtarget.getGRLenVT());
3643
}
3644
3645
template <unsigned N>
3646
static SDValue lowerVectorSplatImm(SDNode *Node, unsigned ImmOp,
3647
SelectionDAG &DAG, bool IsSigned = false) {
3648
SDLoc DL(Node);
3649
EVT ResTy = Node->getValueType(0);
3650
auto *CImm = cast<ConstantSDNode>(Node->getOperand(ImmOp));
3651
3652
// Check the ImmArg.
3653
if ((IsSigned && !isInt<N>(CImm->getSExtValue())) ||
3654
(!IsSigned && !isUInt<N>(CImm->getZExtValue()))) {
3655
DAG.getContext()->emitError(Node->getOperationName(0) +
3656
": argument out of range.");
3657
return DAG.getNode(ISD::UNDEF, DL, ResTy);
3658
}
3659
return DAG.getConstant(
3660
APInt(ResTy.getScalarType().getSizeInBits(),
3661
IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned),
3662
DL, ResTy);
3663
}
3664
3665
static SDValue truncateVecElts(SDNode *Node, SelectionDAG &DAG) {
3666
SDLoc DL(Node);
3667
EVT ResTy = Node->getValueType(0);
3668
SDValue Vec = Node->getOperand(2);
3669
SDValue Mask = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, DL, ResTy);
3670
return DAG.getNode(ISD::AND, DL, ResTy, Vec, Mask);
3671
}
3672
3673
static SDValue lowerVectorBitClear(SDNode *Node, SelectionDAG &DAG) {
3674
SDLoc DL(Node);
3675
EVT ResTy = Node->getValueType(0);
3676
SDValue One = DAG.getConstant(1, DL, ResTy);
3677
SDValue Bit =
3678
DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Node, DAG));
3679
3680
return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1),
3681
DAG.getNOT(DL, Bit, ResTy));
3682
}
3683
3684
template <unsigned N>
3685
static SDValue lowerVectorBitClearImm(SDNode *Node, SelectionDAG &DAG) {
3686
SDLoc DL(Node);
3687
EVT ResTy = Node->getValueType(0);
3688
auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3689
// Check the unsigned ImmArg.
3690
if (!isUInt<N>(CImm->getZExtValue())) {
3691
DAG.getContext()->emitError(Node->getOperationName(0) +
3692
": argument out of range.");
3693
return DAG.getNode(ISD::UNDEF, DL, ResTy);
3694
}
3695
3696
APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3697
SDValue Mask = DAG.getConstant(~BitImm, DL, ResTy);
3698
3699
return DAG.getNode(ISD::AND, DL, ResTy, Node->getOperand(1), Mask);
3700
}
3701
3702
template <unsigned N>
3703
static SDValue lowerVectorBitSetImm(SDNode *Node, SelectionDAG &DAG) {
3704
SDLoc DL(Node);
3705
EVT ResTy = Node->getValueType(0);
3706
auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3707
// Check the unsigned ImmArg.
3708
if (!isUInt<N>(CImm->getZExtValue())) {
3709
DAG.getContext()->emitError(Node->getOperationName(0) +
3710
": argument out of range.");
3711
return DAG.getNode(ISD::UNDEF, DL, ResTy);
3712
}
3713
3714
APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3715
SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3716
return DAG.getNode(ISD::OR, DL, ResTy, Node->getOperand(1), BitImm);
3717
}
3718
3719
template <unsigned N>
3720
static SDValue lowerVectorBitRevImm(SDNode *Node, SelectionDAG &DAG) {
3721
SDLoc DL(Node);
3722
EVT ResTy = Node->getValueType(0);
3723
auto *CImm = cast<ConstantSDNode>(Node->getOperand(2));
3724
// Check the unsigned ImmArg.
3725
if (!isUInt<N>(CImm->getZExtValue())) {
3726
DAG.getContext()->emitError(Node->getOperationName(0) +
3727
": argument out of range.");
3728
return DAG.getNode(ISD::UNDEF, DL, ResTy);
3729
}
3730
3731
APInt Imm = APInt(ResTy.getScalarSizeInBits(), 1) << CImm->getAPIntValue();
3732
SDValue BitImm = DAG.getConstant(Imm, DL, ResTy);
3733
return DAG.getNode(ISD::XOR, DL, ResTy, Node->getOperand(1), BitImm);
3734
}
3735
3736
static SDValue
3737
performINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG,
3738
TargetLowering::DAGCombinerInfo &DCI,
3739
const LoongArchSubtarget &Subtarget) {
3740
SDLoc DL(N);
3741
switch (N->getConstantOperandVal(0)) {
3742
default:
3743
break;
3744
case Intrinsic::loongarch_lsx_vadd_b:
3745
case Intrinsic::loongarch_lsx_vadd_h:
3746
case Intrinsic::loongarch_lsx_vadd_w:
3747
case Intrinsic::loongarch_lsx_vadd_d:
3748
case Intrinsic::loongarch_lasx_xvadd_b:
3749
case Intrinsic::loongarch_lasx_xvadd_h:
3750
case Intrinsic::loongarch_lasx_xvadd_w:
3751
case Intrinsic::loongarch_lasx_xvadd_d:
3752
return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3753
N->getOperand(2));
3754
case Intrinsic::loongarch_lsx_vaddi_bu:
3755
case Intrinsic::loongarch_lsx_vaddi_hu:
3756
case Intrinsic::loongarch_lsx_vaddi_wu:
3757
case Intrinsic::loongarch_lsx_vaddi_du:
3758
case Intrinsic::loongarch_lasx_xvaddi_bu:
3759
case Intrinsic::loongarch_lasx_xvaddi_hu:
3760
case Intrinsic::loongarch_lasx_xvaddi_wu:
3761
case Intrinsic::loongarch_lasx_xvaddi_du:
3762
return DAG.getNode(ISD::ADD, DL, N->getValueType(0), N->getOperand(1),
3763
lowerVectorSplatImm<5>(N, 2, DAG));
3764
case Intrinsic::loongarch_lsx_vsub_b:
3765
case Intrinsic::loongarch_lsx_vsub_h:
3766
case Intrinsic::loongarch_lsx_vsub_w:
3767
case Intrinsic::loongarch_lsx_vsub_d:
3768
case Intrinsic::loongarch_lasx_xvsub_b:
3769
case Intrinsic::loongarch_lasx_xvsub_h:
3770
case Intrinsic::loongarch_lasx_xvsub_w:
3771
case Intrinsic::loongarch_lasx_xvsub_d:
3772
return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3773
N->getOperand(2));
3774
case Intrinsic::loongarch_lsx_vsubi_bu:
3775
case Intrinsic::loongarch_lsx_vsubi_hu:
3776
case Intrinsic::loongarch_lsx_vsubi_wu:
3777
case Intrinsic::loongarch_lsx_vsubi_du:
3778
case Intrinsic::loongarch_lasx_xvsubi_bu:
3779
case Intrinsic::loongarch_lasx_xvsubi_hu:
3780
case Intrinsic::loongarch_lasx_xvsubi_wu:
3781
case Intrinsic::loongarch_lasx_xvsubi_du:
3782
return DAG.getNode(ISD::SUB, DL, N->getValueType(0), N->getOperand(1),
3783
lowerVectorSplatImm<5>(N, 2, DAG));
3784
case Intrinsic::loongarch_lsx_vneg_b:
3785
case Intrinsic::loongarch_lsx_vneg_h:
3786
case Intrinsic::loongarch_lsx_vneg_w:
3787
case Intrinsic::loongarch_lsx_vneg_d:
3788
case Intrinsic::loongarch_lasx_xvneg_b:
3789
case Intrinsic::loongarch_lasx_xvneg_h:
3790
case Intrinsic::loongarch_lasx_xvneg_w:
3791
case Intrinsic::loongarch_lasx_xvneg_d:
3792
return DAG.getNode(
3793
ISD::SUB, DL, N->getValueType(0),
3794
DAG.getConstant(
3795
APInt(N->getValueType(0).getScalarType().getSizeInBits(), 0,
3796
/*isSigned=*/true),
3797
SDLoc(N), N->getValueType(0)),
3798
N->getOperand(1));
3799
case Intrinsic::loongarch_lsx_vmax_b:
3800
case Intrinsic::loongarch_lsx_vmax_h:
3801
case Intrinsic::loongarch_lsx_vmax_w:
3802
case Intrinsic::loongarch_lsx_vmax_d:
3803
case Intrinsic::loongarch_lasx_xvmax_b:
3804
case Intrinsic::loongarch_lasx_xvmax_h:
3805
case Intrinsic::loongarch_lasx_xvmax_w:
3806
case Intrinsic::loongarch_lasx_xvmax_d:
3807
return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3808
N->getOperand(2));
3809
case Intrinsic::loongarch_lsx_vmax_bu:
3810
case Intrinsic::loongarch_lsx_vmax_hu:
3811
case Intrinsic::loongarch_lsx_vmax_wu:
3812
case Intrinsic::loongarch_lsx_vmax_du:
3813
case Intrinsic::loongarch_lasx_xvmax_bu:
3814
case Intrinsic::loongarch_lasx_xvmax_hu:
3815
case Intrinsic::loongarch_lasx_xvmax_wu:
3816
case Intrinsic::loongarch_lasx_xvmax_du:
3817
return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3818
N->getOperand(2));
3819
case Intrinsic::loongarch_lsx_vmaxi_b:
3820
case Intrinsic::loongarch_lsx_vmaxi_h:
3821
case Intrinsic::loongarch_lsx_vmaxi_w:
3822
case Intrinsic::loongarch_lsx_vmaxi_d:
3823
case Intrinsic::loongarch_lasx_xvmaxi_b:
3824
case Intrinsic::loongarch_lasx_xvmaxi_h:
3825
case Intrinsic::loongarch_lasx_xvmaxi_w:
3826
case Intrinsic::loongarch_lasx_xvmaxi_d:
3827
return DAG.getNode(ISD::SMAX, DL, N->getValueType(0), N->getOperand(1),
3828
lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3829
case Intrinsic::loongarch_lsx_vmaxi_bu:
3830
case Intrinsic::loongarch_lsx_vmaxi_hu:
3831
case Intrinsic::loongarch_lsx_vmaxi_wu:
3832
case Intrinsic::loongarch_lsx_vmaxi_du:
3833
case Intrinsic::loongarch_lasx_xvmaxi_bu:
3834
case Intrinsic::loongarch_lasx_xvmaxi_hu:
3835
case Intrinsic::loongarch_lasx_xvmaxi_wu:
3836
case Intrinsic::loongarch_lasx_xvmaxi_du:
3837
return DAG.getNode(ISD::UMAX, DL, N->getValueType(0), N->getOperand(1),
3838
lowerVectorSplatImm<5>(N, 2, DAG));
3839
case Intrinsic::loongarch_lsx_vmin_b:
3840
case Intrinsic::loongarch_lsx_vmin_h:
3841
case Intrinsic::loongarch_lsx_vmin_w:
3842
case Intrinsic::loongarch_lsx_vmin_d:
3843
case Intrinsic::loongarch_lasx_xvmin_b:
3844
case Intrinsic::loongarch_lasx_xvmin_h:
3845
case Intrinsic::loongarch_lasx_xvmin_w:
3846
case Intrinsic::loongarch_lasx_xvmin_d:
3847
return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3848
N->getOperand(2));
3849
case Intrinsic::loongarch_lsx_vmin_bu:
3850
case Intrinsic::loongarch_lsx_vmin_hu:
3851
case Intrinsic::loongarch_lsx_vmin_wu:
3852
case Intrinsic::loongarch_lsx_vmin_du:
3853
case Intrinsic::loongarch_lasx_xvmin_bu:
3854
case Intrinsic::loongarch_lasx_xvmin_hu:
3855
case Intrinsic::loongarch_lasx_xvmin_wu:
3856
case Intrinsic::loongarch_lasx_xvmin_du:
3857
return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3858
N->getOperand(2));
3859
case Intrinsic::loongarch_lsx_vmini_b:
3860
case Intrinsic::loongarch_lsx_vmini_h:
3861
case Intrinsic::loongarch_lsx_vmini_w:
3862
case Intrinsic::loongarch_lsx_vmini_d:
3863
case Intrinsic::loongarch_lasx_xvmini_b:
3864
case Intrinsic::loongarch_lasx_xvmini_h:
3865
case Intrinsic::loongarch_lasx_xvmini_w:
3866
case Intrinsic::loongarch_lasx_xvmini_d:
3867
return DAG.getNode(ISD::SMIN, DL, N->getValueType(0), N->getOperand(1),
3868
lowerVectorSplatImm<5>(N, 2, DAG, /*IsSigned=*/true));
3869
case Intrinsic::loongarch_lsx_vmini_bu:
3870
case Intrinsic::loongarch_lsx_vmini_hu:
3871
case Intrinsic::loongarch_lsx_vmini_wu:
3872
case Intrinsic::loongarch_lsx_vmini_du:
3873
case Intrinsic::loongarch_lasx_xvmini_bu:
3874
case Intrinsic::loongarch_lasx_xvmini_hu:
3875
case Intrinsic::loongarch_lasx_xvmini_wu:
3876
case Intrinsic::loongarch_lasx_xvmini_du:
3877
return DAG.getNode(ISD::UMIN, DL, N->getValueType(0), N->getOperand(1),
3878
lowerVectorSplatImm<5>(N, 2, DAG));
3879
case Intrinsic::loongarch_lsx_vmul_b:
3880
case Intrinsic::loongarch_lsx_vmul_h:
3881
case Intrinsic::loongarch_lsx_vmul_w:
3882
case Intrinsic::loongarch_lsx_vmul_d:
3883
case Intrinsic::loongarch_lasx_xvmul_b:
3884
case Intrinsic::loongarch_lasx_xvmul_h:
3885
case Intrinsic::loongarch_lasx_xvmul_w:
3886
case Intrinsic::loongarch_lasx_xvmul_d:
3887
return DAG.getNode(ISD::MUL, DL, N->getValueType(0), N->getOperand(1),
3888
N->getOperand(2));
3889
case Intrinsic::loongarch_lsx_vmadd_b:
3890
case Intrinsic::loongarch_lsx_vmadd_h:
3891
case Intrinsic::loongarch_lsx_vmadd_w:
3892
case Intrinsic::loongarch_lsx_vmadd_d:
3893
case Intrinsic::loongarch_lasx_xvmadd_b:
3894
case Intrinsic::loongarch_lasx_xvmadd_h:
3895
case Intrinsic::loongarch_lasx_xvmadd_w:
3896
case Intrinsic::loongarch_lasx_xvmadd_d: {
3897
EVT ResTy = N->getValueType(0);
3898
return DAG.getNode(ISD::ADD, SDLoc(N), ResTy, N->getOperand(1),
3899
DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
3900
N->getOperand(3)));
3901
}
3902
case Intrinsic::loongarch_lsx_vmsub_b:
3903
case Intrinsic::loongarch_lsx_vmsub_h:
3904
case Intrinsic::loongarch_lsx_vmsub_w:
3905
case Intrinsic::loongarch_lsx_vmsub_d:
3906
case Intrinsic::loongarch_lasx_xvmsub_b:
3907
case Intrinsic::loongarch_lasx_xvmsub_h:
3908
case Intrinsic::loongarch_lasx_xvmsub_w:
3909
case Intrinsic::loongarch_lasx_xvmsub_d: {
3910
EVT ResTy = N->getValueType(0);
3911
return DAG.getNode(ISD::SUB, SDLoc(N), ResTy, N->getOperand(1),
3912
DAG.getNode(ISD::MUL, SDLoc(N), ResTy, N->getOperand(2),
3913
N->getOperand(3)));
3914
}
3915
case Intrinsic::loongarch_lsx_vdiv_b:
3916
case Intrinsic::loongarch_lsx_vdiv_h:
3917
case Intrinsic::loongarch_lsx_vdiv_w:
3918
case Intrinsic::loongarch_lsx_vdiv_d:
3919
case Intrinsic::loongarch_lasx_xvdiv_b:
3920
case Intrinsic::loongarch_lasx_xvdiv_h:
3921
case Intrinsic::loongarch_lasx_xvdiv_w:
3922
case Intrinsic::loongarch_lasx_xvdiv_d:
3923
return DAG.getNode(ISD::SDIV, DL, N->getValueType(0), N->getOperand(1),
3924
N->getOperand(2));
3925
case Intrinsic::loongarch_lsx_vdiv_bu:
3926
case Intrinsic::loongarch_lsx_vdiv_hu:
3927
case Intrinsic::loongarch_lsx_vdiv_wu:
3928
case Intrinsic::loongarch_lsx_vdiv_du:
3929
case Intrinsic::loongarch_lasx_xvdiv_bu:
3930
case Intrinsic::loongarch_lasx_xvdiv_hu:
3931
case Intrinsic::loongarch_lasx_xvdiv_wu:
3932
case Intrinsic::loongarch_lasx_xvdiv_du:
3933
return DAG.getNode(ISD::UDIV, DL, N->getValueType(0), N->getOperand(1),
3934
N->getOperand(2));
3935
case Intrinsic::loongarch_lsx_vmod_b:
3936
case Intrinsic::loongarch_lsx_vmod_h:
3937
case Intrinsic::loongarch_lsx_vmod_w:
3938
case Intrinsic::loongarch_lsx_vmod_d:
3939
case Intrinsic::loongarch_lasx_xvmod_b:
3940
case Intrinsic::loongarch_lasx_xvmod_h:
3941
case Intrinsic::loongarch_lasx_xvmod_w:
3942
case Intrinsic::loongarch_lasx_xvmod_d:
3943
return DAG.getNode(ISD::SREM, DL, N->getValueType(0), N->getOperand(1),
3944
N->getOperand(2));
3945
case Intrinsic::loongarch_lsx_vmod_bu:
3946
case Intrinsic::loongarch_lsx_vmod_hu:
3947
case Intrinsic::loongarch_lsx_vmod_wu:
3948
case Intrinsic::loongarch_lsx_vmod_du:
3949
case Intrinsic::loongarch_lasx_xvmod_bu:
3950
case Intrinsic::loongarch_lasx_xvmod_hu:
3951
case Intrinsic::loongarch_lasx_xvmod_wu:
3952
case Intrinsic::loongarch_lasx_xvmod_du:
3953
return DAG.getNode(ISD::UREM, DL, N->getValueType(0), N->getOperand(1),
3954
N->getOperand(2));
3955
case Intrinsic::loongarch_lsx_vand_v:
3956
case Intrinsic::loongarch_lasx_xvand_v:
3957
return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
3958
N->getOperand(2));
3959
case Intrinsic::loongarch_lsx_vor_v:
3960
case Intrinsic::loongarch_lasx_xvor_v:
3961
return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
3962
N->getOperand(2));
3963
case Intrinsic::loongarch_lsx_vxor_v:
3964
case Intrinsic::loongarch_lasx_xvxor_v:
3965
return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
3966
N->getOperand(2));
3967
case Intrinsic::loongarch_lsx_vnor_v:
3968
case Intrinsic::loongarch_lasx_xvnor_v: {
3969
SDValue Res = DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
3970
N->getOperand(2));
3971
return DAG.getNOT(DL, Res, Res->getValueType(0));
3972
}
3973
case Intrinsic::loongarch_lsx_vandi_b:
3974
case Intrinsic::loongarch_lasx_xvandi_b:
3975
return DAG.getNode(ISD::AND, DL, N->getValueType(0), N->getOperand(1),
3976
lowerVectorSplatImm<8>(N, 2, DAG));
3977
case Intrinsic::loongarch_lsx_vori_b:
3978
case Intrinsic::loongarch_lasx_xvori_b:
3979
return DAG.getNode(ISD::OR, DL, N->getValueType(0), N->getOperand(1),
3980
lowerVectorSplatImm<8>(N, 2, DAG));
3981
case Intrinsic::loongarch_lsx_vxori_b:
3982
case Intrinsic::loongarch_lasx_xvxori_b:
3983
return DAG.getNode(ISD::XOR, DL, N->getValueType(0), N->getOperand(1),
3984
lowerVectorSplatImm<8>(N, 2, DAG));
3985
case Intrinsic::loongarch_lsx_vsll_b:
3986
case Intrinsic::loongarch_lsx_vsll_h:
3987
case Intrinsic::loongarch_lsx_vsll_w:
3988
case Intrinsic::loongarch_lsx_vsll_d:
3989
case Intrinsic::loongarch_lasx_xvsll_b:
3990
case Intrinsic::loongarch_lasx_xvsll_h:
3991
case Intrinsic::loongarch_lasx_xvsll_w:
3992
case Intrinsic::loongarch_lasx_xvsll_d:
3993
return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
3994
truncateVecElts(N, DAG));
3995
case Intrinsic::loongarch_lsx_vslli_b:
3996
case Intrinsic::loongarch_lasx_xvslli_b:
3997
return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
3998
lowerVectorSplatImm<3>(N, 2, DAG));
3999
case Intrinsic::loongarch_lsx_vslli_h:
4000
case Intrinsic::loongarch_lasx_xvslli_h:
4001
return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4002
lowerVectorSplatImm<4>(N, 2, DAG));
4003
case Intrinsic::loongarch_lsx_vslli_w:
4004
case Intrinsic::loongarch_lasx_xvslli_w:
4005
return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4006
lowerVectorSplatImm<5>(N, 2, DAG));
4007
case Intrinsic::loongarch_lsx_vslli_d:
4008
case Intrinsic::loongarch_lasx_xvslli_d:
4009
return DAG.getNode(ISD::SHL, DL, N->getValueType(0), N->getOperand(1),
4010
lowerVectorSplatImm<6>(N, 2, DAG));
4011
case Intrinsic::loongarch_lsx_vsrl_b:
4012
case Intrinsic::loongarch_lsx_vsrl_h:
4013
case Intrinsic::loongarch_lsx_vsrl_w:
4014
case Intrinsic::loongarch_lsx_vsrl_d:
4015
case Intrinsic::loongarch_lasx_xvsrl_b:
4016
case Intrinsic::loongarch_lasx_xvsrl_h:
4017
case Intrinsic::loongarch_lasx_xvsrl_w:
4018
case Intrinsic::loongarch_lasx_xvsrl_d:
4019
return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4020
truncateVecElts(N, DAG));
4021
case Intrinsic::loongarch_lsx_vsrli_b:
4022
case Intrinsic::loongarch_lasx_xvsrli_b:
4023
return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4024
lowerVectorSplatImm<3>(N, 2, DAG));
4025
case Intrinsic::loongarch_lsx_vsrli_h:
4026
case Intrinsic::loongarch_lasx_xvsrli_h:
4027
return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4028
lowerVectorSplatImm<4>(N, 2, DAG));
4029
case Intrinsic::loongarch_lsx_vsrli_w:
4030
case Intrinsic::loongarch_lasx_xvsrli_w:
4031
return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4032
lowerVectorSplatImm<5>(N, 2, DAG));
4033
case Intrinsic::loongarch_lsx_vsrli_d:
4034
case Intrinsic::loongarch_lasx_xvsrli_d:
4035
return DAG.getNode(ISD::SRL, DL, N->getValueType(0), N->getOperand(1),
4036
lowerVectorSplatImm<6>(N, 2, DAG));
4037
case Intrinsic::loongarch_lsx_vsra_b:
4038
case Intrinsic::loongarch_lsx_vsra_h:
4039
case Intrinsic::loongarch_lsx_vsra_w:
4040
case Intrinsic::loongarch_lsx_vsra_d:
4041
case Intrinsic::loongarch_lasx_xvsra_b:
4042
case Intrinsic::loongarch_lasx_xvsra_h:
4043
case Intrinsic::loongarch_lasx_xvsra_w:
4044
case Intrinsic::loongarch_lasx_xvsra_d:
4045
return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4046
truncateVecElts(N, DAG));
4047
case Intrinsic::loongarch_lsx_vsrai_b:
4048
case Intrinsic::loongarch_lasx_xvsrai_b:
4049
return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4050
lowerVectorSplatImm<3>(N, 2, DAG));
4051
case Intrinsic::loongarch_lsx_vsrai_h:
4052
case Intrinsic::loongarch_lasx_xvsrai_h:
4053
return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4054
lowerVectorSplatImm<4>(N, 2, DAG));
4055
case Intrinsic::loongarch_lsx_vsrai_w:
4056
case Intrinsic::loongarch_lasx_xvsrai_w:
4057
return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4058
lowerVectorSplatImm<5>(N, 2, DAG));
4059
case Intrinsic::loongarch_lsx_vsrai_d:
4060
case Intrinsic::loongarch_lasx_xvsrai_d:
4061
return DAG.getNode(ISD::SRA, DL, N->getValueType(0), N->getOperand(1),
4062
lowerVectorSplatImm<6>(N, 2, DAG));
4063
case Intrinsic::loongarch_lsx_vclz_b:
4064
case Intrinsic::loongarch_lsx_vclz_h:
4065
case Intrinsic::loongarch_lsx_vclz_w:
4066
case Intrinsic::loongarch_lsx_vclz_d:
4067
case Intrinsic::loongarch_lasx_xvclz_b:
4068
case Intrinsic::loongarch_lasx_xvclz_h:
4069
case Intrinsic::loongarch_lasx_xvclz_w:
4070
case Intrinsic::loongarch_lasx_xvclz_d:
4071
return DAG.getNode(ISD::CTLZ, DL, N->getValueType(0), N->getOperand(1));
4072
case Intrinsic::loongarch_lsx_vpcnt_b:
4073
case Intrinsic::loongarch_lsx_vpcnt_h:
4074
case Intrinsic::loongarch_lsx_vpcnt_w:
4075
case Intrinsic::loongarch_lsx_vpcnt_d:
4076
case Intrinsic::loongarch_lasx_xvpcnt_b:
4077
case Intrinsic::loongarch_lasx_xvpcnt_h:
4078
case Intrinsic::loongarch_lasx_xvpcnt_w:
4079
case Intrinsic::loongarch_lasx_xvpcnt_d:
4080
return DAG.getNode(ISD::CTPOP, DL, N->getValueType(0), N->getOperand(1));
4081
case Intrinsic::loongarch_lsx_vbitclr_b:
4082
case Intrinsic::loongarch_lsx_vbitclr_h:
4083
case Intrinsic::loongarch_lsx_vbitclr_w:
4084
case Intrinsic::loongarch_lsx_vbitclr_d:
4085
case Intrinsic::loongarch_lasx_xvbitclr_b:
4086
case Intrinsic::loongarch_lasx_xvbitclr_h:
4087
case Intrinsic::loongarch_lasx_xvbitclr_w:
4088
case Intrinsic::loongarch_lasx_xvbitclr_d:
4089
return lowerVectorBitClear(N, DAG);
4090
case Intrinsic::loongarch_lsx_vbitclri_b:
4091
case Intrinsic::loongarch_lasx_xvbitclri_b:
4092
return lowerVectorBitClearImm<3>(N, DAG);
4093
case Intrinsic::loongarch_lsx_vbitclri_h:
4094
case Intrinsic::loongarch_lasx_xvbitclri_h:
4095
return lowerVectorBitClearImm<4>(N, DAG);
4096
case Intrinsic::loongarch_lsx_vbitclri_w:
4097
case Intrinsic::loongarch_lasx_xvbitclri_w:
4098
return lowerVectorBitClearImm<5>(N, DAG);
4099
case Intrinsic::loongarch_lsx_vbitclri_d:
4100
case Intrinsic::loongarch_lasx_xvbitclri_d:
4101
return lowerVectorBitClearImm<6>(N, DAG);
4102
case Intrinsic::loongarch_lsx_vbitset_b:
4103
case Intrinsic::loongarch_lsx_vbitset_h:
4104
case Intrinsic::loongarch_lsx_vbitset_w:
4105
case Intrinsic::loongarch_lsx_vbitset_d:
4106
case Intrinsic::loongarch_lasx_xvbitset_b:
4107
case Intrinsic::loongarch_lasx_xvbitset_h:
4108
case Intrinsic::loongarch_lasx_xvbitset_w:
4109
case Intrinsic::loongarch_lasx_xvbitset_d: {
4110
EVT VecTy = N->getValueType(0);
4111
SDValue One = DAG.getConstant(1, DL, VecTy);
4112
return DAG.getNode(
4113
ISD::OR, DL, VecTy, N->getOperand(1),
4114
DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4115
}
4116
case Intrinsic::loongarch_lsx_vbitseti_b:
4117
case Intrinsic::loongarch_lasx_xvbitseti_b:
4118
return lowerVectorBitSetImm<3>(N, DAG);
4119
case Intrinsic::loongarch_lsx_vbitseti_h:
4120
case Intrinsic::loongarch_lasx_xvbitseti_h:
4121
return lowerVectorBitSetImm<4>(N, DAG);
4122
case Intrinsic::loongarch_lsx_vbitseti_w:
4123
case Intrinsic::loongarch_lasx_xvbitseti_w:
4124
return lowerVectorBitSetImm<5>(N, DAG);
4125
case Intrinsic::loongarch_lsx_vbitseti_d:
4126
case Intrinsic::loongarch_lasx_xvbitseti_d:
4127
return lowerVectorBitSetImm<6>(N, DAG);
4128
case Intrinsic::loongarch_lsx_vbitrev_b:
4129
case Intrinsic::loongarch_lsx_vbitrev_h:
4130
case Intrinsic::loongarch_lsx_vbitrev_w:
4131
case Intrinsic::loongarch_lsx_vbitrev_d:
4132
case Intrinsic::loongarch_lasx_xvbitrev_b:
4133
case Intrinsic::loongarch_lasx_xvbitrev_h:
4134
case Intrinsic::loongarch_lasx_xvbitrev_w:
4135
case Intrinsic::loongarch_lasx_xvbitrev_d: {
4136
EVT VecTy = N->getValueType(0);
4137
SDValue One = DAG.getConstant(1, DL, VecTy);
4138
return DAG.getNode(
4139
ISD::XOR, DL, VecTy, N->getOperand(1),
4140
DAG.getNode(ISD::SHL, DL, VecTy, One, truncateVecElts(N, DAG)));
4141
}
4142
case Intrinsic::loongarch_lsx_vbitrevi_b:
4143
case Intrinsic::loongarch_lasx_xvbitrevi_b:
4144
return lowerVectorBitRevImm<3>(N, DAG);
4145
case Intrinsic::loongarch_lsx_vbitrevi_h:
4146
case Intrinsic::loongarch_lasx_xvbitrevi_h:
4147
return lowerVectorBitRevImm<4>(N, DAG);
4148
case Intrinsic::loongarch_lsx_vbitrevi_w:
4149
case Intrinsic::loongarch_lasx_xvbitrevi_w:
4150
return lowerVectorBitRevImm<5>(N, DAG);
4151
case Intrinsic::loongarch_lsx_vbitrevi_d:
4152
case Intrinsic::loongarch_lasx_xvbitrevi_d:
4153
return lowerVectorBitRevImm<6>(N, DAG);
4154
case Intrinsic::loongarch_lsx_vfadd_s:
4155
case Intrinsic::loongarch_lsx_vfadd_d:
4156
case Intrinsic::loongarch_lasx_xvfadd_s:
4157
case Intrinsic::loongarch_lasx_xvfadd_d:
4158
return DAG.getNode(ISD::FADD, DL, N->getValueType(0), N->getOperand(1),
4159
N->getOperand(2));
4160
case Intrinsic::loongarch_lsx_vfsub_s:
4161
case Intrinsic::loongarch_lsx_vfsub_d:
4162
case Intrinsic::loongarch_lasx_xvfsub_s:
4163
case Intrinsic::loongarch_lasx_xvfsub_d:
4164
return DAG.getNode(ISD::FSUB, DL, N->getValueType(0), N->getOperand(1),
4165
N->getOperand(2));
4166
case Intrinsic::loongarch_lsx_vfmul_s:
4167
case Intrinsic::loongarch_lsx_vfmul_d:
4168
case Intrinsic::loongarch_lasx_xvfmul_s:
4169
case Intrinsic::loongarch_lasx_xvfmul_d:
4170
return DAG.getNode(ISD::FMUL, DL, N->getValueType(0), N->getOperand(1),
4171
N->getOperand(2));
4172
case Intrinsic::loongarch_lsx_vfdiv_s:
4173
case Intrinsic::loongarch_lsx_vfdiv_d:
4174
case Intrinsic::loongarch_lasx_xvfdiv_s:
4175
case Intrinsic::loongarch_lasx_xvfdiv_d:
4176
return DAG.getNode(ISD::FDIV, DL, N->getValueType(0), N->getOperand(1),
4177
N->getOperand(2));
4178
case Intrinsic::loongarch_lsx_vfmadd_s:
4179
case Intrinsic::loongarch_lsx_vfmadd_d:
4180
case Intrinsic::loongarch_lasx_xvfmadd_s:
4181
case Intrinsic::loongarch_lasx_xvfmadd_d:
4182
return DAG.getNode(ISD::FMA, DL, N->getValueType(0), N->getOperand(1),
4183
N->getOperand(2), N->getOperand(3));
4184
case Intrinsic::loongarch_lsx_vinsgr2vr_b:
4185
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4186
N->getOperand(1), N->getOperand(2),
4187
legalizeIntrinsicImmArg<4>(N, 3, DAG, Subtarget));
4188
case Intrinsic::loongarch_lsx_vinsgr2vr_h:
4189
case Intrinsic::loongarch_lasx_xvinsgr2vr_w:
4190
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4191
N->getOperand(1), N->getOperand(2),
4192
legalizeIntrinsicImmArg<3>(N, 3, DAG, Subtarget));
4193
case Intrinsic::loongarch_lsx_vinsgr2vr_w:
4194
case Intrinsic::loongarch_lasx_xvinsgr2vr_d:
4195
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4196
N->getOperand(1), N->getOperand(2),
4197
legalizeIntrinsicImmArg<2>(N, 3, DAG, Subtarget));
4198
case Intrinsic::loongarch_lsx_vinsgr2vr_d:
4199
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), N->getValueType(0),
4200
N->getOperand(1), N->getOperand(2),
4201
legalizeIntrinsicImmArg<1>(N, 3, DAG, Subtarget));
4202
case Intrinsic::loongarch_lsx_vreplgr2vr_b:
4203
case Intrinsic::loongarch_lsx_vreplgr2vr_h:
4204
case Intrinsic::loongarch_lsx_vreplgr2vr_w:
4205
case Intrinsic::loongarch_lsx_vreplgr2vr_d:
4206
case Intrinsic::loongarch_lasx_xvreplgr2vr_b:
4207
case Intrinsic::loongarch_lasx_xvreplgr2vr_h:
4208
case Intrinsic::loongarch_lasx_xvreplgr2vr_w:
4209
case Intrinsic::loongarch_lasx_xvreplgr2vr_d: {
4210
EVT ResTy = N->getValueType(0);
4211
SmallVector<SDValue> Ops(ResTy.getVectorNumElements(), N->getOperand(1));
4212
return DAG.getBuildVector(ResTy, DL, Ops);
4213
}
4214
case Intrinsic::loongarch_lsx_vreplve_b:
4215
case Intrinsic::loongarch_lsx_vreplve_h:
4216
case Intrinsic::loongarch_lsx_vreplve_w:
4217
case Intrinsic::loongarch_lsx_vreplve_d:
4218
case Intrinsic::loongarch_lasx_xvreplve_b:
4219
case Intrinsic::loongarch_lasx_xvreplve_h:
4220
case Intrinsic::loongarch_lasx_xvreplve_w:
4221
case Intrinsic::loongarch_lasx_xvreplve_d:
4222
return DAG.getNode(LoongArchISD::VREPLVE, DL, N->getValueType(0),
4223
N->getOperand(1),
4224
DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getGRLenVT(),
4225
N->getOperand(2)));
4226
}
4227
return SDValue();
4228
}
4229
4230
SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
4231
DAGCombinerInfo &DCI) const {
4232
SelectionDAG &DAG = DCI.DAG;
4233
switch (N->getOpcode()) {
4234
default:
4235
break;
4236
case ISD::AND:
4237
return performANDCombine(N, DAG, DCI, Subtarget);
4238
case ISD::OR:
4239
return performORCombine(N, DAG, DCI, Subtarget);
4240
case ISD::SETCC:
4241
return performSETCCCombine(N, DAG, DCI, Subtarget);
4242
case ISD::SRL:
4243
return performSRLCombine(N, DAG, DCI, Subtarget);
4244
case LoongArchISD::BITREV_W:
4245
return performBITREV_WCombine(N, DAG, DCI, Subtarget);
4246
case ISD::INTRINSIC_WO_CHAIN:
4247
return performINTRINSIC_WO_CHAINCombine(N, DAG, DCI, Subtarget);
4248
}
4249
return SDValue();
4250
}
4251
4252
static MachineBasicBlock *insertDivByZeroTrap(MachineInstr &MI,
4253
MachineBasicBlock *MBB) {
4254
if (!ZeroDivCheck)
4255
return MBB;
4256
4257
// Build instructions:
4258
// MBB:
4259
// div(or mod) $dst, $dividend, $divisor
4260
// bnez $divisor, SinkMBB
4261
// BreakMBB:
4262
// break 7 // BRK_DIVZERO
4263
// SinkMBB:
4264
// fallthrough
4265
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
4266
MachineFunction::iterator It = ++MBB->getIterator();
4267
MachineFunction *MF = MBB->getParent();
4268
auto BreakMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4269
auto SinkMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4270
MF->insert(It, BreakMBB);
4271
MF->insert(It, SinkMBB);
4272
4273
// Transfer the remainder of MBB and its successor edges to SinkMBB.
4274
SinkMBB->splice(SinkMBB->end(), MBB, std::next(MI.getIterator()), MBB->end());
4275
SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
4276
4277
const TargetInstrInfo &TII = *MF->getSubtarget().getInstrInfo();
4278
DebugLoc DL = MI.getDebugLoc();
4279
MachineOperand &Divisor = MI.getOperand(2);
4280
Register DivisorReg = Divisor.getReg();
4281
4282
// MBB:
4283
BuildMI(MBB, DL, TII.get(LoongArch::BNEZ))
4284
.addReg(DivisorReg, getKillRegState(Divisor.isKill()))
4285
.addMBB(SinkMBB);
4286
MBB->addSuccessor(BreakMBB);
4287
MBB->addSuccessor(SinkMBB);
4288
4289
// BreakMBB:
4290
// See linux header file arch/loongarch/include/uapi/asm/break.h for the
4291
// definition of BRK_DIVZERO.
4292
BuildMI(BreakMBB, DL, TII.get(LoongArch::BREAK)).addImm(7 /*BRK_DIVZERO*/);
4293
BreakMBB->addSuccessor(SinkMBB);
4294
4295
// Clear Divisor's kill flag.
4296
Divisor.setIsKill(false);
4297
4298
return SinkMBB;
4299
}
4300
4301
static MachineBasicBlock *
4302
emitVecCondBranchPseudo(MachineInstr &MI, MachineBasicBlock *BB,
4303
const LoongArchSubtarget &Subtarget) {
4304
unsigned CondOpc;
4305
switch (MI.getOpcode()) {
4306
default:
4307
llvm_unreachable("Unexpected opcode");
4308
case LoongArch::PseudoVBZ:
4309
CondOpc = LoongArch::VSETEQZ_V;
4310
break;
4311
case LoongArch::PseudoVBZ_B:
4312
CondOpc = LoongArch::VSETANYEQZ_B;
4313
break;
4314
case LoongArch::PseudoVBZ_H:
4315
CondOpc = LoongArch::VSETANYEQZ_H;
4316
break;
4317
case LoongArch::PseudoVBZ_W:
4318
CondOpc = LoongArch::VSETANYEQZ_W;
4319
break;
4320
case LoongArch::PseudoVBZ_D:
4321
CondOpc = LoongArch::VSETANYEQZ_D;
4322
break;
4323
case LoongArch::PseudoVBNZ:
4324
CondOpc = LoongArch::VSETNEZ_V;
4325
break;
4326
case LoongArch::PseudoVBNZ_B:
4327
CondOpc = LoongArch::VSETALLNEZ_B;
4328
break;
4329
case LoongArch::PseudoVBNZ_H:
4330
CondOpc = LoongArch::VSETALLNEZ_H;
4331
break;
4332
case LoongArch::PseudoVBNZ_W:
4333
CondOpc = LoongArch::VSETALLNEZ_W;
4334
break;
4335
case LoongArch::PseudoVBNZ_D:
4336
CondOpc = LoongArch::VSETALLNEZ_D;
4337
break;
4338
case LoongArch::PseudoXVBZ:
4339
CondOpc = LoongArch::XVSETEQZ_V;
4340
break;
4341
case LoongArch::PseudoXVBZ_B:
4342
CondOpc = LoongArch::XVSETANYEQZ_B;
4343
break;
4344
case LoongArch::PseudoXVBZ_H:
4345
CondOpc = LoongArch::XVSETANYEQZ_H;
4346
break;
4347
case LoongArch::PseudoXVBZ_W:
4348
CondOpc = LoongArch::XVSETANYEQZ_W;
4349
break;
4350
case LoongArch::PseudoXVBZ_D:
4351
CondOpc = LoongArch::XVSETANYEQZ_D;
4352
break;
4353
case LoongArch::PseudoXVBNZ:
4354
CondOpc = LoongArch::XVSETNEZ_V;
4355
break;
4356
case LoongArch::PseudoXVBNZ_B:
4357
CondOpc = LoongArch::XVSETALLNEZ_B;
4358
break;
4359
case LoongArch::PseudoXVBNZ_H:
4360
CondOpc = LoongArch::XVSETALLNEZ_H;
4361
break;
4362
case LoongArch::PseudoXVBNZ_W:
4363
CondOpc = LoongArch::XVSETALLNEZ_W;
4364
break;
4365
case LoongArch::PseudoXVBNZ_D:
4366
CondOpc = LoongArch::XVSETALLNEZ_D;
4367
break;
4368
}
4369
4370
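// Lower the pseudo into a diamond: BB computes the vector condition into an
// FCC register and branches on it, FalseBB materializes 0, TrueBB
// materializes 1, and SinkBB merges the two results with a PHI.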
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4371
const BasicBlock *LLVM_BB = BB->getBasicBlock();
4372
DebugLoc DL = MI.getDebugLoc();
4373
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4374
MachineFunction::iterator It = ++BB->getIterator();
4375
4376
MachineFunction *F = BB->getParent();
4377
MachineBasicBlock *FalseBB = F->CreateMachineBasicBlock(LLVM_BB);
4378
MachineBasicBlock *TrueBB = F->CreateMachineBasicBlock(LLVM_BB);
4379
MachineBasicBlock *SinkBB = F->CreateMachineBasicBlock(LLVM_BB);
4380
4381
F->insert(It, FalseBB);
4382
F->insert(It, TrueBB);
4383
F->insert(It, SinkBB);
4384
4385
// Transfer the remainder of MBB and its successor edges to Sink.
4386
SinkBB->splice(SinkBB->end(), BB, std::next(MI.getIterator()), BB->end());
4387
SinkBB->transferSuccessorsAndUpdatePHIs(BB);
4388
4389
// Insert the real instruction to BB.
4390
Register FCC = MRI.createVirtualRegister(&LoongArch::CFRRegClass);
4391
BuildMI(BB, DL, TII->get(CondOpc), FCC).addReg(MI.getOperand(1).getReg());
4392
4393
// Insert branch.
4394
BuildMI(BB, DL, TII->get(LoongArch::BCNEZ)).addReg(FCC).addMBB(TrueBB);
4395
BB->addSuccessor(FalseBB);
4396
BB->addSuccessor(TrueBB);
4397
4398
// FalseBB.
4399
Register RD1 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4400
BuildMI(FalseBB, DL, TII->get(LoongArch::ADDI_W), RD1)
4401
.addReg(LoongArch::R0)
4402
.addImm(0);
4403
BuildMI(FalseBB, DL, TII->get(LoongArch::PseudoBR)).addMBB(SinkBB);
4404
FalseBB->addSuccessor(SinkBB);
4405
4406
// TrueBB.
4407
Register RD2 = MRI.createVirtualRegister(&LoongArch::GPRRegClass);
4408
BuildMI(TrueBB, DL, TII->get(LoongArch::ADDI_W), RD2)
4409
.addReg(LoongArch::R0)
4410
.addImm(1);
4411
TrueBB->addSuccessor(SinkBB);
4412
4413
// SinkBB: merge the results.
4414
BuildMI(*SinkBB, SinkBB->begin(), DL, TII->get(LoongArch::PHI),
4415
MI.getOperand(0).getReg())
4416
.addReg(RD1)
4417
.addMBB(FalseBB)
4418
.addReg(RD2)
4419
.addMBB(TrueBB);
4420
4421
// The pseudo instruction is gone now.
4422
MI.eraseFromParent();
4423
return SinkBB;
4424
}
4425
4426
static MachineBasicBlock *
4427
emitPseudoXVINSGR2VR(MachineInstr &MI, MachineBasicBlock *BB,
4428
const LoongArchSubtarget &Subtarget) {
4429
unsigned InsOp;
4430
unsigned HalfSize;
4431
switch (MI.getOpcode()) {
4432
default:
4433
llvm_unreachable("Unexpected opcode");
4434
case LoongArch::PseudoXVINSGR2VR_B:
4435
HalfSize = 16;
4436
InsOp = LoongArch::VINSGR2VR_B;
4437
break;
4438
case LoongArch::PseudoXVINSGR2VR_H:
4439
HalfSize = 8;
4440
InsOp = LoongArch::VINSGR2VR_H;
4441
break;
4442
}
4443
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4444
const TargetRegisterClass *RC = &LoongArch::LASX256RegClass;
4445
const TargetRegisterClass *SubRC = &LoongArch::LSX128RegClass;
4446
DebugLoc DL = MI.getDebugLoc();
4447
MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4448
// XDst = vector_insert XSrc, Elt, Idx
4449
Register XDst = MI.getOperand(0).getReg();
4450
Register XSrc = MI.getOperand(1).getReg();
4451
Register Elt = MI.getOperand(2).getReg();
4452
unsigned Idx = MI.getOperand(3).getImm();
4453
4454
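// Lower the 256-bit insert through the 128-bit VINSGR2VR: if Idx addresses
// the high half, first copy that half into the low 128 bits (XVPERMI_Q with
// immediate 1), perform the insert on the LSX subregister, then merge the
// updated half back into the high half of the result (XVPERMI_Q with
// immediate 2).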
Register ScratchReg1 = XSrc;
4455
if (Idx >= HalfSize) {
4456
ScratchReg1 = MRI.createVirtualRegister(RC);
4457
BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), ScratchReg1)
4458
.addReg(XSrc)
4459
.addReg(XSrc)
4460
.addImm(1);
4461
}
4462
4463
Register ScratchSubReg1 = MRI.createVirtualRegister(SubRC);
4464
Register ScratchSubReg2 = MRI.createVirtualRegister(SubRC);
4465
BuildMI(*BB, MI, DL, TII->get(LoongArch::COPY), ScratchSubReg1)
4466
.addReg(ScratchReg1, 0, LoongArch::sub_128);
4467
BuildMI(*BB, MI, DL, TII->get(InsOp), ScratchSubReg2)
4468
.addReg(ScratchSubReg1)
4469
.addReg(Elt)
4470
.addImm(Idx >= HalfSize ? Idx - HalfSize : Idx);
4471
4472
Register ScratchReg2 = XDst;
4473
if (Idx >= HalfSize)
4474
ScratchReg2 = MRI.createVirtualRegister(RC);
4475
4476
BuildMI(*BB, MI, DL, TII->get(LoongArch::SUBREG_TO_REG), ScratchReg2)
4477
.addImm(0)
4478
.addReg(ScratchSubReg2)
4479
.addImm(LoongArch::sub_128);
4480
4481
if (Idx >= HalfSize)
4482
BuildMI(*BB, MI, DL, TII->get(LoongArch::XVPERMI_Q), XDst)
4483
.addReg(XSrc)
4484
.addReg(ScratchReg2)
4485
.addImm(2);
4486
4487
MI.eraseFromParent();
4488
return BB;
4489
}
4490
4491
MachineBasicBlock *LoongArchTargetLowering::EmitInstrWithCustomInserter(
4492
MachineInstr &MI, MachineBasicBlock *BB) const {
4493
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
4494
DebugLoc DL = MI.getDebugLoc();
4495
4496
switch (MI.getOpcode()) {
4497
default:
4498
llvm_unreachable("Unexpected instr type to insert");
4499
case LoongArch::DIV_W:
4500
case LoongArch::DIV_WU:
4501
case LoongArch::MOD_W:
4502
case LoongArch::MOD_WU:
4503
case LoongArch::DIV_D:
4504
case LoongArch::DIV_DU:
4505
case LoongArch::MOD_D:
4506
case LoongArch::MOD_DU:
4507
return insertDivByZeroTrap(MI, BB);
4508
break;
4509
case LoongArch::WRFCSR: {
4510
BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVGR2FCSR),
4511
LoongArch::FCSR0 + MI.getOperand(0).getImm())
4512
.addReg(MI.getOperand(1).getReg());
4513
MI.eraseFromParent();
4514
return BB;
4515
}
4516
case LoongArch::RDFCSR: {
4517
MachineInstr *ReadFCSR =
4518
BuildMI(*BB, MI, DL, TII->get(LoongArch::MOVFCSR2GR),
4519
MI.getOperand(0).getReg())
4520
.addReg(LoongArch::FCSR0 + MI.getOperand(1).getImm());
4521
ReadFCSR->getOperand(1).setIsUndef();
4522
MI.eraseFromParent();
4523
return BB;
4524
}
4525
case LoongArch::PseudoVBZ:
4526
case LoongArch::PseudoVBZ_B:
4527
case LoongArch::PseudoVBZ_H:
4528
case LoongArch::PseudoVBZ_W:
4529
case LoongArch::PseudoVBZ_D:
4530
case LoongArch::PseudoVBNZ:
4531
case LoongArch::PseudoVBNZ_B:
4532
case LoongArch::PseudoVBNZ_H:
4533
case LoongArch::PseudoVBNZ_W:
4534
case LoongArch::PseudoVBNZ_D:
4535
case LoongArch::PseudoXVBZ:
4536
case LoongArch::PseudoXVBZ_B:
4537
case LoongArch::PseudoXVBZ_H:
4538
case LoongArch::PseudoXVBZ_W:
4539
case LoongArch::PseudoXVBZ_D:
4540
case LoongArch::PseudoXVBNZ:
4541
case LoongArch::PseudoXVBNZ_B:
4542
case LoongArch::PseudoXVBNZ_H:
4543
case LoongArch::PseudoXVBNZ_W:
4544
case LoongArch::PseudoXVBNZ_D:
4545
return emitVecCondBranchPseudo(MI, BB, Subtarget);
4546
case LoongArch::PseudoXVINSGR2VR_B:
4547
case LoongArch::PseudoXVINSGR2VR_H:
4548
return emitPseudoXVINSGR2VR(MI, BB, Subtarget);
4549
}
4550
}
4551
4552
bool LoongArchTargetLowering::allowsMisalignedMemoryAccesses(
4553
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
4554
unsigned *Fast) const {
4555
if (!Subtarget.hasUAL())
4556
return false;
4557
4558
// TODO: set reasonable speed number.
4559
if (Fast)
4560
*Fast = 1;
4561
return true;
4562
}
4563
4564
const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
4565
switch ((LoongArchISD::NodeType)Opcode) {
4566
case LoongArchISD::FIRST_NUMBER:
4567
break;
4568
4569
#define NODE_NAME_CASE(node) \
4570
case LoongArchISD::node: \
4571
return "LoongArchISD::" #node;
4572
4573
// TODO: Add more target-dependent nodes later.
4574
NODE_NAME_CASE(CALL)
4575
NODE_NAME_CASE(CALL_MEDIUM)
4576
NODE_NAME_CASE(CALL_LARGE)
4577
NODE_NAME_CASE(RET)
4578
NODE_NAME_CASE(TAIL)
4579
NODE_NAME_CASE(TAIL_MEDIUM)
4580
NODE_NAME_CASE(TAIL_LARGE)
4581
NODE_NAME_CASE(SLL_W)
4582
NODE_NAME_CASE(SRA_W)
4583
NODE_NAME_CASE(SRL_W)
4584
NODE_NAME_CASE(BSTRINS)
4585
NODE_NAME_CASE(BSTRPICK)
4586
NODE_NAME_CASE(MOVGR2FR_W_LA64)
4587
NODE_NAME_CASE(MOVFR2GR_S_LA64)
4588
NODE_NAME_CASE(FTINT)
4589
NODE_NAME_CASE(REVB_2H)
4590
NODE_NAME_CASE(REVB_2W)
4591
NODE_NAME_CASE(BITREV_4B)
4592
NODE_NAME_CASE(BITREV_W)
4593
NODE_NAME_CASE(ROTR_W)
4594
NODE_NAME_CASE(ROTL_W)
4595
NODE_NAME_CASE(DIV_WU)
4596
NODE_NAME_CASE(MOD_WU)
4597
NODE_NAME_CASE(CLZ_W)
4598
NODE_NAME_CASE(CTZ_W)
4599
NODE_NAME_CASE(DBAR)
4600
NODE_NAME_CASE(IBAR)
4601
NODE_NAME_CASE(BREAK)
4602
NODE_NAME_CASE(SYSCALL)
4603
NODE_NAME_CASE(CRC_W_B_W)
4604
NODE_NAME_CASE(CRC_W_H_W)
4605
NODE_NAME_CASE(CRC_W_W_W)
4606
NODE_NAME_CASE(CRC_W_D_W)
4607
NODE_NAME_CASE(CRCC_W_B_W)
4608
NODE_NAME_CASE(CRCC_W_H_W)
4609
NODE_NAME_CASE(CRCC_W_W_W)
4610
NODE_NAME_CASE(CRCC_W_D_W)
4611
NODE_NAME_CASE(CSRRD)
4612
NODE_NAME_CASE(CSRWR)
4613
NODE_NAME_CASE(CSRXCHG)
4614
NODE_NAME_CASE(IOCSRRD_B)
4615
NODE_NAME_CASE(IOCSRRD_H)
4616
NODE_NAME_CASE(IOCSRRD_W)
4617
NODE_NAME_CASE(IOCSRRD_D)
4618
NODE_NAME_CASE(IOCSRWR_B)
4619
NODE_NAME_CASE(IOCSRWR_H)
4620
NODE_NAME_CASE(IOCSRWR_W)
4621
NODE_NAME_CASE(IOCSRWR_D)
4622
NODE_NAME_CASE(CPUCFG)
4623
NODE_NAME_CASE(MOVGR2FCSR)
4624
NODE_NAME_CASE(MOVFCSR2GR)
4625
NODE_NAME_CASE(CACOP_D)
4626
NODE_NAME_CASE(CACOP_W)
4627
NODE_NAME_CASE(VSHUF)
4628
NODE_NAME_CASE(VPICKEV)
4629
NODE_NAME_CASE(VPICKOD)
4630
NODE_NAME_CASE(VPACKEV)
4631
NODE_NAME_CASE(VPACKOD)
4632
NODE_NAME_CASE(VILVL)
4633
NODE_NAME_CASE(VILVH)
4634
NODE_NAME_CASE(VSHUF4I)
4635
NODE_NAME_CASE(VREPLVEI)
4636
NODE_NAME_CASE(XVPERMI)
4637
NODE_NAME_CASE(VPICK_SEXT_ELT)
4638
NODE_NAME_CASE(VPICK_ZEXT_ELT)
4639
NODE_NAME_CASE(VREPLVE)
4640
NODE_NAME_CASE(VALL_ZERO)
4641
NODE_NAME_CASE(VANY_ZERO)
4642
NODE_NAME_CASE(VALL_NONZERO)
4643
NODE_NAME_CASE(VANY_NONZERO)
4644
}
4645
#undef NODE_NAME_CASE
4646
return nullptr;
4647
}
4648
4649
//===----------------------------------------------------------------------===//
4650
// Calling Convention Implementation
4651
//===----------------------------------------------------------------------===//
4652
4653
// Eight general-purpose registers a0-a7 are used for passing integer arguments,
// with a0-a1 reused to return values. Generally, the GPRs are used to pass
// fixed-point arguments, and floating-point arguments when no FPR is available
// or when the soft-float ABI is in use.
4657
const MCPhysReg ArgGPRs[] = {LoongArch::R4, LoongArch::R5, LoongArch::R6,
4658
LoongArch::R7, LoongArch::R8, LoongArch::R9,
4659
LoongArch::R10, LoongArch::R11};
4660
// Eight floating-point registers fa0-fa7 are used for passing floating-point
// arguments, and fa0-fa1 are also used to return values.
4662
const MCPhysReg ArgFPR32s[] = {LoongArch::F0, LoongArch::F1, LoongArch::F2,
4663
LoongArch::F3, LoongArch::F4, LoongArch::F5,
4664
LoongArch::F6, LoongArch::F7};
4665
// FPR32 and FPR64 alias each other.
4666
const MCPhysReg ArgFPR64s[] = {
4667
LoongArch::F0_64, LoongArch::F1_64, LoongArch::F2_64, LoongArch::F3_64,
4668
LoongArch::F4_64, LoongArch::F5_64, LoongArch::F6_64, LoongArch::F7_64};
4669
4670
const MCPhysReg ArgVRs[] = {LoongArch::VR0, LoongArch::VR1, LoongArch::VR2,
4671
LoongArch::VR3, LoongArch::VR4, LoongArch::VR5,
4672
LoongArch::VR6, LoongArch::VR7};
4673
4674
const MCPhysReg ArgXRs[] = {LoongArch::XR0, LoongArch::XR1, LoongArch::XR2,
4675
LoongArch::XR3, LoongArch::XR4, LoongArch::XR5,
4676
LoongArch::XR6, LoongArch::XR7};
4677
4678
// Pass a 2*GRLen argument that has been split into two GRLen values through
4679
// registers or the stack as necessary.
4680
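// For example, on LA64 an i128 argument split into two i64 halves: the first
// half takes the next free GPR (or a suitably aligned stack slot), and the
// second half takes the following GPR or, failing that, a GRLen-aligned
// stack slot.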
static bool CC_LoongArchAssign2GRLen(unsigned GRLen, CCState &State,
4681
CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1,
4682
unsigned ValNo2, MVT ValVT2, MVT LocVT2,
4683
ISD::ArgFlagsTy ArgFlags2) {
4684
unsigned GRLenInBytes = GRLen / 8;
4685
if (Register Reg = State.AllocateReg(ArgGPRs)) {
4686
// At least one half can be passed via register.
4687
State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
4688
VA1.getLocVT(), CCValAssign::Full));
4689
} else {
4690
// Both halves must be passed on the stack, with proper alignment.
4691
Align StackAlign =
4692
std::max(Align(GRLenInBytes), ArgFlags1.getNonZeroOrigAlign());
4693
State.addLoc(
4694
CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
4695
State.AllocateStack(GRLenInBytes, StackAlign),
4696
VA1.getLocVT(), CCValAssign::Full));
4697
State.addLoc(CCValAssign::getMem(
4698
ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4699
LocVT2, CCValAssign::Full));
4700
return false;
4701
}
4702
if (Register Reg = State.AllocateReg(ArgGPRs)) {
4703
// The second half can also be passed via register.
4704
State.addLoc(
4705
CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
4706
} else {
4707
// The second half is passed via the stack, without additional alignment.
4708
State.addLoc(CCValAssign::getMem(
4709
ValNo2, ValVT2, State.AllocateStack(GRLenInBytes, Align(GRLenInBytes)),
4710
LocVT2, CCValAssign::Full));
4711
}
4712
return false;
4713
}
4714
4715
// Implements the LoongArch calling convention. Returns true upon failure.
4716
static bool CC_LoongArch(const DataLayout &DL, LoongArchABI::ABI ABI,
4717
unsigned ValNo, MVT ValVT,
4718
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
4719
CCState &State, bool IsFixed, bool IsRet,
4720
Type *OrigTy) {
4721
unsigned GRLen = DL.getLargestLegalIntTypeSizeInBits();
4722
assert((GRLen == 32 || GRLen == 64) && "Unsupported GRLen");
4723
MVT GRLenVT = GRLen == 32 ? MVT::i32 : MVT::i64;
4724
MVT LocVT = ValVT;
4725
4726
// Any return value split into more than two values can't be returned
4727
// directly.
4728
if (IsRet && ValNo > 1)
4729
return true;
4730
4731
// If passing a variadic argument, or if no FPR is available.
4732
bool UseGPRForFloat = true;
4733
4734
switch (ABI) {
4735
default:
4736
llvm_unreachable("Unexpected ABI");
4737
break;
4738
case LoongArchABI::ABI_ILP32F:
4739
case LoongArchABI::ABI_LP64F:
4740
case LoongArchABI::ABI_ILP32D:
4741
case LoongArchABI::ABI_LP64D:
4742
UseGPRForFloat = !IsFixed;
4743
break;
4744
case LoongArchABI::ABI_ILP32S:
4745
case LoongArchABI::ABI_LP64S:
4746
break;
4747
}
4748
4749
// FPR32 and FPR64 alias each other.
4750
if (State.getFirstUnallocated(ArgFPR32s) == std::size(ArgFPR32s))
4751
UseGPRForFloat = true;
4752
4753
if (UseGPRForFloat && ValVT == MVT::f32) {
4754
LocVT = GRLenVT;
4755
LocInfo = CCValAssign::BCvt;
4756
} else if (UseGPRForFloat && GRLen == 64 && ValVT == MVT::f64) {
4757
LocVT = MVT::i64;
4758
LocInfo = CCValAssign::BCvt;
4759
} else if (UseGPRForFloat && GRLen == 32 && ValVT == MVT::f64) {
4760
// TODO: Handle passing f64 on LA32 with D feature.
4761
report_fatal_error("Passing f64 with GPR on LA32 is undefined");
4762
}
4763
4764
// If this is a variadic argument, the LoongArch calling convention requires
4765
// that it is assigned an 'even' or 'aligned' register if it has (2*GRLen)/8
4766
// byte alignment. An aligned register should be used regardless of whether
4767
// the original argument was split during legalisation or not. The argument
4768
// will not be passed by registers if the original type is larger than
4769
// 2*GRLen, so the register alignment rule does not apply.
4770
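// For example, on LA64 a variadic __int128 (16-byte size and alignment) that
// would otherwise start in an odd-numbered GPR such as a5 is instead passed
// in the aligned pair a6/a7.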
unsigned TwoGRLenInBytes = (2 * GRLen) / 8;
4771
if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoGRLenInBytes &&
4772
DL.getTypeAllocSize(OrigTy) == TwoGRLenInBytes) {
4773
unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
4774
// Skip 'odd' register if necessary.
4775
if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
4776
State.AllocateReg(ArgGPRs);
4777
}
4778
4779
SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
4780
SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
4781
State.getPendingArgFlags();
4782
4783
assert(PendingLocs.size() == PendingArgFlags.size() &&
4784
"PendingLocs and PendingArgFlags out of sync");
4785
4786
// Split arguments might be passed indirectly, so keep track of the pending
4787
// values.
4788
if (ValVT.isScalarInteger() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
4789
LocVT = GRLenVT;
4790
LocInfo = CCValAssign::Indirect;
4791
PendingLocs.push_back(
4792
CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
4793
PendingArgFlags.push_back(ArgFlags);
4794
if (!ArgFlags.isSplitEnd()) {
4795
return false;
4796
}
4797
}
4798
4799
// If the split argument only had two elements, it should be passed directly
4800
// in registers or on the stack.
4801
if (ValVT.isScalarInteger() && ArgFlags.isSplitEnd() &&
4802
PendingLocs.size() <= 2) {
4803
assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
4804
// Apply the normal calling convention rules to the first half of the
4805
// split argument.
4806
CCValAssign VA = PendingLocs[0];
4807
ISD::ArgFlagsTy AF = PendingArgFlags[0];
4808
PendingLocs.clear();
4809
PendingArgFlags.clear();
4810
return CC_LoongArchAssign2GRLen(GRLen, State, VA, AF, ValNo, ValVT, LocVT,
4811
ArgFlags);
4812
}
4813
4814
// Allocate to a register if possible, or else a stack slot.
4815
Register Reg;
4816
unsigned StoreSizeBytes = GRLen / 8;
4817
Align StackAlign = Align(GRLen / 8);
4818
4819
if (ValVT == MVT::f32 && !UseGPRForFloat)
4820
Reg = State.AllocateReg(ArgFPR32s);
4821
else if (ValVT == MVT::f64 && !UseGPRForFloat)
4822
Reg = State.AllocateReg(ArgFPR64s);
4823
else if (ValVT.is128BitVector())
4824
Reg = State.AllocateReg(ArgVRs);
4825
else if (ValVT.is256BitVector())
4826
Reg = State.AllocateReg(ArgXRs);
4827
else
4828
Reg = State.AllocateReg(ArgGPRs);
4829
4830
unsigned StackOffset =
4831
Reg ? 0 : State.AllocateStack(StoreSizeBytes, StackAlign);
4832
4833
// If we reach this point and PendingLocs is non-empty, we must be at the
4834
// end of a split argument that must be passed indirectly.
4835
if (!PendingLocs.empty()) {
4836
assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
4837
assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
4838
for (auto &It : PendingLocs) {
4839
if (Reg)
4840
It.convertToReg(Reg);
4841
else
4842
It.convertToMem(StackOffset);
4843
State.addLoc(It);
4844
}
4845
PendingLocs.clear();
4846
PendingArgFlags.clear();
4847
return false;
4848
}
4849
assert((!UseGPRForFloat || LocVT == GRLenVT) &&
       "Expected a GRLenVT at this stage");
4851
4852
if (Reg) {
4853
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
4854
return false;
4855
}
4856
4857
// When a floating-point value is passed on the stack, no bit-cast is needed.
4858
if (ValVT.isFloatingPoint()) {
4859
LocVT = ValVT;
4860
LocInfo = CCValAssign::Full;
4861
}
4862
4863
State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
4864
return false;
4865
}
4866
4867
void LoongArchTargetLowering::analyzeInputArgs(
4868
MachineFunction &MF, CCState &CCInfo,
4869
const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet,
4870
LoongArchCCAssignFn Fn) const {
4871
FunctionType *FType = MF.getFunction().getFunctionType();
4872
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4873
MVT ArgVT = Ins[i].VT;
4874
Type *ArgTy = nullptr;
4875
if (IsRet)
4876
ArgTy = FType->getReturnType();
4877
else if (Ins[i].isOrigArg())
4878
ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
4879
LoongArchABI::ABI ABI =
4880
MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4881
if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Ins[i].Flags,
4882
CCInfo, /*IsFixed=*/true, IsRet, ArgTy)) {
4883
LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type " << ArgVT
4884
<< '\n');
4885
llvm_unreachable("");
4886
}
4887
}
4888
}
4889
4890
void LoongArchTargetLowering::analyzeOutputArgs(
4891
MachineFunction &MF, CCState &CCInfo,
4892
const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
4893
CallLoweringInfo *CLI, LoongArchCCAssignFn Fn) const {
4894
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
4895
MVT ArgVT = Outs[i].VT;
4896
Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
4897
LoongArchABI::ABI ABI =
4898
MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
4899
if (Fn(MF.getDataLayout(), ABI, i, ArgVT, CCValAssign::Full, Outs[i].Flags,
4900
CCInfo, Outs[i].IsFixed, IsRet, OrigTy)) {
4901
LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type " << ArgVT
4902
<< "\n");
4903
llvm_unreachable("");
4904
}
4905
}
4906
}
4907
4908
// Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
4909
// values.
4910
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
4911
const CCValAssign &VA, const SDLoc &DL) {
4912
switch (VA.getLocInfo()) {
4913
default:
4914
llvm_unreachable("Unexpected CCValAssign::LocInfo");
4915
case CCValAssign::Full:
4916
case CCValAssign::Indirect:
4917
break;
4918
case CCValAssign::BCvt:
4919
if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
4920
Val = DAG.getNode(LoongArchISD::MOVGR2FR_W_LA64, DL, MVT::f32, Val);
4921
else
4922
Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
4923
break;
4924
}
4925
return Val;
4926
}
4927
4928
static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
4929
const CCValAssign &VA, const SDLoc &DL,
4930
const ISD::InputArg &In,
4931
const LoongArchTargetLowering &TLI) {
4932
MachineFunction &MF = DAG.getMachineFunction();
4933
MachineRegisterInfo &RegInfo = MF.getRegInfo();
4934
EVT LocVT = VA.getLocVT();
4935
SDValue Val;
4936
const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
4937
Register VReg = RegInfo.createVirtualRegister(RC);
4938
RegInfo.addLiveIn(VA.getLocReg(), VReg);
4939
Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
4940
4941
// If input is sign extended from 32 bits, note it for the OptW pass.
4942
if (In.isOrigArg()) {
4943
Argument *OrigArg = MF.getFunction().getArg(In.getOrigArgIndex());
4944
if (OrigArg->getType()->isIntegerTy()) {
4945
unsigned BitWidth = OrigArg->getType()->getIntegerBitWidth();
4946
// An input zero extended from i31 can also be considered sign extended.
4947
if ((BitWidth <= 32 && In.Flags.isSExt()) ||
4948
(BitWidth < 32 && In.Flags.isZExt())) {
4949
LoongArchMachineFunctionInfo *LAFI =
4950
MF.getInfo<LoongArchMachineFunctionInfo>();
4951
LAFI->addSExt32Register(VReg);
4952
}
4953
}
4954
}
4955
4956
return convertLocVTToValVT(DAG, Val, VA, DL);
4957
}
4958
4959
// The caller is responsible for loading the full value if the argument is
4960
// passed with CCValAssign::Indirect.
4961
static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
4962
const CCValAssign &VA, const SDLoc &DL) {
4963
MachineFunction &MF = DAG.getMachineFunction();
4964
MachineFrameInfo &MFI = MF.getFrameInfo();
4965
EVT ValVT = VA.getValVT();
4966
int FI = MFI.CreateFixedObject(ValVT.getStoreSize(), VA.getLocMemOffset(),
4967
/*IsImmutable=*/true);
4968
SDValue FIN = DAG.getFrameIndex(
4969
FI, MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0)));
4970
4971
ISD::LoadExtType ExtType;
4972
switch (VA.getLocInfo()) {
4973
default:
4974
llvm_unreachable("Unexpected CCValAssign::LocInfo");
4975
case CCValAssign::Full:
4976
case CCValAssign::Indirect:
4977
case CCValAssign::BCvt:
4978
ExtType = ISD::NON_EXTLOAD;
4979
break;
4980
}
4981
return DAG.getExtLoad(
4982
ExtType, DL, VA.getLocVT(), Chain, FIN,
4983
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
4984
}
4985
4986
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
4987
const CCValAssign &VA, const SDLoc &DL) {
4988
EVT LocVT = VA.getLocVT();
4989
4990
switch (VA.getLocInfo()) {
4991
default:
4992
llvm_unreachable("Unexpected CCValAssign::LocInfo");
4993
case CCValAssign::Full:
4994
break;
4995
case CCValAssign::BCvt:
4996
if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
4997
Val = DAG.getNode(LoongArchISD::MOVFR2GR_S_LA64, DL, MVT::i64, Val);
4998
else
4999
Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
5000
break;
5001
}
5002
return Val;
5003
}
5004
5005
static bool CC_LoongArch_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
5006
CCValAssign::LocInfo LocInfo,
5007
ISD::ArgFlagsTy ArgFlags, CCState &State) {
5008
if (LocVT == MVT::i32 || LocVT == MVT::i64) {
5009
// Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, SpLim
5010
// s0 s1 s2 s3 s4 s5 s6 s7 s8
5011
static const MCPhysReg GPRList[] = {
5012
LoongArch::R23, LoongArch::R24, LoongArch::R25,
5013
LoongArch::R26, LoongArch::R27, LoongArch::R28,
5014
LoongArch::R29, LoongArch::R30, LoongArch::R31};
5015
if (unsigned Reg = State.AllocateReg(GPRList)) {
5016
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5017
return false;
5018
}
5019
}
5020
5021
if (LocVT == MVT::f32) {
5022
// Pass in STG registers: F1, F2, F3, F4
5023
// fs0,fs1,fs2,fs3
5024
static const MCPhysReg FPR32List[] = {LoongArch::F24, LoongArch::F25,
5025
LoongArch::F26, LoongArch::F27};
5026
if (unsigned Reg = State.AllocateReg(FPR32List)) {
5027
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5028
return false;
5029
}
5030
}
5031
5032
if (LocVT == MVT::f64) {
5033
// Pass in STG registers: D1, D2, D3, D4
5034
// fs4,fs5,fs6,fs7
5035
static const MCPhysReg FPR64List[] = {LoongArch::F28_64, LoongArch::F29_64,
5036
LoongArch::F30_64, LoongArch::F31_64};
5037
if (unsigned Reg = State.AllocateReg(FPR64List)) {
5038
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
5039
return false;
5040
}
5041
}
5042
5043
report_fatal_error("No registers left in GHC calling convention");
5044
return true;
5045
}
5046
5047
// Transform physical registers into virtual registers.
5048
SDValue LoongArchTargetLowering::LowerFormalArguments(
5049
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5050
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5051
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5052
5053
MachineFunction &MF = DAG.getMachineFunction();
5054
5055
switch (CallConv) {
5056
default:
5057
llvm_unreachable("Unsupported calling convention");
5058
case CallingConv::C:
5059
case CallingConv::Fast:
5060
break;
5061
case CallingConv::GHC:
5062
if (!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicF) ||
5063
!MF.getSubtarget().hasFeature(LoongArch::FeatureBasicD))
5064
report_fatal_error(
5065
"GHC calling convention requires the F and D extensions");
5066
}
5067
5068
EVT PtrVT = getPointerTy(DAG.getDataLayout());
5069
MVT GRLenVT = Subtarget.getGRLenVT();
5070
unsigned GRLenInBytes = Subtarget.getGRLen() / 8;
5071
// Used with varargs to accumulate store chains.
5072
std::vector<SDValue> OutChains;
5073
5074
// Assign locations to all of the incoming arguments.
5075
SmallVector<CCValAssign> ArgLocs;
5076
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5077
5078
if (CallConv == CallingConv::GHC)
5079
CCInfo.AnalyzeFormalArguments(Ins, CC_LoongArch_GHC);
5080
else
5081
analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false, CC_LoongArch);
5082
5083
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
5084
CCValAssign &VA = ArgLocs[i];
5085
SDValue ArgValue;
5086
if (VA.isRegLoc())
5087
ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, Ins[i], *this);
5088
else
5089
ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
5090
if (VA.getLocInfo() == CCValAssign::Indirect) {
5091
// If the original argument was split and passed by reference, we need to
5092
// load all parts of it here (using the same address).
5093
InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
5094
MachinePointerInfo()));
5095
unsigned ArgIndex = Ins[i].OrigArgIndex;
5096
unsigned ArgPartOffset = Ins[i].PartOffset;
5097
assert(ArgPartOffset == 0);
5098
while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
5099
CCValAssign &PartVA = ArgLocs[i + 1];
5100
unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
5101
SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5102
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, Offset);
5103
InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
5104
MachinePointerInfo()));
5105
++i;
5106
}
5107
continue;
5108
}
5109
InVals.push_back(ArgValue);
5110
}
5111
5112
if (IsVarArg) {
5113
ArrayRef<MCPhysReg> ArgRegs = ArrayRef(ArgGPRs);
5114
unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
5115
const TargetRegisterClass *RC = &LoongArch::GPRRegClass;
5116
MachineFrameInfo &MFI = MF.getFrameInfo();
5117
MachineRegisterInfo &RegInfo = MF.getRegInfo();
5118
auto *LoongArchFI = MF.getInfo<LoongArchMachineFunctionInfo>();
5119
5120
// Offset of the first variable argument from stack pointer, and size of
5121
// the vararg save area. For now, the varargs save area is either zero or
5122
// large enough to hold a0-a7.
5123
int VaArgOffset, VarArgsSaveSize;
5124
5125
// If all registers are allocated, then all varargs must be passed on the
5126
// stack and we don't need to save any argregs.
5127
if (ArgRegs.size() == Idx) {
5128
VaArgOffset = CCInfo.getStackSize();
5129
VarArgsSaveSize = 0;
5130
} else {
5131
VarArgsSaveSize = GRLenInBytes * (ArgRegs.size() - Idx);
5132
VaArgOffset = -VarArgsSaveSize;
5133
}
5134
5135
// Record the frame index of the first variable argument
5136
// which is a value needed by VASTART.
5137
int FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5138
LoongArchFI->setVarArgsFrameIndex(FI);
5139
5140
// If saving an odd number of registers then create an extra stack slot to
5141
// ensure that the frame pointer is 2*GRLen-aligned, which in turn ensures
5142
// offsets to even-numbered registers remain 2*GRLen-aligned.
5143
if (Idx % 2) {
5144
MFI.CreateFixedObject(GRLenInBytes, VaArgOffset - (int)GRLenInBytes,
5145
true);
5146
VarArgsSaveSize += GRLenInBytes;
5147
}
5148
5149
// Copy the integer registers that may have been used for passing varargs
5150
// to the vararg save area.
5151
for (unsigned I = Idx; I < ArgRegs.size();
5152
++I, VaArgOffset += GRLenInBytes) {
5153
const Register Reg = RegInfo.createVirtualRegister(RC);
5154
RegInfo.addLiveIn(ArgRegs[I], Reg);
5155
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, GRLenVT);
5156
FI = MFI.CreateFixedObject(GRLenInBytes, VaArgOffset, true);
5157
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5158
SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
5159
MachinePointerInfo::getFixedStack(MF, FI));
5160
cast<StoreSDNode>(Store.getNode())
5161
->getMemOperand()
5162
->setValue((Value *)nullptr);
5163
OutChains.push_back(Store);
5164
}
5165
LoongArchFI->setVarArgsSaveSize(VarArgsSaveSize);
5166
}
5167
5168
// All stores are grouped in one node to allow the matching between
5169
// the size of Ins and InVals. This only happens for vararg functions.
5170
if (!OutChains.empty()) {
5171
OutChains.push_back(Chain);
5172
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
5173
}
5174
5175
return Chain;
5176
}
5177
5178
bool LoongArchTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
5179
return CI->isTailCall();
5180
}
5181
5182
// Check if the return value is used as only a return value, as otherwise
5183
// we can't perform a tail-call.
5184
bool LoongArchTargetLowering::isUsedByReturnOnly(SDNode *N,
5185
SDValue &Chain) const {
5186
if (N->getNumValues() != 1)
5187
return false;
5188
if (!N->hasNUsesOfValue(1, 0))
5189
return false;
5190
5191
SDNode *Copy = *N->use_begin();
5192
if (Copy->getOpcode() != ISD::CopyToReg)
5193
return false;
5194
5195
// If the ISD::CopyToReg has a glue operand, we conservatively assume it
5196
// isn't safe to perform a tail call.
5197
if (Copy->getGluedNode())
5198
return false;
5199
5200
// The copy must be used by a LoongArchISD::RET, and nothing else.
5201
bool HasRet = false;
5202
for (SDNode *Node : Copy->uses()) {
5203
if (Node->getOpcode() != LoongArchISD::RET)
5204
return false;
5205
HasRet = true;
5206
}
5207
5208
if (!HasRet)
5209
return false;
5210
5211
Chain = Copy->getOperand(0);
5212
return true;
5213
}
5214
5215
// Check whether the call is eligible for tail call optimization.
5216
bool LoongArchTargetLowering::isEligibleForTailCallOptimization(
5217
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
5218
const SmallVectorImpl<CCValAssign> &ArgLocs) const {
5219
5220
auto CalleeCC = CLI.CallConv;
5221
auto &Outs = CLI.Outs;
5222
auto &Caller = MF.getFunction();
5223
auto CallerCC = Caller.getCallingConv();
5224
5225
// Do not tail call opt if the stack is used to pass parameters.
5226
if (CCInfo.getStackSize() != 0)
5227
return false;
5228
5229
// Do not tail call opt if any parameters need to be passed indirectly.
5230
for (auto &VA : ArgLocs)
5231
if (VA.getLocInfo() == CCValAssign::Indirect)
5232
return false;
5233
5234
// Do not tail call opt if either caller or callee uses struct return
5235
// semantics.
5236
auto IsCallerStructRet = Caller.hasStructRetAttr();
5237
auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
5238
if (IsCallerStructRet || IsCalleeStructRet)
5239
return false;
5240
5241
// Do not tail call opt if either the callee or caller has a byval argument.
5242
for (auto &Arg : Outs)
5243
if (Arg.Flags.isByVal())
5244
return false;
5245
5246
// The callee has to preserve all registers the caller needs to preserve.
5247
const LoongArchRegisterInfo *TRI = Subtarget.getRegisterInfo();
5248
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5249
if (CalleeCC != CallerCC) {
5250
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5251
if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5252
return false;
5253
}
5254
return true;
5255
}
5256
5257
static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
5258
return DAG.getDataLayout().getPrefTypeAlign(
5259
VT.getTypeForEVT(*DAG.getContext()));
5260
}
5261
5262
// Lower a call to a callseq_start + CALL + callseq_end chain, and add input
5263
// and output parameter nodes.
5264
SDValue
5265
LoongArchTargetLowering::LowerCall(CallLoweringInfo &CLI,
5266
SmallVectorImpl<SDValue> &InVals) const {
5267
SelectionDAG &DAG = CLI.DAG;
5268
SDLoc &DL = CLI.DL;
5269
SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5270
SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5271
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5272
SDValue Chain = CLI.Chain;
5273
SDValue Callee = CLI.Callee;
5274
CallingConv::ID CallConv = CLI.CallConv;
5275
bool IsVarArg = CLI.IsVarArg;
5276
EVT PtrVT = getPointerTy(DAG.getDataLayout());
5277
MVT GRLenVT = Subtarget.getGRLenVT();
5278
bool &IsTailCall = CLI.IsTailCall;
5279
5280
MachineFunction &MF = DAG.getMachineFunction();
5281
5282
// Analyze the operands of the call, assigning locations to each operand.
5283
SmallVector<CCValAssign> ArgLocs;
5284
CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5285
5286
if (CallConv == CallingConv::GHC)
5287
ArgCCInfo.AnalyzeCallOperands(Outs, CC_LoongArch_GHC);
5288
else
5289
analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CC_LoongArch);
5290
5291
// Check if it's really possible to do a tail call.
5292
if (IsTailCall)
5293
IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
5294
5295
if (IsTailCall)
5296
++NumTailCalls;
5297
else if (CLI.CB && CLI.CB->isMustTailCall())
5298
report_fatal_error("failed to perform tail call elimination on a call "
5299
"site marked musttail");
5300
5301
// Get a count of how many bytes are to be pushed on the stack.
5302
unsigned NumBytes = ArgCCInfo.getStackSize();
5303
5304
// Create local copies for byval args.
5305
SmallVector<SDValue> ByValArgs;
5306
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5307
ISD::ArgFlagsTy Flags = Outs[i].Flags;
5308
if (!Flags.isByVal())
5309
continue;
5310
5311
SDValue Arg = OutVals[i];
5312
unsigned Size = Flags.getByValSize();
5313
Align Alignment = Flags.getNonZeroByValAlign();
5314
5315
int FI =
5316
MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
5317
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5318
SDValue SizeNode = DAG.getConstant(Size, DL, GRLenVT);
5319
5320
Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
5321
/*IsVolatile=*/false,
5322
/*AlwaysInline=*/false, /*CI=*/nullptr, std::nullopt,
5323
MachinePointerInfo(), MachinePointerInfo());
5324
ByValArgs.push_back(FIPtr);
5325
}
5326
5327
if (!IsTailCall)
5328
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
5329
5330
// Copy argument values to their designated locations.
5331
SmallVector<std::pair<Register, SDValue>> RegsToPass;
5332
SmallVector<SDValue> MemOpChains;
5333
SDValue StackPtr;
5334
for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
5335
CCValAssign &VA = ArgLocs[i];
5336
SDValue ArgValue = OutVals[i];
5337
ISD::ArgFlagsTy Flags = Outs[i].Flags;
5338
5339
// Promote the value if needed.
5340
// For now, only handle fully promoted and indirect arguments.
5341
if (VA.getLocInfo() == CCValAssign::Indirect) {
5342
// Store the argument in a stack slot and pass its address.
5343
Align StackAlign =
5344
std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
5345
getPrefTypeAlign(ArgValue.getValueType(), DAG));
5346
TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
5347
// If the original argument was split and passed by reference, we need to
5348
// store the required parts of it here (and pass just one address).
5349
unsigned ArgIndex = Outs[i].OrigArgIndex;
5350
unsigned ArgPartOffset = Outs[i].PartOffset;
5351
assert(ArgPartOffset == 0);
5352
// Calculate the total size to store. We don't have access to what we're
5353
// actually storing other than performing the loop and collecting the
5354
// info.
5355
SmallVector<std::pair<SDValue, SDValue>> Parts;
5356
while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
5357
SDValue PartValue = OutVals[i + 1];
5358
unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
5359
SDValue Offset = DAG.getIntPtrConstant(PartOffset, DL);
5360
EVT PartVT = PartValue.getValueType();
5361
5362
StoredSize += PartVT.getStoreSize();
5363
StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
5364
Parts.push_back(std::make_pair(PartValue, Offset));
5365
++i;
5366
}
5367
SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
5368
int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
5369
MemOpChains.push_back(
5370
DAG.getStore(Chain, DL, ArgValue, SpillSlot,
5371
MachinePointerInfo::getFixedStack(MF, FI)));
5372
for (const auto &Part : Parts) {
5373
SDValue PartValue = Part.first;
5374
SDValue PartOffset = Part.second;
5375
SDValue Address =
5376
DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, PartOffset);
5377
MemOpChains.push_back(
5378
DAG.getStore(Chain, DL, PartValue, Address,
5379
MachinePointerInfo::getFixedStack(MF, FI)));
5380
}
5381
ArgValue = SpillSlot;
5382
} else {
5383
ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL);
5384
}
5385
5386
// Use local copy if it is a byval arg.
5387
if (Flags.isByVal())
5388
ArgValue = ByValArgs[j++];
5389
5390
if (VA.isRegLoc()) {
5391
// Queue up the argument copies and emit them at the end.
5392
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
5393
} else {
5394
assert(VA.isMemLoc() && "Argument not register or memory");
5395
assert(!IsTailCall && "Tail call not allowed if stack is used "
5396
"for passing parameters");
5397
5398
// Work out the address of the stack slot.
5399
if (!StackPtr.getNode())
5400
StackPtr = DAG.getCopyFromReg(Chain, DL, LoongArch::R3, PtrVT);
5401
SDValue Address =
5402
DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
5403
DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
5404
5405
// Emit the store.
5406
MemOpChains.push_back(
5407
DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
5408
}
5409
}
5410
5411
// Join the stores, which are independent of one another.
5412
if (!MemOpChains.empty())
5413
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
5414
5415
SDValue Glue;
5416
5417
// Build a sequence of copy-to-reg nodes, chained and glued together.
5418
for (auto &Reg : RegsToPass) {
5419
Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
5420
Glue = Chain.getValue(1);
5421
}
5422
5423
// If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
5424
// TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
5425
// split it and then direct call can be matched by PseudoCALL.
5426
if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
5427
const GlobalValue *GV = S->getGlobal();
5428
unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(GV)
5429
? LoongArchII::MO_CALL
5430
: LoongArchII::MO_CALL_PLT;
5431
Callee = DAG.getTargetGlobalAddress(S->getGlobal(), DL, PtrVT, 0, OpFlags);
5432
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5433
unsigned OpFlags = getTargetMachine().shouldAssumeDSOLocal(nullptr)
5434
? LoongArchII::MO_CALL
5435
: LoongArchII::MO_CALL_PLT;
5436
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
5437
}
5438
5439
// The first call operand is the chain and the second is the target address.
5440
SmallVector<SDValue> Ops;
5441
Ops.push_back(Chain);
5442
Ops.push_back(Callee);
5443
5444
// Add argument registers to the end of the list so that they are
5445
// known live into the call.
5446
for (auto &Reg : RegsToPass)
5447
Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
5448
5449
if (!IsTailCall) {
5450
// Add a register mask operand representing the call-preserved registers.
5451
const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5452
const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
5453
assert(Mask && "Missing call preserved mask for calling convention");
5454
Ops.push_back(DAG.getRegisterMask(Mask));
5455
}
5456
5457
// Glue the call to the argument copies, if any.
5458
if (Glue.getNode())
5459
Ops.push_back(Glue);
5460
5461
// Emit the call.
5462
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
5463
unsigned Op;
5464
switch (DAG.getTarget().getCodeModel()) {
5465
default:
5466
report_fatal_error("Unsupported code model");
5467
case CodeModel::Small:
5468
Op = IsTailCall ? LoongArchISD::TAIL : LoongArchISD::CALL;
5469
break;
5470
case CodeModel::Medium:
5471
assert(Subtarget.is64Bit() && "Medium code model requires LA64");
5472
Op = IsTailCall ? LoongArchISD::TAIL_MEDIUM : LoongArchISD::CALL_MEDIUM;
5473
break;
5474
case CodeModel::Large:
5475
assert(Subtarget.is64Bit() && "Large code model requires LA64");
5476
Op = IsTailCall ? LoongArchISD::TAIL_LARGE : LoongArchISD::CALL_LARGE;
5477
break;
5478
}
5479
5480
if (IsTailCall) {
5481
MF.getFrameInfo().setHasTailCall();
5482
SDValue Ret = DAG.getNode(Op, DL, NodeTys, Ops);
5483
DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
5484
return Ret;
5485
}
5486
5487
Chain = DAG.getNode(Op, DL, NodeTys, Ops);
5488
DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
5489
Glue = Chain.getValue(1);
5490
5491
// Mark the end of the call, which is glued to the call itself.
5492
Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
5493
Glue = Chain.getValue(1);
5494
5495
// Assign locations to each value returned by this call.
5496
SmallVector<CCValAssign> RVLocs;
5497
CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
5498
analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, CC_LoongArch);
5499
5500
// Copy all of the result registers out of their specified physreg.
5501
for (auto &VA : RVLocs) {
5502
// Copy the value out.
5503
SDValue RetValue =
5504
DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
5505
// Glue the RetValue to the end of the call sequence.
5506
Chain = RetValue.getValue(1);
5507
Glue = RetValue.getValue(2);
5508
5509
RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL);
5510
5511
InVals.push_back(RetValue);
5512
}
5513
5514
return Chain;
5515
}
5516
5517
bool LoongArchTargetLowering::CanLowerReturn(
5518
CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
5519
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
5520
SmallVector<CCValAssign> RVLocs;
5521
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
5522
5523
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5524
LoongArchABI::ABI ABI =
5525
MF.getSubtarget<LoongArchSubtarget>().getTargetABI();
5526
if (CC_LoongArch(MF.getDataLayout(), ABI, i, Outs[i].VT, CCValAssign::Full,
5527
Outs[i].Flags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true,
5528
nullptr))
5529
return false;
5530
}
5531
return true;
5532
}
5533
5534
SDValue LoongArchTargetLowering::LowerReturn(
5535
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
5536
const SmallVectorImpl<ISD::OutputArg> &Outs,
5537
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
5538
SelectionDAG &DAG) const {
5539
// Stores the assignment of the return value to a location.
5540
SmallVector<CCValAssign> RVLocs;
5541
5542
// Info about the registers and stack slot.
5543
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
5544
*DAG.getContext());
5545
5546
analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
5547
nullptr, CC_LoongArch);
5548
if (CallConv == CallingConv::GHC && !RVLocs.empty())
5549
report_fatal_error("GHC functions return void only");
5550
SDValue Glue;
5551
SmallVector<SDValue, 4> RetOps(1, Chain);
5552
5553
// Copy the result values into the output registers.
5554
for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
5555
CCValAssign &VA = RVLocs[i];
5556
assert(VA.isRegLoc() && "Can only return in registers!");
5557
5558
// Handle a 'normal' return.
5559
SDValue Val = convertValVTToLocVT(DAG, OutVals[i], VA, DL);
5560
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
5561
5562
// Guarantee that all emitted copies are stuck together.
5563
Glue = Chain.getValue(1);
5564
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
5565
}
5566
5567
RetOps[0] = Chain; // Update chain.
5568
5569
// Add the glue node if we have it.
5570
if (Glue.getNode())
5571
RetOps.push_back(Glue);
5572
5573
return DAG.getNode(LoongArchISD::RET, DL, MVT::Other, RetOps);
5574
}
5575
5576
bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5577
bool ForCodeSize) const {
5578
// TODO: Maybe need more checks here after vector extension is supported.
5579
if (VT == MVT::f32 && !Subtarget.hasBasicF())
5580
return false;
5581
if (VT == MVT::f64 && !Subtarget.hasBasicD())
5582
return false;
5583
return (Imm.isZero() || Imm.isExactlyValue(+1.0));
5584
}
5585
5586
bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const {
5587
return true;
5588
}
5589
5590
bool LoongArchTargetLowering::isCheapToSpeculateCtlz(Type *) const {
5591
return true;
5592
}
5593
5594
bool LoongArchTargetLowering::shouldInsertFencesForAtomic(
5595
const Instruction *I) const {
5596
if (!Subtarget.is64Bit())
5597
return isa<LoadInst>(I) || isa<StoreInst>(I);
5598
5599
if (isa<LoadInst>(I))
5600
return true;
5601
5602
// On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not
5603
// require fences because we can use amswap_db.[w/d].
5604
Type *Ty = I->getOperand(0)->getType();
5605
if (isa<StoreInst>(I) && Ty->isIntegerTy()) {
5606
unsigned Size = Ty->getIntegerBitWidth();
5607
return (Size == 8 || Size == 16);
5608
}
5609
5610
return false;
5611
}
5612
5613
EVT LoongArchTargetLowering::getSetCCResultType(const DataLayout &DL,
5614
LLVMContext &Context,
5615
EVT VT) const {
5616
if (!VT.isVector())
5617
return getPointerTy(DL);
5618
return VT.changeVectorElementTypeToInteger();
5619
}
5620
5621
bool LoongArchTargetLowering::hasAndNot(SDValue Y) const {
5622
// TODO: Support vectors.
5623
return Y.getValueType().isScalarInteger() && !isa<ConstantSDNode>(Y);
5624
}
5625
5626
bool LoongArchTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5627
const CallInst &I,
5628
MachineFunction &MF,
5629
unsigned Intrinsic) const {
5630
switch (Intrinsic) {
5631
default:
5632
return false;
5633
case Intrinsic::loongarch_masked_atomicrmw_xchg_i32:
5634
case Intrinsic::loongarch_masked_atomicrmw_add_i32:
5635
case Intrinsic::loongarch_masked_atomicrmw_sub_i32:
5636
case Intrinsic::loongarch_masked_atomicrmw_nand_i32:
5637
Info.opc = ISD::INTRINSIC_W_CHAIN;
5638
Info.memVT = MVT::i32;
5639
Info.ptrVal = I.getArgOperand(0);
5640
Info.offset = 0;
5641
Info.align = Align(4);
5642
Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
5643
MachineMemOperand::MOVolatile;
5644
return true;
5645
// TODO: Add more Intrinsics later.
5646
}
5647
}
5648
5649
TargetLowering::AtomicExpansionKind
5650
LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
5651
// TODO: Add more AtomicRMWInst that needs to be extended.
5652
5653
// Since floating-point operation requires a non-trivial set of data
5654
// operations, use CmpXChg to expand.
5655
if (AI->isFloatingPointOperation() ||
5656
AI->getOperation() == AtomicRMWInst::UIncWrap ||
5657
AI->getOperation() == AtomicRMWInst::UDecWrap)
5658
return AtomicExpansionKind::CmpXChg;
5659
5660
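// 8- and 16-bit operations are expanded via the masked atomicrmw intrinsics;
// wider operations need no IR expansion.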
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
5661
if (Size == 8 || Size == 16)
5662
return AtomicExpansionKind::MaskedIntrinsic;
5663
return AtomicExpansionKind::None;
5664
}
5665
5666
static Intrinsic::ID
5667
getIntrinsicForMaskedAtomicRMWBinOp(unsigned GRLen,
5668
AtomicRMWInst::BinOp BinOp) {
5669
if (GRLen == 64) {
5670
switch (BinOp) {
5671
default:
5672
llvm_unreachable("Unexpected AtomicRMW BinOp");
5673
case AtomicRMWInst::Xchg:
5674
return Intrinsic::loongarch_masked_atomicrmw_xchg_i64;
5675
case AtomicRMWInst::Add:
5676
return Intrinsic::loongarch_masked_atomicrmw_add_i64;
5677
case AtomicRMWInst::Sub:
5678
return Intrinsic::loongarch_masked_atomicrmw_sub_i64;
5679
case AtomicRMWInst::Nand:
5680
return Intrinsic::loongarch_masked_atomicrmw_nand_i64;
5681
case AtomicRMWInst::UMax:
5682
return Intrinsic::loongarch_masked_atomicrmw_umax_i64;
5683
case AtomicRMWInst::UMin:
5684
return Intrinsic::loongarch_masked_atomicrmw_umin_i64;
5685
case AtomicRMWInst::Max:
5686
return Intrinsic::loongarch_masked_atomicrmw_max_i64;
5687
case AtomicRMWInst::Min:
5688
return Intrinsic::loongarch_masked_atomicrmw_min_i64;
5689
// TODO: support other AtomicRMWInst.
5690
}
5691
}
5692
5693
if (GRLen == 32) {
5694
switch (BinOp) {
5695
default:
5696
llvm_unreachable("Unexpected AtomicRMW BinOp");
5697
case AtomicRMWInst::Xchg:
5698
return Intrinsic::loongarch_masked_atomicrmw_xchg_i32;
5699
case AtomicRMWInst::Add:
5700
return Intrinsic::loongarch_masked_atomicrmw_add_i32;
5701
case AtomicRMWInst::Sub:
5702
return Intrinsic::loongarch_masked_atomicrmw_sub_i32;
5703
case AtomicRMWInst::Nand:
5704
return Intrinsic::loongarch_masked_atomicrmw_nand_i32;
5705
// TODO: support other AtomicRMWInst.
5706
}
5707
}
5708
5709
llvm_unreachable("Unexpected GRLen\n");
5710
}
5711
5712
TargetLowering::AtomicExpansionKind
5713
LoongArchTargetLowering::shouldExpandAtomicCmpXchgInIR(
5714
AtomicCmpXchgInst *CI) const {
5715
unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
5716
if (Size == 8 || Size == 16)
5717
return AtomicExpansionKind::MaskedIntrinsic;
5718
return AtomicExpansionKind::None;
5719
}
5720
5721
Value *LoongArchTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
5722
IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
5723
Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
5724
AtomicOrdering FailOrd = CI->getFailureOrdering();
5725
Value *FailureOrdering =
5726
Builder.getIntN(Subtarget.getGRLen(), static_cast<uint64_t>(FailOrd));
5727
5728
// TODO: Support cmpxchg on LA32.
5729
Intrinsic::ID CmpXchgIntrID = Intrinsic::loongarch_masked_cmpxchg_i64;
5730
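// The loongarch_masked_cmpxchg_i64 intrinsic operates on i64 values, so
// sign-extend the compare value, new value, and mask before the call.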
CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
5731
NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
5732
Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
5733
Type *Tys[] = {AlignedAddr->getType()};
5734
Function *MaskedCmpXchg =
5735
Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
5736
Value *Result = Builder.CreateCall(
5737
MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, FailureOrdering});
5738
Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5739
return Result;
5740
}
5741
5742
Value *LoongArchTargetLowering::emitMaskedAtomicRMWIntrinsic(
5743
IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
5744
Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
5745
// In the case of an atomicrmw xchg with a constant 0/-1 operand, replace
5746
// the atomic instruction with an AtomicRMWInst::And/Or with appropriate
5747
// mask, as this produces better code than the LL/SC loop emitted by
5748
// int_loongarch_masked_atomicrmw_xchg.
5749
if (AI->getOperation() == AtomicRMWInst::Xchg &&
5750
isa<ConstantInt>(AI->getValOperand())) {
5751
ConstantInt *CVal = cast<ConstantInt>(AI->getValOperand());
5752
if (CVal->isZero())
5753
return Builder.CreateAtomicRMW(AtomicRMWInst::And, AlignedAddr,
5754
Builder.CreateNot(Mask, "Inv_Mask"),
5755
AI->getAlign(), Ord);
5756
if (CVal->isMinusOne())
5757
return Builder.CreateAtomicRMW(AtomicRMWInst::Or, AlignedAddr, Mask,
5758
AI->getAlign(), Ord);
5759
}
5760
5761
unsigned GRLen = Subtarget.getGRLen();
5762
Value *Ordering =
5763
Builder.getIntN(GRLen, static_cast<uint64_t>(AI->getOrdering()));
5764
Type *Tys[] = {AlignedAddr->getType()};
5765
Function *LlwOpScwLoop = Intrinsic::getDeclaration(
5766
AI->getModule(),
5767
getIntrinsicForMaskedAtomicRMWBinOp(GRLen, AI->getOperation()), Tys);
5768
5769
if (GRLen == 64) {
5770
Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
5771
Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
5772
ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
5773
}
5774
5775
Value *Result;
5776
5777
// Must pass the shift amount needed to sign extend the loaded value prior
5778
// to performing a signed comparison for min/max. ShiftAmt is the number of
5779
// bits to shift the value into position. Pass GRLen-ShiftAmt-ValWidth, which
5780
// is the number of bits to left+right shift the value in order to
5781
// sign-extend.
5782
if (AI->getOperation() == AtomicRMWInst::Min ||
5783
AI->getOperation() == AtomicRMWInst::Max) {
5784
const DataLayout &DL = AI->getDataLayout();
5785
unsigned ValWidth =
5786
DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
5787
Value *SextShamt =
5788
Builder.CreateSub(Builder.getIntN(GRLen, GRLen - ValWidth), ShiftAmt);
5789
Result = Builder.CreateCall(LlwOpScwLoop,
5790
{AlignedAddr, Incr, Mask, SextShamt, Ordering});
5791
} else {
5792
Result =
5793
Builder.CreateCall(LlwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
5794
}
5795
5796
if (GRLen == 64)
5797
Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
5798
return Result;
5799
}
5800
5801
bool LoongArchTargetLowering::isFMAFasterThanFMulAndFAdd(
    const MachineFunction &MF, EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    break;
  }

  return false;
}

Register LoongArchTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return LoongArch::R4;
}

Register LoongArchTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return LoongArch::R5;
}

//===----------------------------------------------------------------------===//
// LoongArch Inline Assembly Support
//===----------------------------------------------------------------------===//

LoongArchTargetLowering::ConstraintType
LoongArchTargetLowering::getConstraintType(StringRef Constraint) const {
  // LoongArch specific constraints in GCC: config/loongarch/constraints.md
  //
  // 'f': A floating-point register (if available).
  // 'k': A memory operand whose address is formed by a base register and
  //      (optionally scaled) index register.
  // 'l': A signed 16-bit constant.
  // 'm': A memory operand whose address is formed by a base register and
  //      offset that is suitable for use in instructions with the same
  //      addressing mode as st.w and ld.w.
  // 'I': A signed 12-bit constant (for arithmetic instructions).
  // 'J': Integer zero.
  // 'K': An unsigned 12-bit constant (for logic instructions).
  // "ZB": An address that is held in a general-purpose register. The offset
  //       is zero.
  // "ZC": A memory operand whose address is formed by a base register and
  //       offset that is suitable for use in instructions with the same
  //       addressing mode as ll.w and sc.w.
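  //
  // For illustration only (not taken from this file), a C-level use of the
  // 'r' and 'I' constraints could look like:
  //   asm ("addi.w %0, %1, %2" : "=r"(res) : "r"(a), "I"(12));
  // where the 'I' operand must fit in a signed 12-bit immediate.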
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'f':
      return C_RegisterClass;
    case 'l':
    case 'I':
    case 'J':
    case 'K':
      return C_Immediate;
    case 'k':
      return C_Memory;
    }
  }

  if (Constraint == "ZC" || Constraint == "ZB")
    return C_Memory;

  // 'm' is handled here.
  return TargetLowering::getConstraintType(Constraint);
}

InlineAsm::ConstraintCode LoongArchTargetLowering::getInlineAsmMemConstraint(
    StringRef ConstraintCode) const {
  return StringSwitch<InlineAsm::ConstraintCode>(ConstraintCode)
      .Case("k", InlineAsm::ConstraintCode::k)
      .Case("ZB", InlineAsm::ConstraintCode::ZB)
      .Case("ZC", InlineAsm::ConstraintCode::ZC)
      .Default(TargetLowering::getInlineAsmMemConstraint(ConstraintCode));
}

std::pair<unsigned, const TargetRegisterClass *>
LoongArchTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  // First, see if this is a constraint that directly corresponds to a
  // LoongArch register class.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      // TODO: Support fixed vectors up to GRLen?
      if (VT.isVector())
        break;
      return std::make_pair(0U, &LoongArch::GPRRegClass);
    case 'f':
      if (Subtarget.hasBasicF() && VT == MVT::f32)
        return std::make_pair(0U, &LoongArch::FPR32RegClass);
      if (Subtarget.hasBasicD() && VT == MVT::f64)
        return std::make_pair(0U, &LoongArch::FPR64RegClass);
      if (Subtarget.hasExtLSX() &&
          TRI->isTypeLegalForClass(LoongArch::LSX128RegClass, VT))
        return std::make_pair(0U, &LoongArch::LSX128RegClass);
      if (Subtarget.hasExtLASX() &&
          TRI->isTypeLegalForClass(LoongArch::LASX256RegClass, VT))
        return std::make_pair(0U, &LoongArch::LASX256RegClass);
      break;
    default:
      break;
    }
  }

  // TargetLowering::getRegForInlineAsmConstraint uses the name of the TableGen
  // record (e.g. the "R0" in `def R0`) to choose registers for InlineAsm
  // constraints, while the official register name is prefixed with a '$'. So
  // we clip the '$' from the original constraint string (e.g. {$r0} to {r0})
  // before it is parsed. TargetLowering::getRegForInlineAsmConstraint is also
  // case insensitive, so there is no need to convert the constraint to upper
  // case here.
  //
  // For now, no need to support ABI names (e.g. `$a0`) as clang will correctly
  // decode the usage of register name aliases into their official names. And
  // AFAIK, the not yet upstreamed `rustc` for LoongArch will always use
  // official register names.
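  //
  // For example (illustrative), an explicit register constraint written as
  // {$f0} is clipped to {f0} below; when the D extension is available and the
  // requested type is f64 (or MVT::Other), the match is then widened from F0
  // to the 64-bit register F0_64.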
  if (Constraint.starts_with("{$r") || Constraint.starts_with("{$f") ||
      Constraint.starts_with("{$vr") || Constraint.starts_with("{$xr")) {
    bool IsFP = Constraint[2] == 'f';
    std::pair<StringRef, StringRef> Temp = Constraint.split('$');
    std::pair<unsigned, const TargetRegisterClass *> R;
    R = TargetLowering::getRegForInlineAsmConstraint(
        TRI, join_items("", Temp.first, Temp.second), VT);
    // Match those names to the widest floating point register type available.
    if (IsFP) {
      unsigned RegNo = R.first;
      if (LoongArch::F0 <= RegNo && RegNo <= LoongArch::F31) {
        if (Subtarget.hasBasicD() && (VT == MVT::f64 || VT == MVT::Other)) {
          unsigned DReg = RegNo - LoongArch::F0 + LoongArch::F0_64;
          return std::make_pair(DReg, &LoongArch::FPR64RegClass);
        }
      }
    }
    return R;
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

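// Illustrative example (not from this file): given
//   asm ("andi %0, %1, %2" : "=r"(r) : "r"(x), "K"(4095));
// the 'K' operand is validated below as an unsigned 12-bit immediate and is
// rebuilt as a target constant of GRLenVT; values that do not fit are simply
// not materialized here.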
void LoongArchTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'l':
      // Validate & create a 16-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<16>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getGRLenVT()));
      return;
    case 'K':
      // Validate & create a 12-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getGRLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

#define GET_REGISTER_MATCHER
#include "LoongArchGenAsmMatcher.inc"

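// Explanatory note (not in the original source): this hook backs the
// @llvm.read_register / @llvm.write_register intrinsics. A name such as
// "$r21" has its '$' prefix stripped and the remainder is looked up in the
// generated register-name tables; the request is only honoured when the
// register is in the reserved set reported by
// LoongArchRegisterInfo::getReservedRegs.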
Register
LoongArchTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                           const MachineFunction &MF) const {
  std::pair<StringRef, StringRef> Name = StringRef(RegName).split('$');
  std::string NewRegName = Name.second.str();
  Register Reg = MatchRegisterAltName(NewRegName);
  if (Reg == LoongArch::NoRegister)
    Reg = MatchRegisterName(NewRegName);
  if (Reg == LoongArch::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}

bool LoongArchTargetLowering::decomposeMulByConstant(LLVMContext &Context,
                                                     EVT VT, SDValue C) const {
  // TODO: Support vectors.
  if (!VT.isScalarInteger())
    return false;

  // Omit the optimization if the data size exceeds GRLen.
  if (VT.getSizeInBits() > Subtarget.getGRLen())
    return false;

  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
    const APInt &Imm = ConstNode->getAPIntValue();
    // Break MUL into (SLLI + ADD/SUB) or ALSL.
    if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
        (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
      return true;
    // Break MUL into (ALSL x, (SLLI x, imm0), imm1).
    if (ConstNode->hasOneUse() &&
        ((Imm - 2).isPowerOf2() || (Imm - 4).isPowerOf2() ||
         (Imm - 8).isPowerOf2() || (Imm - 16).isPowerOf2()))
      return true;
    // Break (MUL x, imm) into (ADD (SLLI x, s0), (SLLI x, s1)), in which the
    // immediate has two set bits. Or break (MUL x, imm) into
    // (SUB (SLLI x, s0), (SLLI x, s1)), in which the immediate equals
    // (1 << s0) - (1 << s1).
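    // Worked example (illustrative): Imm = 8224 = (1 << 13) + (1 << 5) lies
    // outside [-2048, 4095]; Shifts = 5, ImmPop = 257, ImmSmall = 32 and
    // Imm - ImmSmall = 8192 is a power of two, so the multiply is worth
    // decomposing into (ADD (SLLI x, 13), (SLLI x, 5)).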
    if (ConstNode->hasOneUse() && !(Imm.sge(-2048) && Imm.sle(4095))) {
      unsigned Shifts = Imm.countr_zero();
      // Reject immediates which can be composed via a single LUI.
      if (Shifts >= 12)
        return false;
      // Reject multiplications that can be optimized to
      // (SLLI (ALSL x, x, 1/2/3/4), s).
      APInt ImmPop = Imm.ashr(Shifts);
      if (ImmPop == 3 || ImmPop == 5 || ImmPop == 9 || ImmPop == 17)
        return false;
      // We do not consider the case `(-Imm - ImmSmall).isPowerOf2()`,
      // since it needs one more instruction than the other 3 cases.
      APInt ImmSmall = APInt(Imm.getBitWidth(), 1ULL << Shifts, true);
      if ((Imm - ImmSmall).isPowerOf2() || (Imm + ImmSmall).isPowerOf2() ||
          (ImmSmall - Imm).isPowerOf2())
        return true;
    }
  }

  return false;
}

bool LoongArchTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                    const AddrMode &AM,
                                                    Type *Ty, unsigned AS,
                                                    Instruction *I) const {
  // LoongArch has four basic addressing modes:
  //  1. reg
  //  2. reg + 12-bit signed offset
  //  3. reg + 14-bit signed offset left-shifted by 2
  //  4. reg1 + reg2
  // TODO: Add more checks after supporting the vector extension.
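  // Illustrative LA64 examples (not from the original comment):
  //   ld.w $a0, $a1, 2040 (mode 2), ldptr.w $a0, $a1, 16380 (mode 3) and
  //   ldx.w $a0, $a1, $a2 (mode 4).
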
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset or 14-bit signed offset left-shifted by 2
  // with `UAL` feature.
  if (!isInt<12>(AM.BaseOffs) &&
      !(isShiftedInt<14, 2>(AM.BaseOffs) && Subtarget.hasUAL()))
    return false;

  switch (AM.Scale) {
  case 0:
    // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    // "r+r+i" is not allowed.
    if (AM.HasBaseReg && AM.BaseOffs)
      return false;
    // Otherwise we have "r+r" or "r+i".
    break;
  case 2:
    // "2*r+r" or "2*r+i" is not allowed.
    if (AM.HasBaseReg || AM.BaseOffs)
      return false;
    // Allow "2*r" as "r+r".
    break;
  default:
    return false;
  }

  return true;
}

bool LoongArchTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool LoongArchTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool LoongArchTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  // Don't advertise i32->i64 zextload as being free for LA64. It interacts
  // poorly with type legalization of compares preferring sext.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool LoongArchTargetLowering::isSExtCheaperThanZExt(EVT SrcVT,
                                                    EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool LoongArchTargetLowering::signExtendConstant(const ConstantInt *CI) const {
  return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
}

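// Explanatory note (not in the original source): answering true here lets the
// generic combiner rewrite comparisons such as (X & Y) == Y into
// (~X & Y) == 0, a form LoongArch can select with its `andn` instruction; the
// constant case is excluded below.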
bool LoongArchTargetLowering::hasAndNotCompare(SDValue Y) const {
  // TODO: Support vectors.
  if (Y.getValueType().isVector())
    return false;

  return !isa<ConstantSDNode>(Y);
}

ISD::NodeType LoongArchTargetLowering::getExtendForAtomicCmpSwapArg() const {
  // TODO: LAMCAS will use amcas{_DB,}.[bhwd] which does not require extension.
  return ISD::SIGN_EXTEND;
}

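// Explanatory note (not in the original source): on LA64 the hook below asks
// for i32 libcall arguments and results to be passed sign-extended to 64
// bits, matching how 32-bit values are normally kept in GRLen-wide registers;
// for other types the decision simply follows the signedness of the value.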
bool LoongArchTargetLowering::shouldSignExtendTypeInLibCall(
    EVT Type, bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}

bool LoongArchTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress the unnecessary extensions if the LibCall
  // argument or return value is a float narrower than GRLen on a soft FP ABI.
  if (Subtarget.isSoftFPABI() && (Type.isFloatingPoint() && !Type.isVector() &&
                                  Type.getSizeInBits() < Subtarget.getGRLen()))
    return false;
  return true;
}