//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the ARM target.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Utils/ARMBaseInfo.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "arm-isel"
#define PASS_NAME "ARM Instruction Selection"

static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
                 cl::desc("Disable isel of shifter-op"),
                 cl::init(false));

//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
///
namespace {

class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  ARMDAGToDAGISel() = delete;

  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    return true;
  }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  /// Return true because some complex patterns, like those that call
  /// canExtractShiftFromMul, can modify the DAG in place.
  bool ComplexPatternFuncMutatesDAG() const override { return true; }

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
                                    SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }
  bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
    if (!N.hasOneUse())
      return false;
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    return true;
  }

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                   SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                       SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                  SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  template <int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);
  bool tryFMULFixed(SDNode *N, SDLoc dl);
  bool tryFP_TO_INT(SDNode *N, SDLoc dl);
  bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
                                             bool IsUnsigned,
                                             bool FixedToFloat);

  /// SelectVLD - Select NEON load intrinsics. NumVecs should be
  /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics. NumVecs should
  /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should
  /// be 2, 3 or 4. The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands.
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array which contains multiple
  /// opcodes for each element width.
  /// TySize is the index into the list of element types listed above.
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// SelectCDE_CXxD - Select a CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3D, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocessor,
  ///   the accumulator and the immediate operand, i.e. 0
  ///   for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
  /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
                              SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};

class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
public:
  static char ID;
  ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISelLegacy(
            ID, std::make_unique<ARMDAGToDAGISel>(tm, OptLevel)) {}
};
}

char ARMDAGToDAGISelLegacy::ID = 0;

INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)

/// isInt32Immediate - This method tests to see if the node is a 32-bit
/// constant operand. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    Imm = N->getAsZExtVal();
    return true;
  }
  return false;
}

// isInt32Immediate - This method tests to see if the operand is a 32-bit
// constant. If so, Imm will receive the 32-bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so, Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm) {
  return N->getOpcode() == Opc &&
         isInt32Immediate(N->getOperand(1).getNode(), Imm);
}

/// Check whether a particular node is a constant value representable as
/// (N * Scale) where N is in the range [\p RangeMin, \p RangeMax).
///
/// \param ScaledConstant [out] - On success, the pre-scaled constant value.
static bool isScaledConstantInRange(SDValue Node, int Scale,
                                    int RangeMin, int RangeMax,
                                    int &ScaledConstant) {
  assert(Scale > 0 && "Invalid scale!");

  // Check that this is a constant.
  const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
  if (!C)
    return false;

  ScaledConstant = (int) C->getZExtValue();
  if ((ScaledConstant % Scale) != 0)
    return false;

  ScaledConstant /= Scale;
  return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
}
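
// Illustrative example (not part of the upstream source): with Scale = 4,
// RangeMin = 0 and RangeMax = 256, a constant node holding 1020 passes the
// divisibility check (1020 % 4 == 0) and yields ScaledConstant = 255, which
// lies in [0, 256), so this returns true. A constant of 1022 fails the
// divisibility check, and 1024 scales to 256, which is out of range.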

void ARMDAGToDAGISel::PreprocessISelDAG() {
  if (!Subtarget->hasV6T2Ops())
    return;

  bool isThumb2 = Subtarget->isThumb();
  // We use make_early_inc_range to avoid invalidation issues.
  for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
    if (N.getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2 >> tz)), tz)) where tz is the
    // number of trailing zeros of c2. The left shift would be folded as a
    // shifter operand of 'add', and the 'and' and 'srl' would become a
    // bit-extraction node (UBFX).
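    // Illustrative instance of the transformation (not from the upstream
    // comments): with c1 == 14 and c2 == 1020 (0b1111111100, tz == 2),
    //   (add X1, (and (srl X2, 14), 1020))
    // becomes
    //   (add X1, (shl (and (srl X2, 16), 255), 2))
    // where the shl folds into the add and the and+srl select to UBFX.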

    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    unsigned And_imm = 0;
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = llvm::countr_zero(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left-shift shifter operands of 1 or 2 are free, but others
      // are not. e.g.
      //   ubfx   r3, r1, #16, #8
      //   ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //   mov.w  r9, #1020
      //   and.w  r2, r9, r1, lsr #14
      //   ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(&N, N0, N1);
  }
}

/// hasNoVMLxHazardUse - Return true if it's desirable to select an FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  if (OptLevel == CodeGenOptLevel::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  if (!N->hasOneUse())
    return false;

  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg)
    return true;
  if (Use->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    //   vmla
    //   vmla (stall 8 cycles)
    //
    //   vmul (5 cycles)
    //   vadd (5 cycles)
    //   vmla
    // This adds up to about 18 - 19 cycles.
    //
    //   vmla
    //   vmul (stall 4 cycles)
    //   vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  return false;
}

bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
                                            ARM_AM::ShiftOpc ShOpcVal,
                                            unsigned ShAmt) {
  if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    return true;
  if (Shift.hasOneUse())
    return true;
  // R << 2 is free.
  return ShOpcVal == ARM_AM::lsl &&
         (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
}
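
// Illustrative example (not part of the upstream source): on an A9-like core
// or Swift, folding a multi-use (lsl rX, #3) into a memory operand is
// rejected, since only lsl #2 (plus lsl #1 on Swift) is free there; on all
// other subtargets, or when the shift has a single use, the fold is always
// considered profitable.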

bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
                                             unsigned MaxShift,
                                             unsigned &PowerOfTwo,
                                             SDValue &NewMulConst) const {
  assert(N.getOpcode() == ISD::MUL);
  assert(MaxShift > 0);

  // If the multiply is used in more than one place then changing the constant
  // will make other uses incorrect, so don't.
  if (!N.hasOneUse()) return false;
  // Check if the multiply is by a constant
  ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!MulConst) return false;
  // If the constant is used in more than one place then modifying it will mean
  // we need to materialize two constants instead of one, which is a bad idea.
  if (!MulConst->hasOneUse()) return false;
  unsigned MulConstVal = MulConst->getZExtValue();
  if (MulConstVal == 0) return false;

  // Find the largest power of 2 that MulConstVal is a multiple of
  PowerOfTwo = MaxShift;
  while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    --PowerOfTwo;
    if (PowerOfTwo == 0) return false;
  }

  // Only optimise if the new cost is better
  unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
  NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
  unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
  unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
  return NewCost < OldCost;
}
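
// Worked example (illustrative; the actual costs depend on the subtarget):
// for (mul x, 8160), 8160 == 255 << 5, so PowerOfTwo becomes 5 and
// NewMulConst becomes 255. Where materializing 255 is cheaper than
// materializing 8160, the caller can rewrite the value as
// (shl (mul x, 255), 5) and fold the shl into a shifter operand.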

void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}

bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

// Determine whether an ISD::OR's operands are suitable to turn the operation
// into an addition, which often has more compact encodings.
bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
  assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
  Out = N;
  return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
}

bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
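
// Illustrative example (not part of the upstream source): for the DAG value
// (add r1, Constant<2044>), this returns Base = r1 and OffImm = 2044, which
// selects to something like "ldr r0, [r1, #2044]"; a frame index in the same
// position is rewritten to a TargetFrameIndex so the base register can be
// filled in once the frame is laid out.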

bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = -RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub : ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
      ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
              dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
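
// Illustrative example (not part of the upstream source): for
// (add r1, (shl r2, Constant<3>)) this produces Base = r1, Offset = r2 and
// an AM2 opcode encoding "+ lsl #3", i.e. roughly "ldr r0, [r1, r2, lsl #3]".
// A one-use (mul r2, Constant<5>) address is instead handled by the block at
// the top of the function as r2 + (r2 << 2).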

bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                                                  SDValue &Offset,
                                                  SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    if (AddSub == ARM_AM::sub) Val *= -1;
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                                               SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
                                                      ARM_AM::no_shift),
                                    SDLoc(Op), MVT::i32);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, so no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
      ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base,
                                        SDValue &Offset, bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    }
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}
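
// Illustrative example (not part of the upstream source): VFP loads and
// stores take an 8-bit immediate scaled by 4 (by 2 for the FP16 forms), so
// (add r1, Constant<1020>) yields RHSC = 255 and selects to roughly
// "vldr d0, [r1, #1020]", while an offset of 1022 falls back to base-only
// addressing with a zero offset.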

bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}

bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}

bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    llvm::Align MMOAlign = MemN->getAlign();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign.value() >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics. For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlign().value();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset) {
  LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
  ISD::MemIndexedMode AM = LdSt->getAddressingMode();
  if (AM != ISD::POST_INC)
    return false;
  Offset = N;
  if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
    if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
      Offset = CurDAG->getRegister(0, MVT::i32);
  }
  return true;
}
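
// Illustrative example (not part of the upstream source): a post-increment
// of exactly the access size (e.g. +16 for a 128-bit VLD1) is returned as
// register 0, which selects the "vld1 {...}, [rN]!" form; any other
// increment stays a real register operand, as in "vld1 {...}, [rN], rM".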

bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
                                       SDValue &Offset, SDValue &Label) {
  if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
    Offset = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Thumb Addressing Modes
//===----------------------------------------------------------------------===//

static bool shouldUseZeroOffsetLdSt(SDValue N) {
  // Negative numbers are difficult to materialise in thumb1. If we are
  // selecting the add of a negative, instead try to select ri with a zero
  // offset, so that the add node is created directly and becomes a sub.
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Look for an imm which is not legal for ld/st, but is legal for sub.
  if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
    return C->getSExtValue() < 0 && C->getSExtValue() >= -255;

  return false;
}
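
// Illustrative example (not part of the upstream source): for
// (add r0, Constant<-240>), a -240 offset is not encodable in a Thumb1
// load/store, so the helpers below keep the whole add as the base; the add
// is then selected directly and becomes a subtract, with the load using a
// zero offset.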

bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
                                                SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
    if (!isNullConstant(N))
      return false;

    Base = Offset = N;
    return true;
  }

  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  return true;
}

bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}

bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}

bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
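
// Illustrative example (not part of the upstream source): with Shift == 1
// this matches offsets that are multiples of 2 in roughly [-254, 254], so
// (add r1, Constant<100>) gives Base = r1 and OffImm = 100, while an odd
// offset such as 101 falls through to the base-only form.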

//===----------------------------------------------------------------------===//
// Thumb 2 Addressing Modes
//===----------------------------------------------------------------------===//

bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false; // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
      ? cast<LoadSDNode>(Op)->getAddressingMode()
      : cast<StoreSDNode>(Op)->getAddressingMode();
  int RHSC;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
        ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
        : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm =
          CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}

template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}

bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7 bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm =
        ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
            ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)
            : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),
                                        MVT::i32);
    return true;
  }
  return false;
}
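
// Illustrative example (not part of the upstream source): for an MVE
// post-increment load of 32-bit elements (Shift == 2), an increment of 64
// scales to RHSC = 16 and is emitted as +64 for the incrementing modes
// (PRE_INC/POST_INC) and as -64 for the decrementing ones.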

template <int Min, int Max>
bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
  int Val;
  if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
    OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}

bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}

bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
                                                SDValue &OffImm) {
  // This *must* succeed since it's used for the irreplaceable ldrex and strex
  // instructions.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);

  if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
    return true;

  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS)
    return true;

  uint32_t RHSC = (int)RHS->getZExtValue();
  if (RHSC > 1020 || RHSC % 4 != 0)
    return true;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  }

  OffImm = CurDAG->getTargetConstant(RHSC / 4, SDLoc(N), MVT::i32);
  return true;
}
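
// Illustrative example (not part of the upstream source): ldrex/strex encode
// their offset divided by 4, so (add r1, Constant<1020>) yields Base = r1
// and OffImm = 255, while (add r1, Constant<1024>) keeps the whole add as
// the base with a zero offset because 1024 exceeds the 1020 limit.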

//===--------------------------------------------------------------------===//

/// getAL - Returns a ARMCC::AL immediate node.
1586
static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1587
return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1588
}

void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
}

bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  SDValue Offset, AMOpc;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
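  // i32 and zero-extending i8 loads use ARM addressing mode 2 (12-bit
  // immediate or register offset); halfword and sign-extending byte loads
  // use addressing mode 3 (8-bit immediate or register offset).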
  if (LoadedVT == MVT::i32 && isPre &&
      SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_PRE_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 && !isPre &&
             SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = ARM::LDR_POST_IMM;
    Match = true;
  } else if (LoadedVT == MVT::i32 &&
             SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
    Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
    Match = true;

  } else if (LoadedVT == MVT::i16 &&
             SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
    Match = true;
    Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
                 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
                 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
  } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
    if (LD->getExtensionType() == ISD::SEXTLOAD) {
      if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
      }
    } else {
      if (isPre &&
          SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_PRE_IMM;
      } else if (!isPre &&
                 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = ARM::LDRB_POST_IMM;
      } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
        Match = true;
        Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
      }
    }
  }

  if (Match) {
    if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
                        CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                           MVT::i32, MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    } else {
      SDValue Chain = LD->getChain();
      SDValue Base = LD->getBasePtr();
      SDValue Ops[] = { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
                        CurDAG->getRegister(0, MVT::i32), Chain };
      SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                           MVT::i32, MVT::Other, Ops);
      transferMemOperands(N, New);
      ReplaceNode(N, New);
      return true;
    }
  }

  return false;
}

bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  EVT LoadedVT = LD->getMemoryVT();
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
      LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
    return false;
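
  // A tLDMIA_UPD advances the base register by 4 bytes per loaded register,
  // so only a post-increment of exactly 4 matches this form.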
  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
  if (!COffs || COffs->getZExtValue() != 4)
    return false;

  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
  // ISel.
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  SDValue Ops[] = { Base, getAL(CurDAG, SDLoc(N)),
                    CurDAG->getRegister(0, MVT::i32), Chain };
  SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
                                       MVT::i32, MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceNode(N, New);
  return true;
}

bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  if (AM == ISD::UNINDEXED)
    return false;

  EVT LoadedVT = LD->getMemoryVT();
  bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
  SDValue Offset;
  bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
  unsigned Opcode = 0;
  bool Match = false;
  if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
    switch (LoadedVT.getSimpleVT().SimpleTy) {
    case MVT::i32:
      Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
      break;
    case MVT::i16:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
      else
        Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
      break;
    case MVT::i8:
    case MVT::i1:
      if (isSExtLd)
        Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
      else
        Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[] = { Base, Offset, getAL(CurDAG, SDLoc(N)),
                      CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
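  // For example, a 128-bit load whose byte offset is not a multiple of 4
  // cannot use vldrw.32's scaled 7-bit immediate, but may still fold the
  // offset into a vldrb.8, whose immediate is unscaled.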
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base,
                   NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
                   PredReg,
                   CurDAG->getRegister(0, MVT::i32), // tp_reg
                   Chain};
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a D register from a pair of S registers.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a quad register from a pair of D registers.
SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers from a pair of Q registers.
SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive S registers.
SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
      CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive D registers.
SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form 4 consecutive Q registers.
SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
                                             SDValue V2, SDValue V3) {
  SDLoc dl(V0.getNode());
  SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
                                               MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
  SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
  SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
                          V2, SubReg2, V3, SubReg3 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
/// of a NEON VLD or VST instruction. The supported values depend on the
/// number of registers being loaded.
SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
                                       unsigned NumVecs, bool is64BitVector) {
  unsigned NumRegs = NumVecs;
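  // Each quad (128-bit) vector occupies two D registers, so VLD1/VLD2 of
  // quad vectors touch twice as many registers. Quad VLD3/VLD4 are split
  // into two double-register instructions (see SelectVLD below), so for
  // those NumVecs is already the per-instruction register count.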
  if (!is64BitVector && NumVecs < 3)
    NumRegs *= 2;

  unsigned Alignment = Align->getAsZExtVal();
  if (Alignment >= 32 && NumRegs == 4)
    Alignment = 32;
  else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
    Alignment = 16;
  else if (Alignment >= 8)
    Alignment = 8;
  else
    Alignment = 0;

  return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
}

static bool isVLDfixed(unsigned Opc)
{
  switch (Opc) {
  default: return false;
  case ARM::VLD1d8wb_fixed : return true;
  case ARM::VLD1d16wb_fixed : return true;
  case ARM::VLD1d64Qwb_fixed : return true;
  case ARM::VLD1d32wb_fixed : return true;
  case ARM::VLD1d64wb_fixed : return true;
  case ARM::VLD1d8TPseudoWB_fixed : return true;
  case ARM::VLD1d16TPseudoWB_fixed : return true;
  case ARM::VLD1d32TPseudoWB_fixed : return true;
  case ARM::VLD1d64TPseudoWB_fixed : return true;
  case ARM::VLD1d8QPseudoWB_fixed : return true;
  case ARM::VLD1d16QPseudoWB_fixed : return true;
  case ARM::VLD1d32QPseudoWB_fixed : return true;
  case ARM::VLD1d64QPseudoWB_fixed : return true;
  case ARM::VLD1q8wb_fixed : return true;
  case ARM::VLD1q16wb_fixed : return true;
  case ARM::VLD1q32wb_fixed : return true;
  case ARM::VLD1q64wb_fixed : return true;
  case ARM::VLD1DUPd8wb_fixed : return true;
  case ARM::VLD1DUPd16wb_fixed : return true;
  case ARM::VLD1DUPd32wb_fixed : return true;
  case ARM::VLD1DUPq8wb_fixed : return true;
  case ARM::VLD1DUPq16wb_fixed : return true;
  case ARM::VLD1DUPq32wb_fixed : return true;
  case ARM::VLD2d8wb_fixed : return true;
  case ARM::VLD2d16wb_fixed : return true;
  case ARM::VLD2d32wb_fixed : return true;
  case ARM::VLD2q8PseudoWB_fixed : return true;
  case ARM::VLD2q16PseudoWB_fixed : return true;
  case ARM::VLD2q32PseudoWB_fixed : return true;
  case ARM::VLD2DUPd8wb_fixed : return true;
  case ARM::VLD2DUPd16wb_fixed : return true;
  case ARM::VLD2DUPd32wb_fixed : return true;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
  }
}

static bool isVSTfixed(unsigned Opc)
{
  switch (Opc) {
  default: return false;
  case ARM::VST1d8wb_fixed : return true;
  case ARM::VST1d16wb_fixed : return true;
  case ARM::VST1d32wb_fixed : return true;
  case ARM::VST1d64wb_fixed : return true;
  case ARM::VST1q8wb_fixed : return true;
  case ARM::VST1q16wb_fixed : return true;
  case ARM::VST1q32wb_fixed : return true;
  case ARM::VST1q64wb_fixed : return true;
  case ARM::VST1d8TPseudoWB_fixed : return true;
  case ARM::VST1d16TPseudoWB_fixed : return true;
  case ARM::VST1d32TPseudoWB_fixed : return true;
  case ARM::VST1d64TPseudoWB_fixed : return true;
  case ARM::VST1d8QPseudoWB_fixed : return true;
  case ARM::VST1d16QPseudoWB_fixed : return true;
  case ARM::VST1d32QPseudoWB_fixed : return true;
  case ARM::VST1d64QPseudoWB_fixed : return true;
  case ARM::VST2d8wb_fixed : return true;
  case ARM::VST2d16wb_fixed : return true;
  case ARM::VST2d32wb_fixed : return true;
  case ARM::VST2q8PseudoWB_fixed : return true;
  case ARM::VST2q16PseudoWB_fixed : return true;
  case ARM::VST2q32PseudoWB_fixed : return true;
  }
}

// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
         && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
  case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
  case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
  case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
  case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;

  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
  case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
  case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
  case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
  case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}

/// Returns true if the given increment is a Constant known to be equal to the
/// access size performed by a NEON load/store. This means the "[rN]!" form can
/// be used.
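/// For example, a vld2 of two 64-bit (8-byte) vectors accesses 16 bytes, so
/// only a constant increment of 16 permits the writeback form.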
static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
  auto C = dyn_cast<ConstantSDNode>(Inc);
  return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
}

void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
  // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
  // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
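    // Multi-vector loads return one wide super-register of i64 elements;
    // three-vector operations round up to four registers because register
    // tuples only exist in pair and quad sizes.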
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
        // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
        // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd
    // registers.
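    // QOpcodes0 holds the even-register opcodes and QOpcodes1 the odd ones;
    // the first load always has writeback so that its updated address can
    // feed the second load.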
2205
EVT AddrTy = MemAddr.getValueType();
2206
2207
// Load the even subregs. This is always an updating load, so that it
2208
// provides the address to the second load for the odd subregs.
2209
SDValue ImplDef =
2210
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
2211
const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
2212
SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
2213
ResTy, AddrTy, MVT::Other, OpsA);
2214
Chain = SDValue(VLdA, 2);
2215
2216
// Load the odd subregs.
2217
Ops.push_back(SDValue(VLdA, 1));
2218
Ops.push_back(Align);
2219
if (isUpdating) {
2220
SDValue Inc = N->getOperand(AddrOpIdx + 1);
2221
assert(isa<ConstantSDNode>(Inc.getNode()) &&
2222
"only constant post-increment update allowed for VLD3/4");
2223
(void)Inc;
2224
Ops.push_back(Reg0);
2225
}
2226
Ops.push_back(SDValue(VLdA, 0));
2227
Ops.push_back(Pred);
2228
Ops.push_back(Reg0);
2229
Ops.push_back(Chain);
2230
VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
2231
}
2232
2233
// Transfer memoperands.
2234
MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2235
CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});
2236
2237
if (NumVecs == 1) {
2238
ReplaceNode(N, VLd);
2239
return;
2240
}
2241
2242
// Extract out the subregisters.
2243
SDValue SuperReg = SDValue(VLd, 0);
2244
static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
2245
ARM::qsub_3 == ARM::qsub_0 + 3,
2246
"Unexpected subreg numbering");
2247
unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
2248
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
2249
ReplaceUses(SDValue(N, Vec),
2250
CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
2251
ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
2252
if (isUpdating)
2253
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
2254
CurDAG->RemoveDeadNode(N);
2255
}

void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
  // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
  // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
            ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl,
                                             VT), 0)
            : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd
  // registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers. This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0));
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}

void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating
                                  // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  unsigned Alignment = 0;
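  // NEON lane and all-lanes (dup) forms only encode alignment hints up to
  // the total transfer size, and the three-element forms accept no alignment
  // hint at all, so clamp the hint to a supported power of two (0 meaning
  // "unaligned").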
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
  // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
        ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
        : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                           SDValue PredicateMask,
                                           SDValue Inactive) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
  Ops.push_back(PredicateMask);
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
  Ops.push_back(Inactive);
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                                                EVT InactiveTy) {
  Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
  Ops.push_back(SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
}

void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
                                   bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  uint16_t Opcode;
  switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
  case 32:
    Opcode = Opcodes[0];
    break;
  case 64:
    Opcode = Opcodes[1];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_WB");
  }

  Ops.push_back(N->getOperand(2)); // vector of base addresses

  int32_t ImmValue = N->getConstantOperandVal(3);
  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  Ops.push_back(N->getOperand(0)); // chain

  SmallVector<EVT, 8> VTs;
  VTs.push_back(N->getValueType(1));
  VTs.push_back(N->getValueType(0));
  VTs.push_back(N->getValueType(2));
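
  // The machine node defines its first two results in the opposite order to
  // the intrinsic's results, so uses of results 0 and 1 are swapped below.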
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  transferMemOperands(N, New);
  CurDAG->RemoveDeadNode(N);
}

void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
                                          bool Immediate,
                                          bool HasSaturationOperand) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  // Two 32-bit halves of the value to be shifted
  Ops.push_back(N->getOperand(1));
  Ops.push_back(N->getOperand(2));

  // The shift count
  if (Immediate) {
    int32_t ImmValue = N->getConstantOperandVal(3);
    Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
  } else {
    Ops.push_back(N->getOperand(3));
  }

  // The immediate saturation operand, if any
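  // The intrinsic supplies the saturation width in bits (64 or 48); the
  // instruction encodes it as a single bit, 0 for 64-bit and 1 for the
  // narrower 48-bit saturation.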
  if (HasSaturationOperand) {
    int32_t SatOp = N->getConstantOperandVal(4);
    int SatBit = (SatOp == 64 ? 0 : 1);
    Ops.push_back(getI32Imm(SatBit, Loc));
  }

  // MVE scalar shifts are IT-predicable, so include the standard
  // predicate arguments.
  Ops.push_back(getAL(CurDAG, Loc));
  Ops.push_back(CurDAG->getRegister(0, MVT::i32));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                                        uint16_t OpcodeWithNoCarry,
                                        bool Add, bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  uint16_t Opcode;

  unsigned FirstInputOp = Predicated ? 2 : 1;

  // Two input vectors and the input carry flag
  Ops.push_back(N->getOperand(FirstInputOp));
  Ops.push_back(N->getOperand(FirstInputOp + 1));
  SDValue CarryIn = N->getOperand(FirstInputOp + 2);
  ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
  uint32_t CarryMask = 1 << 29;
  uint32_t CarryExpected = Add ? 0 : CarryMask;
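  // The carry flag lives in bit 29 (FPSCR.C). If the incoming carry is a
  // constant that already matches what the carry-initializing form implies
  // (clear for an add, set for a subtract), the carry operand can be dropped
  // and the no-carry (VADCI/VSBCI) opcode used instead.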
  if (CarryInConstant &&
      (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
    Opcode = OpcodeWithNoCarry;
  } else {
    Ops.push_back(CarryIn);
    Opcode = OpcodeWithCarry;
  }

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc,
                         N->getOperand(FirstInputOp + 3),  // predicate
                         N->getOperand(FirstInputOp - 1)); // inactive
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  // One vector input, followed by a 32-bit word of bits to shift in
  // and then an immediate shift count
  Ops.push_back(N->getOperand(1));
  Ops.push_back(N->getOperand(2));
  int32_t ImmValue = N->getConstantOperandVal(3);
  Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops));
}

static bool SDValueToConstBool(SDValue SDVal) {
  assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
  ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
  uint64_t Value = SDValConstant->getZExtValue();
  assert((Value == 0 || Value == 1) && "expected value 0 or 1");
  return Value;
}

void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                                            const uint16_t *OpcodesS,
                                            const uint16_t *OpcodesU,
                                            size_t Stride, size_t TySize) {
  assert(TySize < Stride && "Invalid TySize");
  bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
  bool IsSub = SDValueToConstBool(N->getOperand(2));
  bool IsExchange = SDValueToConstBool(N->getOperand(3));
  if (IsUnsigned) {
    assert(!IsSub &&
           "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
    assert(!IsExchange &&
           "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
  }

  auto OpIsZero = [N](size_t OpNo) {
    return isNullConstant(N->getOperand(OpNo));
  };

  // If the input accumulator value is not zero, select an instruction with
  // an accumulator; otherwise select an instruction without one.
  bool IsAccum = !(OpIsZero(4) && OpIsZero(5));

  const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
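  // The opcode tables are laid out as [IsSub][IsExchange][IsAccum][TySize],
  // so each flag advances the pointer by a fixed stride into the table.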
  if (IsSub)
    Opcodes += 4 * Stride;
  if (IsExchange)
    Opcodes += 2 * Stride;
  if (IsAccum)
    Opcodes += Stride;
  uint16_t Opcode = Opcodes[TySize];

  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  // Push the accumulator operands, if they are used
  if (IsAccum) {
    Ops.push_back(N->getOperand(4));
    Ops.push_back(N->getOperand(5));
  }
  // Push the two vector operands
  Ops.push_back(N->getOperand(6));
  Ops.push_back(N->getOperand(7));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
                                        const uint16_t *OpcodesS,
                                        const uint16_t *OpcodesU) {
  EVT VecTy = N->getOperand(6).getValueType();
  size_t SizeIndex;
  switch (VecTy.getVectorElementType().getSizeInBits()) {
  case 16:
    SizeIndex = 0;
    break;
  case 32:
    SizeIndex = 1;
    break;
  default:
    llvm_unreachable("bad vector element size");
  }

  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
}

void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
                                          const uint16_t *OpcodesS,
                                          const uint16_t *OpcodesU) {
  assert(
      N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
          32 &&
      "bad vector element size");
  SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
}

void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                                    const uint16_t *const *Opcodes,
                                    bool HasWriteback) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  const uint16_t *OurOpcodes;
  switch (VT.getVectorElementType().getSizeInBits()) {
  case 8:
    OurOpcodes = Opcodes[0];
    break;
  case 16:
    OurOpcodes = Opcodes[1];
    break;
  case 32:
    OurOpcodes = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VLD");
  }

  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
  SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
  unsigned PtrOperand = HasWriteback ? 1 : 2;

  auto Data = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
  SDValue Chain = N->getOperand(0);
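  // An MVE VLD2/VLD4 is selected as NumVecs stage instructions (e.g. VLD20
  // followed by VLD21), each of which fills part of the super-register;
  // Data and Chain are threaded from one stage to the next.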
2819
// Add a MVE_VLDn instruction for each Vec, except the last
2820
for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
2821
SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2822
auto LoadInst =
2823
CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
2824
Data = SDValue(LoadInst, 0);
2825
Chain = SDValue(LoadInst, 1);
2826
transferMemOperands(N, LoadInst);
2827
}
2828
// The last may need a writeback on it
2829
if (HasWriteback)
2830
ResultTys = {DataTy, MVT::i32, MVT::Other};
2831
SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
2832
auto LoadInst =
2833
CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
2834
transferMemOperands(N, LoadInst);
2835
2836
unsigned i;
2837
for (i = 0; i < NumVecs; i++)
2838
ReplaceUses(SDValue(N, i),
2839
CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
2840
SDValue(LoadInst, 0)));
2841
if (HasWriteback)
2842
ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
2843
ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
2844
CurDAG->RemoveDeadNode(N);
2845
}

void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                                      bool Wrapping, bool Predicated) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  uint16_t Opcode;
  switch (VT.getScalarSizeInBits()) {
  case 8:
    Opcode = Opcodes[0];
    break;
  case 16:
    Opcode = Opcodes[1];
    break;
  case 32:
    Opcode = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
  }

  SmallVector<SDValue, 8> Ops;
  unsigned OpIdx = 1;

  SDValue Inactive;
  if (Predicated)
    Inactive = N->getOperand(OpIdx++);

  Ops.push_back(N->getOperand(OpIdx++)); // base
  if (Wrapping)
    Ops.push_back(N->getOperand(OpIdx++)); // limit

  SDValue ImmOp = N->getOperand(OpIdx++); // step
  int ImmValue = ImmOp->getAsZExtVal();
  Ops.push_back(getI32Imm(ImmValue, Loc));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}

void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
                                     size_t NumExtraOps, bool HasAccum) {
  bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
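  // Dual-register CDE values are passed around as GPRPairs. On big-endian
  // targets the low and high halves map to the opposite gsub_0/gsub_1
  // subregisters, so both the accumulator pair and the result pair are
  // swapped below.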
2893
SDLoc Loc(N);
2894
SmallVector<SDValue, 8> Ops;
2895
2896
unsigned OpIdx = 1;
2897
2898
// Convert and append the immediate operand designating the coprocessor.
2899
SDValue ImmCorpoc = N->getOperand(OpIdx++);
2900
uint32_t ImmCoprocVal = ImmCorpoc->getAsZExtVal();
2901
Ops.push_back(getI32Imm(ImmCoprocVal, Loc));
2902
2903
// For accumulating variants copy the low and high order parts of the
2904
// accumulator into a register pair and add it to the operand vector.
2905
if (HasAccum) {
2906
SDValue AccLo = N->getOperand(OpIdx++);
2907
SDValue AccHi = N->getOperand(OpIdx++);
2908
if (IsBigEndian)
2909
std::swap(AccLo, AccHi);
2910
Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
2911
}
2912
2913
// Copy extra operands as-is.
2914
for (size_t I = 0; I < NumExtraOps; I++)
2915
Ops.push_back(N->getOperand(OpIdx++));
2916
2917
// Convert and append the immediate operand
2918
SDValue Imm = N->getOperand(OpIdx);
2919
uint32_t ImmVal = Imm->getAsZExtVal();
2920
Ops.push_back(getI32Imm(ImmVal, Loc));
2921
2922
// Accumulating variants are IT-predicable, add predicate operands.
2923
if (HasAccum) {
2924
SDValue Pred = getAL(CurDAG, Loc);
2925
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
2926
Ops.push_back(Pred);
2927
Ops.push_back(PredReg);
2928
}
2929
2930
// Create the CDE intruction
2931
SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
2932
SDValue ResultPair = SDValue(InstrNode, 0);
2933
2934
// The original intrinsic had two outputs, and the output of the dual-register
2935
// CDE instruction is a register pair. We need to extract the two subregisters
2936
// and replace all uses of the original outputs with the extracted
2937
// subregisters.
2938
uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
2939
if (IsBigEndian)
2940
std::swap(SubRegs[0], SubRegs[1]);
2941
2942
for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
2943
if (SDValue(N, ResIdx).use_empty())
2944
continue;
2945
SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
2946
MVT::i32, ResultPair);
2947
ReplaceUses(SDValue(N, ResIdx), SubReg);
2948
}
2949
2950
CurDAG->RemoveDeadNode(N);
2951
}
2952
2953
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
2954
bool isUpdating, unsigned NumVecs,
2955
const uint16_t *DOpcodes,
2956
const uint16_t *QOpcodes0,
2957
const uint16_t *QOpcodes1) {
2958
assert(Subtarget->hasNEON());
2959
assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
2960
SDLoc dl(N);
2961
2962
SDValue MemAddr, Align;
2963
unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
2964
if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
2965
return;
2966
2967
SDValue Chain = N->getOperand(0);
2968
EVT VT = N->getValueType(0);
2969
  bool is64BitVector = VT.is64BitVector();

  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
  case MVT::v4bf16:
  case MVT::v8bf16:
    OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  unsigned Opc = is64BitVector    ? DOpcodes[OpcodeIndex]
                 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
                                  : QOpcodes1[OpcodeIndex];
  if (isUpdating) {
    SDValue Inc = N->getOperand(2);
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    if (IsImmUpdate) {
      if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    } else {
      if (isVLDfixed(Opc))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      Ops.push_back(Inc);
    }
  }
  if (is64BitVector || NumVecs == 1) {
    // Double registers and VLD1 quad registers are directly supported.
  } else {
    SDValue ImplDef = SDValue(
        CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Ops.push_back(SDValue(VLdA, 0));
    Chain = SDValue(VLdA, 1);
  }

  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7,
                  "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
    }
  }
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}
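
// Illustrative note (added commentary, not upstream code): the opcode tables
// passed into SelectVLDDup pick a "load one element and replicate" NEON
// instruction; for example a 2-vector dup load of i8 lanes becomes
// "vld2.8 {d0[], d1[]}, [r0]", and the subregister extraction above hands
// each replicated D register back to the original vector results.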

bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
  if (!Subtarget->hasMVEIntegerOps())
    return false;

  SDLoc dl(N);

  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
  // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
  // inserts of the correct type:
  SDValue Ins1 = SDValue(N, 0);
  SDValue Ins2 = N->getOperand(0);
  EVT VT = Ins1.getValueType();
  if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
      !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
      !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
      (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
    return false;

  unsigned Lane1 = Ins1.getConstantOperandVal(2);
  unsigned Lane2 = Ins2.getConstantOperandVal(2);
  if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
    return false;

  // If the inserted values will be able to use T/B already, leave it to the
  // existing tablegen patterns. For example VCVTT/VCVTB.
  SDValue Val1 = Ins1.getOperand(1);
  SDValue Val2 = Ins2.getOperand(1);
  if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
    return false;

  // Check if the inserted values are both extracts.
  if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val1.getOpcode() == ARMISD::VGETLANEu) &&
      (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val2.getOpcode() == ARMISD::VGETLANEu) &&
      isa<ConstantSDNode>(Val1.getOperand(1)) &&
      isa<ConstantSDNode>(Val2.getOperand(1)) &&
      (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
       Val1.getOperand(0).getValueType() == MVT::v8i16) &&
      (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
       Val2.getOperand(0).getValueType() == MVT::v8i16)) {
    unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
    unsigned ExtractLane2 = Val2.getConstantOperandVal(1);

    // If the two extracted lanes are from the same place and adjacent, this
    // simplifies into a f32 lane move.
    if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
        ExtractLane1 == ExtractLane2 + 1) {
      SDValue NewExt = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue NewIns = CurDAG->getTargetInsertSubreg(
          ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
          NewExt);
      ReplaceUses(Ins1, NewIns);
      return true;
    }

    // Else v8i16 pattern of an extract and an insert, with an optional vmovx
    // for extracting odd lanes.
    if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
      SDValue Inp1 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue Inp2 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
      if (ExtractLane1 % 2 != 0)
        Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1),
                       0);
      if (ExtractLane2 % 2 != 0)
        Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2),
                       0);
      SDNode *VINS =
          CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
      SDValue NewIns =
          CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                        Ins2.getOperand(0), SDValue(VINS, 0));
      ReplaceUses(Ins1, NewIns);
      return true;
    }
  }

  // The inserted values are not extracted - if they are f16 then insert them
  // directly using a VINS.
  if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
    SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
    SDValue NewIns =
        CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                      Ins2.getOperand(0), SDValue(VINS, 0));
    ReplaceUses(Ins1, NewIns);
    return true;
  }

  return false;
}
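
// Illustrative note (added commentary): two adjacent lane inserts such as
//   v8i16 t = insertelement(insertelement(x, a, 2), b, 3)
// fill exactly one 32-bit lane, so instead of two independent lane moves the
// code above can emit one VINS.F16 (merging two half values inside an S
// register) followed by a single s-subregister insert.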

bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
                                                            SDNode *FMul,
                                                            bool IsUnsigned,
                                                            bool FixedToFloat) {
  auto Type = N->getValueType(0);
  unsigned ScalarBits = Type.getScalarSizeInBits();
  if (ScalarBits > 32)
    return false;

  SDNodeFlags FMulFlags = FMul->getFlags();
  // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
  // allowed in 16 bit unsigned floats
  if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
    return false;

  SDValue ImmNode = FMul->getOperand(1);
  SDValue VecVal = FMul->getOperand(0);
  if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
      VecVal->getOpcode() == ISD::SINT_TO_FP)
    VecVal = VecVal->getOperand(0);

  if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  if (ImmNode.getOpcode() == ISD::BITCAST) {
    if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
      return false;
    ImmNode = ImmNode.getOperand(0);
  }

  if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  APFloat ImmAPF(0.0f);
  switch (ImmNode.getOpcode()) {
  case ARMISD::VMOVIMM:
  case ARMISD::VDUP: {
    if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))
      return false;
    unsigned Imm = ImmNode.getConstantOperandVal(0);
    if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
      Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
    ImmAPF =
        APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
                APInt(ScalarBits, Imm));
    break;
  }
  case ARMISD::VMOVFPIMM: {
    ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0)));
    break;
  }
  default:
    return false;
  }

  // Where n is the number of fractional bits, multiplying by 2^n will convert
  // from float to fixed and multiplying by 2^-n will convert from fixed to
  // float. Taking log2 of the factor (after taking the inverse in the case of
  // float to fixed) will give n.
  APFloat ToConvert = ImmAPF;
  if (FixedToFloat) {
    if (!ImmAPF.getExactInverse(&ToConvert))
      return false;
  }
  APSInt Converted(64, false);
  bool IsExact;
  ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven,
                             &IsExact);
  if (!IsExact || !Converted.isPowerOf2())
    return false;

  unsigned FracBits = Converted.logBase2();
  if (FracBits > ScalarBits)
    return false;

  SmallVector<SDValue, 3> Ops{
      VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};
  AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type);

  unsigned int Opcode;
  switch (ScalarBits) {
  case 16:
    if (FixedToFloat)
      Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
    else
      Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
    break;
  case 32:
    if (FixedToFloat)
      Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
    else
      Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
    break;
  default:
    llvm_unreachable("unexpected number of scalar bits");
    break;
  }

  ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));
  return true;
}
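
// Worked example (added commentary): multiplying (sint_to_fp x) by the splat
// constant 0.25 == 2^-2 is a fixed-to-float conversion with n = 2 fractional
// bits: getExactInverse(0.25) yields 4.0 and log2(4) = 2, so the fmul chain
// collapses into a single "vcvt.f32.s32 q0, q0, #2".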

bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
  // Transform a floating-point to fixed-point conversion to a VCVT
  if (!Subtarget->hasMVEFloatOps())
    return false;
  EVT Type = N->getValueType(0);
  if (!Type.isVector())
    return false;
  unsigned int ScalarBits = Type.getScalarSizeInBits();

  bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
                    N->getOpcode() == ISD::FP_TO_UINT_SAT;
  SDNode *Node = N->getOperand(0).getNode();

  // floating-point to fixed-point with one fractional bit gets turned into an
  // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
  if (Node->getOpcode() == ISD::FADD) {
    if (Node->getOperand(0) != Node->getOperand(1))
      return false;
    SDNodeFlags Flags = Node->getFlags();
    // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
    // allowed in 16 bit unsigned floats
    if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
      return false;

    unsigned Opcode;
    switch (ScalarBits) {
    case 16:
      Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
      break;
    case 32:
      Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
      break;
    }
    SmallVector<SDValue, 3> Ops{Node->getOperand(0),
                                CurDAG->getConstant(1, dl, MVT::i32)};
    AddEmptyMVEPredicateToOps(Ops, dl, Type);

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
    return true;
  }

  if (Node->getOpcode() != ISD::FMUL)
    return false;

  return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);
}
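
// Illustrative note (added commentary): with exactly one fractional bit the
// scale factor is 2^1 = 2.0, and x * 2.0 is canonicalized to x + x earlier in
// the DAG. That is why the FADD special case above exists; it still selects a
// single "vcvt.s32.f32 q0, q0, #1" instead of a vadd followed by a vcvt.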

bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
  // Transform a fixed-point to floating-point conversion to a VCVT
  if (!Subtarget->hasMVEFloatOps())
    return false;
  auto Type = N->getValueType(0);
  if (!Type.isVector())
    return false;

  auto LHS = N->getOperand(0);
  if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
    return false;

  return transformFixedFloatingPointConversion(
      N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
}

bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
                     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
                     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = llvm::countr_one(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
              CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                        MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = llvm::countr_zero(And_imm);
    // Shift must be the same as the AND's LSB
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = llvm::Log2_32(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}
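
// Worked example (added commentary): the unsigned extract (x >> 3) & 0x1f
// matches the first pattern above with LSB = 3 and a 5-bit field (encoded as
// Width = 4), selecting "ubfx r0, r0, #3, #5" in place of a shift plus mask.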

/// Target-specific DAG combining for ISD::SUB.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X, 0, X, -X
/// select_cc setgt X, -1, X, -X
/// select_cc setl[te] X, 0, -X, X
/// select_cc setlt X, 1, -X, X
/// which represent Integer ABS into:
/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// ARM::ABS or ARM::t2ABS machine node.
bool ARMDAGToDAGISel::tryABSOp(SDNode *N) {
  SDValue SUBSrc0 = N->getOperand(0);
  SDValue SUBSrc1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  if (Subtarget->isThumb1Only())
    return false;

  if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
    return false;

  SDValue XORSrc0 = SUBSrc0.getOperand(0);
  SDValue XORSrc1 = SUBSrc0.getOperand(1);
  SDValue SRASrc0 = SUBSrc1.getOperand(0);
  SDValue SRASrc1 = SUBSrc1.getOperand(1);
  ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
  EVT XType = SRASrc0.getValueType();
  unsigned Size = XType.getSizeInBits() - 1;

  if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
      SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
    unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
    CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
    return true;
  }

  return false;
}
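
// Worked example (added commentary): for a 32-bit x the combined ABS pattern
// is
//   y   = x >> 31          (arithmetic shift, so y is 0 or -1)
//   abs = (x ^ y) - y
// The checks above verify exactly this xor/sra/sub shape and then fold it
// into the ARM::ABS / ARM::t2ABS pseudo, which is expanded after selection.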

/// We've got special pseudo-instructions for these
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
  if (MemTy == MVT::i8)
    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, SDLoc(N),
      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);
}

static std::optional<std::pair<unsigned, unsigned>>
getContiguousRangeOfSetBits(const APInt &A) {
  unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;
  unsigned LastOne = A.countr_zero();
  if (A.popcount() != (FirstOne - LastOne + 1))
    return std::nullopt;
  return std::make_pair(FirstOne, LastOne);
}
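
// Worked example (added commentary): for A = 0x0ff0, FirstOne = 11 and
// LastOne = 4; popcount(A) = 8 = 11 - 4 + 1, so the set bits form one
// contiguous run and {11, 4} is returned. For A = 0x0f0f the popcount check
// fails and std::nullopt is returned instead.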

void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL
    // and LSR don't exist as standalone instructions - they need the barrel
    // shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  if (!isNullConstant(Zero) || And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    // 1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    // 2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    // 3. Only one bit is set. We can shift this into the sign bit and use a
    // PL/MI comparison.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    // 4. Do a double shift to clear bottom and top bits, but only in
    // thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }
}
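
// Worked example (added commentary): testing (x & 0x00ff0000) == 0 in Thumb1
// hits case 4 above: "lsls tmp, x, #8" then "lsrs tmp, tmp, #24" isolates
// bits 16..23 while setting the Z flag for free, so no separate compare
// against zero is needed.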

static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
                                       unsigned Opc128[3]) {
  assert((VT.is64BitVector() || VT.is128BitVector()) &&
         "Unexpected vector shuffle length");
  switch (VT.getScalarSizeInBits()) {
  default:
    llvm_unreachable("Unexpected vector shuffle element size");
  case 8:
    return VT.is64BitVector() ? Opc64[0] : Opc128[0];
  case 16:
    return VT.is64BitVector() ? Opc64[1] : Opc128[1];
  case 32:
    return VT.is64BitVector() ? Opc64[2] : Opc128[2];
  }
}

void ARMDAGToDAGISel::Select(SDNode *N) {
  SDLoc dl(N);

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  switch (N->getOpcode()) {
  default: break;
  case ISD::STORE: {
    // For Thumb1, match an sp-relative store in C++. This is a little
    // unfortunate, but I don't think I can make the chain check work
    // otherwise. (The chain of the store has to be the same as the chain
    // of the CopyFromReg, or else we can't replace the CopyFromReg with
    // a direct reference to "SP".)
    //
    // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
    // a different addressing mode from other four-byte stores.
    //
    // This pattern usually comes up with call arguments.
    StoreSDNode *ST = cast<StoreSDNode>(N);
    SDValue Ptr = ST->getBasePtr();
    if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
      int RHSC = 0;
      if (Ptr.getOpcode() == ISD::ADD &&
          isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
        Ptr = Ptr.getOperand(0);

      if (Ptr.getOpcode() == ISD::CopyFromReg &&
          cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
          Ptr.getOperand(0) == ST->getChain()) {
        SDValue Ops[] = {ST->getValue(),
                         CurDAG->getRegister(ARM::SP, MVT::i32),
                         CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
                         getAL(CurDAG, dl),
                         CurDAG->getRegister(0, MVT::i32),
                         ST->getChain()};
        MachineSDNode *ResNode =
            CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
        MachineMemOperand *MemOp = ST->getMemOperand();
        CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
        ReplaceNode(N, ResNode);
        return;
      }
    }
    break;
  }
  case ISD::WRITE_REGISTER:
    if (tryWriteRegister(N))
      return;
    break;
  case ISD::READ_REGISTER:
    if (tryReadRegister(N))
      return;
    break;
  case ISD::INLINEASM:
  case ISD::INLINEASM_BR:
    if (tryInlineAsm(N))
      return;
    break;
  case ISD::SUB:
    // Select special operations if SUB node forms integer ABS pattern
    if (tryABSOp(N))
      return;
    // Other cases are autogenerated.
    break;
  case ISD::Constant: {
    unsigned Val = N->getAsZExtVal();
    // If we can't materialize the constant we need to use a literal pool
    if (ConstantMaterializationCost(Val, Subtarget) > 2 &&
        !Subtarget->genExecuteOnly()) {
      SDValue CPIdx = CurDAG->getTargetConstantPool(
          ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
          TLI->getPointerTy(CurDAG->getDataLayout()));

      SDNode *ResNode;
      if (Subtarget->isThumb()) {
        SDValue Ops[] = {
          CPIdx,
          getAL(CurDAG, dl),
          CurDAG->getRegister(0, MVT::i32),
          CurDAG->getEntryNode()
        };
        ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
                                         Ops);
      } else {
        SDValue Ops[] = {
          CPIdx,
          CurDAG->getTargetConstant(0, dl, MVT::i32),
          getAL(CurDAG, dl),
          CurDAG->getRegister(0, MVT::i32),
          CurDAG->getEntryNode()
        };
        ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
                                         Ops);
      }
      // Annotate the Node with memory operand information so that MachineInstr
      // queries work properly. This e.g. gives the register allocation the
      // required information for rematerialization.
      MachineFunction& MF = CurDAG->getMachineFunction();
      MachineMemOperand *MemOp =
          MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
                                  MachineMemOperand::MOLoad, 4, Align(4));

      CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});

      ReplaceNode(N, ResNode);
      return;
    }

    // Other cases are autogenerated.
    break;
  }
  case ISD::FrameIndex: {
    // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    SDValue TFI = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    if (Subtarget->isThumb1Only()) {
      // Set the alignment of the frame object to 4, to avoid having to
      // generate more than one ADD
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (MFI.getObjectAlign(FI) < Align(4))
        MFI.setObjectAlignment(FI, Align(4));
      CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
                           CurDAG->getTargetConstant(0, dl, MVT::i32));
      return;
    } else {
      unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
                      ARM::t2ADDri : ARM::ADDri);
      SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return;
    }
  }
  case ISD::INSERT_VECTOR_ELT: {
    if (tryInsertVectorElt(N))
      return;
    break;
  }
  case ISD::SRL:
    if (tryV6T2BitfieldExtractOp(N, false))
      return;
    break;
  case ISD::SIGN_EXTEND_INREG:
  case ISD::SRA:
    if (tryV6T2BitfieldExtractOp(N, true))
      return;
    break;
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT_SAT:
  case ISD::FP_TO_SINT_SAT:
    if (tryFP_TO_INT(N, dl))
      return;
    break;
  case ISD::FMUL:
    if (tryFMULFixed(N, dl))
      return;
    break;
  case ISD::MUL:
    if (Subtarget->isThumb1Only())
      break;
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
      unsigned RHSV = C->getZExtValue();
      if (!RHSV) break;
      if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
        unsigned ShImm = Log2_32(RHSV-1);
        if (ShImm >= 32)
          break;
        SDValue V = N->getOperand(0);
        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
        if (Subtarget->isThumb()) {
          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
          return;
        } else {
          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
                            Reg0 };
          CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
          return;
        }
      }
      if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
        unsigned ShImm = Log2_32(RHSV+1);
        if (ShImm >= 32)
          break;
        SDValue V = N->getOperand(0);
        ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
        SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
        if (Subtarget->isThumb()) {
          SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
          return;
        } else {
          SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
                            Reg0 };
          CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
          return;
        }
      }
    }
    break;
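
  // Worked example for the ISD::MUL case above (added commentary): a multiply
  // by 9 has 9 - 1 = 8 = 2^3 and is selected as "add r0, r0, r0, lsl #3"; a
  // multiply by 7 has 7 + 1 = 2^3 and becomes "rsb r0, r0, r0, lsl #3"
  // (computing (r0 << 3) - r0). No mul instruction is emitted in either case.
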
  case ISD::AND: {
    // Check for unsigned bitfield extract
    if (tryV6T2BitfieldExtractOp(N, false))
      return;

    // If an immediate is used in an AND node, it is possible that the
    // immediate can be more optimally materialized when negated. If this is
    // the case we can negate the immediate and use a BIC instead.
    auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
      uint32_t Imm = (uint32_t) N1C->getZExtValue();

      // In Thumb2 mode, an AND can take a 12-bit immediate. If this
      // immediate can be negated and fit in the immediate operand of
      // a t2BIC, don't do any manual transform here as this can be
      // handled by the generic ISel machinery.
      bool PreferImmediateEncoding =
          Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
      if (!PreferImmediateEncoding &&
          ConstantMaterializationCost(Imm, Subtarget) >
              ConstantMaterializationCost(~Imm, Subtarget)) {
        // The current immediate costs more to materialize than a negated
        // immediate, so negate the immediate and use a BIC.
        SDValue NewImm =
            CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
        // If the new constant didn't exist before, reposition it in the
        // topological ordering so it is just before N. Otherwise, don't touch
        // its location.
        if (NewImm->getNodeId() == -1)
          CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());

        if (!Subtarget->hasThumb2()) {
          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
                           N->getOperand(0), NewImm, getAL(CurDAG, dl),
                           CurDAG->getRegister(0, MVT::i32)};
          ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
          return;
        } else {
          SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
                           CurDAG->getRegister(0, MVT::i32),
                           CurDAG->getRegister(0, MVT::i32)};
          ReplaceNode(N,
                      CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
          return;
        }
      }
    }

    // (and (or x, c2), c1), where the top 16 bits of c1 and c2 match, the
    // lower 16 bits of c1 are 0xffff, and the lower 16 bits of c2 are 0. That
    // is, the top 16 bits are entirely contributed by c2 and the lower 16 bits
    // are entirely contributed by x. That's equal to
    // (or (and x, 0xffff), (and c2, 0xffff0000)).
    // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)"
    EVT VT = N->getValueType(0);
    if (VT != MVT::i32)
      break;
    unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
                       ? ARM::t2MOVTi16
                       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
    if (!Opc)
      break;
    SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
    N1C = dyn_cast<ConstantSDNode>(N1);
    if (!N1C)
      break;
    if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
      SDValue N2 = N0.getOperand(1);
      ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
      if (!N2C)
        break;
      unsigned N1CVal = N1C->getZExtValue();
      unsigned N2CVal = N2C->getZExtValue();
      if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
          (N1CVal & 0xffffU) == 0xffffU &&
          (N2CVal & 0xffffU) == 0x0U) {
        SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
                                                  dl, MVT::i32);
        SDValue Ops[] = { N0.getOperand(0), Imm16,
                          getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
        ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
        return;
      }
    }

    break;
  }
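
  // Worked example for the ISD::AND case above (added commentary): taking
  // c2 = 0x12340000 and c1 = 0x1234ffff, (x | c2) & c1 keeps the low 16 bits
  // of x and forces the high half to 0x1234, which is precisely what a single
  // "movt r0, #0x1234" does.
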
  case ARMISD::UMAAL: {
    unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
    SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
                      N->getOperand(2), N->getOperand(3),
                      getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
    return;
  }
  case ARMISD::UMLAL: {
    if (Subtarget->isThumb()) {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32)};
      ReplaceNode(
          N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
      return;
    } else {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      ReplaceNode(N, CurDAG->getMachineNode(
                         Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
                         MVT::i32, MVT::i32, Ops));
      return;
    }
  }
  case ARMISD::SMLAL: {
    if (Subtarget->isThumb()) {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32)};
      ReplaceNode(
          N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
      return;
    } else {
      SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                        N->getOperand(3), getAL(CurDAG, dl),
                        CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      ReplaceNode(N, CurDAG->getMachineNode(
                         Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
                         MVT::i32, MVT::i32, Ops));
      return;
    }
  }
  case ARMISD::SUBE: {
    if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
      break;
    // Look for a pattern to match SMMLS
    // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
    if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
        N->getOperand(2).getOpcode() != ARMISD::SUBC ||
        !SDValue(N, 1).use_empty())
      break;

    if (Subtarget->isThumb())
      assert(Subtarget->hasThumb2() &&
             "This pattern should not be generated for Thumb");

    SDValue SmulLoHi = N->getOperand(1);
    SDValue Subc = N->getOperand(2);
    SDValue Zero = Subc.getOperand(0);

    if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) ||
        N->getOperand(1) != SmulLoHi.getValue(1) ||
        N->getOperand(2) != Subc.getValue(1))
      break;

    unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
    SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
                      N->getOperand(0), getAL(CurDAG, dl),
                      CurDAG->getRegister(0, MVT::i32) };
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
    return;
  }
  case ISD::LOAD: {
    if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
      return;
    if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
      if (tryT2IndexedLoad(N))
        return;
    } else if (Subtarget->isThumb()) {
      if (tryT1IndexedLoad(N))
        return;
    } else if (tryARMIndexedLoad(N))
      return;
    // Other cases are autogenerated.
    break;
  }
  case ISD::MLOAD:
    if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
      return;
    // Other cases are autogenerated.
    break;
  case ARMISD::WLSSETUP: {
    SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
                                         N->getOperand(0));
    ReplaceUses(N, New);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::WLS: {
    SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
                                         N->getOperand(1), N->getOperand(2),
                                         N->getOperand(0));
    ReplaceUses(N, New);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LE: {
    SDValue Ops[] = { N->getOperand(1),
                      N->getOperand(2),
                      N->getOperand(0) };
    unsigned Opc = ARM::t2LoopEnd;
    SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
    ReplaceUses(N, New);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LDRD: {
    if (Subtarget->isThumb2())
      break; // TableGen handles isel in this case.
    SDValue Base, RegOffset, ImmOffset;
    const SDValue &Chain = N->getOperand(0);
    const SDValue &Addr = N->getOperand(1);
    SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
    if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
      // The register-offset variant of LDRD mandates that the register
      // allocated to RegOffset is not reused in any of the remaining operands.
      // This restriction is currently not enforced. Therefore emitting this
      // variant is explicitly avoided.
      Base = Addr;
      RegOffset = CurDAG->getRegister(0, MVT::i32);
    }
    SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
    SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
                                         {MVT::Untyped, MVT::Other}, Ops);
    SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                SDValue(New, 0));
    SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                SDValue(New, 0));
    transferMemOperands(N, New);
    ReplaceUses(SDValue(N, 0), Lo);
    ReplaceUses(SDValue(N, 1), Hi);
    ReplaceUses(SDValue(N, 2), SDValue(New, 1));
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::STRD: {
    if (Subtarget->isThumb2())
      break; // TableGen handles isel in this case.
    SDValue Base, RegOffset, ImmOffset;
    const SDValue &Chain = N->getOperand(0);
    const SDValue &Addr = N->getOperand(3);
    SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
    if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
      // The register-offset variant of STRD mandates that the register
      // allocated to RegOffset is not reused in any of the remaining operands.
      // This restriction is currently not enforced. Therefore emitting this
      // variant is explicitly avoided.
      Base = Addr;
      RegOffset = CurDAG->getRegister(0, MVT::i32);
    }
    SDNode *RegPair =
        createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
    SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
    SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceUses(SDValue(N, 0), SDValue(New, 0));
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::LOOP_DEC: {
    SDValue Ops[] = { N->getOperand(1),
                      N->getOperand(2),
                      N->getOperand(0) };
    SDNode *Dec =
        CurDAG->getMachineNode(ARM::t2LoopDec, dl,
                               CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
    ReplaceUses(N, Dec);
    CurDAG->RemoveDeadNode(N);
    return;
  }
  case ARMISD::BRCOND: {
    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
    // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
    // Pattern complexity = 6  cost = 1  size = 0

    unsigned Opc = Subtarget->isThumb() ?
      ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
    SDValue Chain = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    SDValue N2 = N->getOperand(2);
    SDValue N3 = N->getOperand(3);
    SDValue InGlue = N->getOperand(4);
    assert(N1.getOpcode() == ISD::BasicBlock);
    assert(N2.getOpcode() == ISD::Constant);
    assert(N3.getOpcode() == ISD::Register);

    unsigned CC = (unsigned)N2->getAsZExtVal();

    if (InGlue.getOpcode() == ARMISD::CMPZ) {
      if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
        SDValue Int = InGlue.getOperand(0);
        uint64_t ID = Int->getConstantOperandVal(1);

        // Handle low-overhead loops.
        if (ID == Intrinsic::loop_decrement_reg) {
          SDValue Elements = Int.getOperand(2);
          SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3),
                                                   dl, MVT::i32);

          SDValue Args[] = { Elements, Size, Int.getOperand(0) };
          SDNode *LoopDec =
              CurDAG->getMachineNode(ARM::t2LoopDec, dl,
                                     CurDAG->getVTList(MVT::i32, MVT::Other),
                                     Args);
          ReplaceUses(Int.getNode(), LoopDec);

          SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
          SDNode *LoopEnd =
              CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);

          ReplaceUses(N, LoopEnd);
          CurDAG->RemoveDeadNode(N);
          CurDAG->RemoveDeadNode(InGlue.getNode());
          CurDAG->RemoveDeadNode(Int.getNode());
          return;
        }
      }

      bool SwitchEQNEToPLMI;
      SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);
      InGlue = N->getOperand(4);

      if (SwitchEQNEToPLMI) {
        switch ((ARMCC::CondCodes)CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = (unsigned)ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = (unsigned)ARMCC::PL;
          break;
        }
      }
    }

    SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
    SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue };
    SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
                                             MVT::Glue, Ops);
    Chain = SDValue(ResNode, 0);
    if (N->getNumValues() == 2) {
      InGlue = SDValue(ResNode, 1);
      ReplaceUses(SDValue(N, 1), InGlue);
    }
    ReplaceUses(SDValue(N, 0),
                SDValue(Chain.getNode(), Chain.getResNo()));
    CurDAG->RemoveDeadNode(N);
    return;
  }

  case ARMISD::CMPZ: {
    // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
    // This allows us to avoid materializing the expensive negative constant.
    // The CMPZ #0 is useless and will be peepholed away but we need to keep it
    // for its glue output.
    SDValue X = N->getOperand(0);
    auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
    if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
      int64_t Addend = -C->getSExtValue();

      SDNode *Add = nullptr;
      // ADDS can be better than CMN if the immediate fits in a
      // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
      // Outside that range we can just use a CMN which is 32-bit but has a
      // 12-bit immediate range.
      if (Addend < 1<<8) {
        if (Subtarget->isThumb2()) {
          SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                            CurDAG->getRegister(0, MVT::i32) };
          Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
        } else {
          unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
          SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
                           CurDAG->getTargetConstant(Addend, dl, MVT::i32),
                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
          Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
        }
      }
      if (Add) {
        SDValue Ops2[] = {SDValue(Add, 0),
                          CurDAG->getConstant(0, dl, MVT::i32)};
        CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue),
                            Ops2);
      }
    }
    // Other cases are autogenerated.
    break;
  }
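
  // Worked example for the ARMISD::CMPZ case above (added commentary):
  // comparing x against -2 would normally require materializing -2 into a
  // register; instead "adds tmp, x, #2" is emitted and the now-trivial
  // compare against zero survives only for its glue result.
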

  case ARMISD::CMOV: {
    SDValue InGlue = N->getOperand(4);

    if (InGlue.getOpcode() == ARMISD::CMPZ) {
      bool SwitchEQNEToPLMI;
      SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);

      if (SwitchEQNEToPLMI) {
        SDValue ARMcc = N->getOperand(2);
        ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();

        switch (CC) {
        default: llvm_unreachable("CMPZ must be either NE or EQ!");
        case ARMCC::NE:
          CC = ARMCC::MI;
          break;
        case ARMCC::EQ:
          CC = ARMCC::PL;
          break;
        }
        SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
        SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
                         N->getOperand(3), N->getOperand(4)};
        CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
      }
    }
    // Other cases are autogenerated.
    break;
  }
  case ARMISD::VZIP: {
    EVT VT = N->getValueType(0);
    // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
    unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
    unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VUZP: {
    EVT VT = N->getValueType(0);
    // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
    unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
    unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
    unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::VTRN: {
    EVT VT = N->getValueType(0);
    unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
    unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
    unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
    SDValue Pred = getAL(CurDAG, dl);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
    ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
    return;
  }
  case ARMISD::BUILD_VECTOR: {
    EVT VecVT = N->getValueType(0);
    EVT EltVT = VecVT.getVectorElementType();
    unsigned NumElts = VecVT.getVectorNumElements();
    if (EltVT == MVT::f64) {
      assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
      ReplaceNode(
          N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
    if (NumElts == 2) {
      ReplaceNode(
          N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
      return;
    }
    assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
    ReplaceNode(N,
                createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
                                    N->getOperand(2), N->getOperand(3)));
    return;
  }

  case ARMISD::VLD1DUP: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
                                         ARM::VLD1DUPd32 };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
                                         ARM::VLD1DUPq32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                        ARM::VLD2DUPd32 };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
    return;
  }

  case ARMISD::VLD3DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
                                        ARM::VLD3DUPd16Pseudo,
                                        ARM::VLD3DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
    return;
  }

  case ARMISD::VLD4DUP: {
    static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
                                        ARM::VLD4DUPd16Pseudo,
                                        ARM::VLD4DUPd32Pseudo };
    SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
    return;
  }

  case ARMISD::VLD1DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
                                         ARM::VLD1DUPd16wb_fixed,
                                         ARM::VLD1DUPd32wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
                                         ARM::VLD1DUPq16wb_fixed,
                                         ARM::VLD1DUPq32wb_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD2DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
                                         ARM::VLD2DUPd16wb_fixed,
                                         ARM::VLD2DUPd32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                          ARM::VLD2DUPq16EvenPseudo,
                                          ARM::VLD2DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
                                          ARM::VLD2DUPq16OddPseudoWB_fixed,
                                          ARM::VLD2DUPq32OddPseudoWB_fixed };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0,
                 QOpcodes1);
    return;
  }

  case ARMISD::VLD3DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
                                         ARM::VLD3DUPd16Pseudo_UPD,
                                         ARM::VLD3DUPd32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                          ARM::VLD3DUPq16EvenPseudo,
                                          ARM::VLD3DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
                                          ARM::VLD3DUPq16OddPseudo_UPD,
                                          ARM::VLD3DUPq32OddPseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0,
                 QOpcodes1);
    return;
  }

  case ARMISD::VLD4DUP_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
                                         ARM::VLD4DUPd16Pseudo_UPD,
                                         ARM::VLD4DUPd32Pseudo_UPD,
                                         ARM::VLD1d64QPseudoWB_fixed };
    static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                          ARM::VLD4DUPq16EvenPseudo,
                                          ARM::VLD4DUPq32EvenPseudo };
    static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
                                          ARM::VLD4DUPq16OddPseudo_UPD,
                                          ARM::VLD4DUPq32OddPseudo_UPD };
    SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0,
                 QOpcodes1);
    return;
  }

  case ARMISD::VLD1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
                                         ARM::VLD1d16wb_fixed,
                                         ARM::VLD1d32wb_fixed,
                                         ARM::VLD1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
                                         ARM::VLD1q16wb_fixed,
                                         ARM::VLD1q32wb_fixed,
                                         ARM::VLD1q64wb_fixed };
    SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VLD2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
          ARM::VLD1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
                                          ARM::VLD2q16PseudoWB_fixed,
                                          ARM::VLD2q32PseudoWB_fixed};
      SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
    } else {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
                                          ARM::MVE_VLD21_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
                                           ARM::MVE_VLD21_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
                                           ARM::MVE_VLD21_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 2, Opcodes, true);
    }
    return;
  }

  case ARMISD::VLD3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
                                         ARM::VLD3d16Pseudo_UPD,
                                         ARM::VLD3d32Pseudo_UPD,
                                         ARM::VLD1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                          ARM::VLD3q16Pseudo_UPD,
                                          ARM::VLD3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
                                          ARM::VLD3q16oddPseudo_UPD,
                                          ARM::VLD3q32oddPseudo_UPD };
    SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VLD4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
          ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
                                           ARM::VLD4q16Pseudo_UPD,
                                           ARM::VLD4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
                                           ARM::VLD4q16oddPseudo_UPD,
                                           ARM::VLD4q32oddPseudo_UPD};
      SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    } else {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8,
                                          ARM::MVE_VLD43_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 4, Opcodes, true);
    }
    return;
  }

  case ARMISD::VLD1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
          ARM::VLD1q64wb_fixed};
      static const uint16_t QOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }

  case ARMISD::VLD1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
          ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
          ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
          ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
      SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VLD1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
          ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
          ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
      SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VLD2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
                                         ARM::VLD2LNd16Pseudo_UPD,
                                         ARM::VLD2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
                                         ARM::VLD2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
                                         ARM::VLD3LNd16Pseudo_UPD,
                                         ARM::VLD3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
                                         ARM::VLD3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
                                         ARM::VLD4LNd16Pseudo_UPD,
                                         ARM::VLD4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
                                         ARM::VLD4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
                                         ARM::VST1d16wb_fixed,
                                         ARM::VST1d32wb_fixed,
                                         ARM::VST1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
                                         ARM::VST1q16wb_fixed,
                                         ARM::VST1q32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  case ARMISD::VST2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
          ARM::VST1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
                                          ARM::VST2q16PseudoWB_fixed,
                                          ARM::VST2q32PseudoWB_fixed};
      SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }

  case ARMISD::VST3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
                                         ARM::VST3d16Pseudo_UPD,
                                         ARM::VST3d32Pseudo_UPD,
                                         ARM::VST1d64TPseudoWB_fixed};
    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                          ARM::VST3q16Pseudo_UPD,
                                          ARM::VST3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
                                          ARM::VST3q16oddPseudo_UPD,
                                          ARM::VST3q32oddPseudo_UPD };
    SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VST4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
          ARM::VST1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
                                           ARM::VST4q16Pseudo_UPD,
                                           ARM::VST4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
                                           ARM::VST4q16oddPseudo_UPD,
                                           ARM::VST4q32oddPseudo_UPD};
      SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VST1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
                                           ARM::VST1q16wb_fixed,
                                           ARM::VST1q32wb_fixed,
                                           ARM::VST1q64wb_fixed};
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }

  case ARMISD::VST1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
                                           ARM::VST1d16TPseudoWB_fixed,
                                           ARM::VST1d32TPseudoWB_fixed,
                                           ARM::VST1d64TPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
                                            ARM::VST1q16HighTPseudo_UPD,
                                            ARM::VST1q32HighTPseudo_UPD,
                                            ARM::VST1q64HighTPseudo_UPD };
      SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  case ARMISD::VST1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
                                            ARM::VST1q16HighQPseudo_UPD,
                                            ARM::VST1q32HighQPseudo_UPD,
                                            ARM::VST1q64HighQPseudo_UPD };
      SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
  case ARMISD::VST2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
                                         ARM::VST2LNd16Pseudo_UPD,
                                         ARM::VST2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
                                         ARM::VST2LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
                                         ARM::VST3LNd16Pseudo_UPD,
                                         ARM::VST3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
                                         ARM::VST3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
                                         ARM::VST4LNd16Pseudo_UPD,
                                         ARM::VST4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
                                         ARM::VST4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
    return;
  }

  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = N->getConstantOperandVal(1);
    switch (IntNo) {
    default:
      break;

    case Intrinsic::arm_mrrc:
    case Intrinsic::arm_mrrc2: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      unsigned Opc;

      if (Subtarget->isThumb())
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
      else
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);

      SmallVector<SDValue, 5> Ops;
      Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */
      Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */
      Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */

      // The ARM mrrc2 instruction does not allow predicates; the top four bits
      // of the encoded instruction are always '1111'. Assembly language does
      // permit writing AL as a predicate on mrrc2, but that makes no
      // difference to the encoded instruction.
      if (Opc != ARM::MRRC2) {
        Ops.push_back(getAL(CurDAG, dl));
        Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      }

      Ops.push_back(Chain);

      // Writes to two registers.
      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};

      ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
      return;
    }
    case Intrinsic::arm_ldaexd:
    case Intrinsic::arm_ldrexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue MemAddr = N->getOperand(2);
      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();

      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);

      // arm_ldrexd returns an i64 value in {i32, i32}.
      std::vector<EVT> ResTys;
      if (isThumb) {
        ResTys.push_back(MVT::i32);
        ResTys.push_back(MVT::i32);
      } else
        ResTys.push_back(MVT::Untyped);
      ResTys.push_back(MVT::Other);

      // Place arguments in the right order.
      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
                       CurDAG->getRegister(0, MVT::i32), Chain};
      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});

      // Remap uses.
      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
      if (!SDValue(N, 0).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 0);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
          SDNode *ResNode =
              CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
                                     MVT::i32, SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 0), Result);
      }
      if (!SDValue(N, 1).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 1);
        else {
          SDValue SubRegIdx =
              CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
          SDNode *ResNode =
              CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
                                     MVT::i32, SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode, 0);
        }
        ReplaceUses(SDValue(N, 1), Result);
      }
      ReplaceUses(SDValue(N, 2), OutChain);
      CurDAG->RemoveDeadNode(N);
      return;
    }
    case Intrinsic::arm_stlexd:
    case Intrinsic::arm_strexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue Val0 = N->getOperand(2);
      SDValue Val1 = N->getOperand(3);
      SDValue MemAddr = N->getOperand(4);

      // A store-exclusive double returns an i32 value: the status of the
      // issued store.
      const EVT ResTys[] = {MVT::i32, MVT::Other};

      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
      // Place arguments in the right order.
      SmallVector<SDValue, 7> Ops;
      if (isThumb) {
        Ops.push_back(Val0);
        Ops.push_back(Val1);
      } else
        // arm_strexd uses GPRPair.
        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
      Ops.push_back(MemAddr);
      Ops.push_back(getAL(CurDAG, dl));
      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      Ops.push_back(Chain);

      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);

      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

      ReplaceNode(N, St);
      return;
    }

    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8, ARM::VLD1d16, ARM::VLD1d32, ARM::VLD1d64};
      static const uint16_t QOpcodes[] = {
          ARM::VLD1q8, ARM::VLD1q16, ARM::VLD1q32, ARM::VLD1q64};
      SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1q8, ARM::VLD1q16, ARM::VLD1q32, ARM::VLD1q64};
      static const uint16_t QOpcodes[] = {
          ARM::VLD1d8QPseudo, ARM::VLD1d16QPseudo, ARM::VLD1d32QPseudo,
          ARM::VLD1d64QPseudo};
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x3: {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8TPseudo, ARM::VLD1d16TPseudo, ARM::VLD1d32TPseudo,
          ARM::VLD1d64TPseudo};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
          ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighTPseudo, ARM::VLD1q16HighTPseudo,
          ARM::VLD1q32HighTPseudo, ARM::VLD1q64HighTPseudo};
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8QPseudo, ARM::VLD1d16QPseudo, ARM::VLD1d32QPseudo,
          ARM::VLD1d64QPseudo};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
          ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighQPseudo, ARM::VLD1q16HighQPseudo,
          ARM::VLD1q32HighQPseudo, ARM::VLD1q64HighQPseudo};
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2: {
      static const uint16_t DOpcodes[] = {
          ARM::VLD2d8, ARM::VLD2d16, ARM::VLD2d32, ARM::VLD1q64};
      static const uint16_t QOpcodes[] = {
          ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, ARM::VLD2q32Pseudo};
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = {
          ARM::VLD3d8Pseudo, ARM::VLD3d16Pseudo, ARM::VLD3d32Pseudo,
          ARM::VLD1d64TPseudo};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD3q8Pseudo_UPD, ARM::VLD3q16Pseudo_UPD,
          ARM::VLD3q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD3q8oddPseudo, ARM::VLD3q16oddPseudo, ARM::VLD3q32oddPseudo};
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = {
          ARM::VLD4d8Pseudo, ARM::VLD4d16Pseudo, ARM::VLD4d32Pseudo,
          ARM::VLD1d64QPseudo};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD4q8Pseudo_UPD, ARM::VLD4q16Pseudo_UPD,
          ARM::VLD4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD4q8oddPseudo, ARM::VLD4q16oddPseudo, ARM::VLD4q32oddPseudo};
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = {
          ARM::VLD2DUPd8, ARM::VLD2DUPd16, ARM::VLD2DUPd32, ARM::VLD1q64};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD2DUPq8EvenPseudo, ARM::VLD2DUPq16EvenPseudo,
          ARM::VLD2DUPq32EvenPseudo};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD2DUPq8OddPseudo, ARM::VLD2DUPq16OddPseudo,
          ARM::VLD2DUPq32OddPseudo};
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2, DOpcodes, QOpcodes0,
                   QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = {
          ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd32Pseudo,
          ARM::VLD1d64TPseudo};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD3DUPq8EvenPseudo, ARM::VLD3DUPq16EvenPseudo,
          ARM::VLD3DUPq32EvenPseudo};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD3DUPq8OddPseudo, ARM::VLD3DUPq16OddPseudo,
          ARM::VLD3DUPq32OddPseudo};
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3, DOpcodes, QOpcodes0,
                   QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = {
          ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd32Pseudo,
          ARM::VLD1d64QPseudo};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD4DUPq8EvenPseudo, ARM::VLD4DUPq16EvenPseudo,
          ARM::VLD4DUPq32EvenPseudo};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD4DUPq8OddPseudo, ARM::VLD4DUPq16OddPseudo,
          ARM::VLD4DUPq32OddPseudo};
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4, DOpcodes, QOpcodes0,
                   QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = {
          ARM::VLD2LNd8Pseudo, ARM::VLD2LNd16Pseudo, ARM::VLD2LNd32Pseudo};
      static const uint16_t QOpcodes[] = {
          ARM::VLD2LNq16Pseudo, ARM::VLD2LNq32Pseudo};
      SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = {
          ARM::VLD3LNd8Pseudo, ARM::VLD3LNd16Pseudo, ARM::VLD3LNd32Pseudo};
      static const uint16_t QOpcodes[] = {
          ARM::VLD3LNq16Pseudo, ARM::VLD3LNq32Pseudo};
      SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = {
          ARM::VLD4LNd8Pseudo, ARM::VLD4LNd16Pseudo, ARM::VLD4LNd32Pseudo};
      static const uint16_t QOpcodes[] = {
          ARM::VLD4LNq16Pseudo, ARM::VLD4LNq32Pseudo};
      SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst1: {
      static const uint16_t DOpcodes[] = {
          ARM::VST1d8, ARM::VST1d16, ARM::VST1d32, ARM::VST1d64};
      static const uint16_t QOpcodes[] = {
          ARM::VST1q8, ARM::VST1q16, ARM::VST1q32, ARM::VST1q64};
      SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x2: {
      static const uint16_t DOpcodes[] = {
          ARM::VST1q8, ARM::VST1q16, ARM::VST1q32, ARM::VST1q64};
      static const uint16_t QOpcodes[] = {
          ARM::VST1d8QPseudo, ARM::VST1d16QPseudo, ARM::VST1d32QPseudo,
          ARM::VST1d64QPseudo};
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x3: {
      static const uint16_t DOpcodes[] = {
          ARM::VST1d8TPseudo, ARM::VST1d16TPseudo, ARM::VST1d32TPseudo,
          ARM::VST1d64TPseudo};
      static const uint16_t QOpcodes0[] = {
          ARM::VST1q8LowTPseudo_UPD, ARM::VST1q16LowTPseudo_UPD,
          ARM::VST1q32LowTPseudo_UPD, ARM::VST1q64LowTPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VST1q8HighTPseudo, ARM::VST1q16HighTPseudo,
          ARM::VST1q32HighTPseudo, ARM::VST1q64HighTPseudo};
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst1x4: {
      static const uint16_t DOpcodes[] = {
          ARM::VST1d8QPseudo, ARM::VST1d16QPseudo, ARM::VST1d32QPseudo,
          ARM::VST1d64QPseudo};
      static const uint16_t QOpcodes0[] = {
          ARM::VST1q8LowQPseudo_UPD, ARM::VST1q16LowQPseudo_UPD,
          ARM::VST1q32LowQPseudo_UPD, ARM::VST1q64LowQPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VST1q8HighQPseudo, ARM::VST1q16HighQPseudo,
          ARM::VST1q32HighQPseudo, ARM::VST1q64HighQPseudo};
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2: {
      static const uint16_t DOpcodes[] = {
          ARM::VST2d8, ARM::VST2d16, ARM::VST2d32, ARM::VST1q64};
      static const uint16_t QOpcodes[] = {
          ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, ARM::VST2q32Pseudo};
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst3: {
      static const uint16_t DOpcodes[] = {
          ARM::VST3d8Pseudo, ARM::VST3d16Pseudo, ARM::VST3d32Pseudo,
          ARM::VST1d64TPseudo};
      static const uint16_t QOpcodes0[] = {
          ARM::VST3q8Pseudo_UPD, ARM::VST3q16Pseudo_UPD,
          ARM::VST3q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VST3q8oddPseudo, ARM::VST3q16oddPseudo, ARM::VST3q32oddPseudo};
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst4: {
      static const uint16_t DOpcodes[] = {
          ARM::VST4d8Pseudo, ARM::VST4d16Pseudo, ARM::VST4d32Pseudo,
          ARM::VST1d64QPseudo};
      static const uint16_t QOpcodes0[] = {
          ARM::VST4q8Pseudo_UPD, ARM::VST4q16Pseudo_UPD,
          ARM::VST4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VST4q8oddPseudo, ARM::VST4q16oddPseudo, ARM::VST4q32oddPseudo};
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t DOpcodes[] = {
          ARM::VST2LNd8Pseudo, ARM::VST2LNd16Pseudo, ARM::VST2LNd32Pseudo};
      static const uint16_t QOpcodes[] = {
          ARM::VST2LNq16Pseudo, ARM::VST2LNq32Pseudo};
      SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t DOpcodes[] = {
          ARM::VST3LNd8Pseudo, ARM::VST3LNd16Pseudo, ARM::VST3LNd32Pseudo};
      static const uint16_t QOpcodes[] = {
          ARM::VST3LNq16Pseudo, ARM::VST3LNq32Pseudo};
      SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t DOpcodes[] = {
          ARM::VST4LNd8Pseudo, ARM::VST4LNd16Pseudo, ARM::VST4LNd32Pseudo};
      static const uint16_t QOpcodes[] = {
          ARM::VST4LNq16Pseudo, ARM::VST4LNq32Pseudo};
      SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_mve_vldr_gather_base_wb:
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VLDRWU32_qi_pre, ARM::MVE_VLDRDU64_qi_pre};
      SelectMVE_WB(N, Opcodes,
                   IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
      return;
    }

    case Intrinsic::arm_mve_vld2q: {
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
      static const uint16_t Opcodes16[] = {
          ARM::MVE_VLD20_16, ARM::MVE_VLD21_16};
      static const uint16_t Opcodes32[] = {
          ARM::MVE_VLD20_32, ARM::MVE_VLD21_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 2, Opcodes, false);
      return;
    }

    case Intrinsic::arm_mve_vld4q: {
      static const uint16_t Opcodes8[] = {
          ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, ARM::MVE_VLD42_8,
          ARM::MVE_VLD43_8};
      static const uint16_t Opcodes16[] = {
          ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, ARM::MVE_VLD42_16,
          ARM::MVE_VLD43_16};
      static const uint16_t Opcodes32[] = {
          ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, ARM::MVE_VLD42_32,
          ARM::MVE_VLD43_32};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 4, Opcodes, false);
      return;
    }
    }
    break;
  }

  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = N->getConstantOperandVal(0);
    switch (IntNo) {
    default:
      break;

    // Scalar f32 -> bf16
    case Intrinsic::arm_neon_vcvtbfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      llvm::EVT DestTy = N->getValueType(0);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
      return;
    }

    // Vector v4f32 -> v4bf16
    case Intrinsic::arm_neon_vcvtfp2bf: {
      SDLoc dl(N);
      const SDValue &Src = N->getOperand(1);
      SDValue Pred = getAL(CurDAG, dl);
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { Src, Pred, Reg0 };
      CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
      return;
    }

    case Intrinsic::arm_mve_urshrl:
      SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
      return;
    case Intrinsic::arm_mve_uqshll:
      SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_srshrl:
      SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
      return;
    case Intrinsic::arm_mve_sqshll:
      SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
      return;
    case Intrinsic::arm_mve_uqrshll:
      SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
      return;
    case Intrinsic::arm_mve_sqrshrl:
      SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
      return;

    case Intrinsic::arm_mve_vadc:
    case Intrinsic::arm_mve_vadc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
                        IntNo == Intrinsic::arm_mve_vadc_predicated);
      return;
    case Intrinsic::arm_mve_vsbc:
    case Intrinsic::arm_mve_vsbc_predicated:
      SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,
                        IntNo == Intrinsic::arm_mve_vsbc_predicated);
      return;
    case Intrinsic::arm_mve_vshlc:
    case Intrinsic::arm_mve_vshlc_predicated:
      SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
      return;

    case Intrinsic::arm_mve_vmlldava:
    case Intrinsic::arm_mve_vmlldava_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,
          ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,
          ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,
          ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,
          ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
          ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,
          ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,
          ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,
          ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
      };
      SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
                        OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vrmlldavha:
    case Intrinsic::arm_mve_vrmlldavha_predicated: {
      static const uint16_t OpcodesU[] = {
          ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,
      };
      static const uint16_t OpcodesS[] = {
          ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,
          ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
          ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,
          ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
      };
      SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
                          OpcodesS, OpcodesU);
      return;
    }

    case Intrinsic::arm_mve_vidup:
    case Intrinsic::arm_mve_vidup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vidup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vddup:
    case Intrinsic::arm_mve_vddup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, false,
                      IntNo == Intrinsic::arm_mve_vddup_predicated);
      return;
    }

    case Intrinsic::arm_mve_viwdup:
    case Intrinsic::arm_mve_viwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_viwdup_predicated);
      return;
    }

    case Intrinsic::arm_mve_vdwdup:
    case Intrinsic::arm_mve_vdwdup_predicated: {
      static const uint16_t Opcodes[] = {
          ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
      };
      SelectMVE_VxDUP(N, Opcodes, true,
                      IntNo == Intrinsic::arm_mve_vdwdup_predicated);
      return;
    }

    case Intrinsic::arm_cde_cx1d:
    case Intrinsic::arm_cde_cx1da:
    case Intrinsic::arm_cde_cx2d:
    case Intrinsic::arm_cde_cx2da:
    case Intrinsic::arm_cde_cx3d:
    case Intrinsic::arm_cde_cx3da: {
      bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
                      IntNo == Intrinsic::arm_cde_cx2da ||
                      IntNo == Intrinsic::arm_cde_cx3da;
      size_t NumExtraOps;
      uint16_t Opcode;
      switch (IntNo) {
      case Intrinsic::arm_cde_cx1d:
      case Intrinsic::arm_cde_cx1da:
        NumExtraOps = 0;
        Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
        break;
      case Intrinsic::arm_cde_cx2d:
      case Intrinsic::arm_cde_cx2da:
        NumExtraOps = 1;
        Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
        break;
      case Intrinsic::arm_cde_cx3d:
      case Intrinsic::arm_cde_cx3da:
        NumExtraOps = 2;
        Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
        break;
      default:
        llvm_unreachable("Unexpected opcode");
      }
      SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
      return;
    }
    }
    break;
  }

  case ISD::ATOMIC_CMP_SWAP:
    SelectCMP_SWAP(N);
    return;
  }

  SelectCode(N);
}

// Inspect a register string of the form
// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32 bit) or
// cp<coprocessor>:<opc1>:c<CRm> (64 bit), extract the integer operands from
// its fields, and append those operands to the provided vector.
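// For example (illustrative), the string "cp15:0:c13:c0:3" splits into the
// fields {"cp15", "0", "c13", "c0", "3"}, which become the integer operands
// {15, 0, 13, 0, 3}.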
static void getIntOperandsFromRegisterString(StringRef RegString,
                                             SelectionDAG *CurDAG,
                                             const SDLoc &DL,
                                             std::vector<SDValue> &Ops) {
  SmallVector<StringRef, 5> Fields;
  RegString.split(Fields, ':');

  if (Fields.size() > 1) {
    bool AllIntFields = true;

    for (StringRef Field : Fields) {
      // Need to trim out leading 'cp' characters and get the integer field.
      unsigned IntField;
      AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
      Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
    }

    assert(AllIntFields &&
           "Unexpected non-integer value in special register string.");
    (void)AllIntFields;
  }
}

// Maps a Banked Register string to its mask value. The mask value returned is
// for use in the MRSbanked / MSRbanked instruction nodes as the Banked
// Register mask operand, which expresses which register is to be used,
// e.g. r8, and in which mode it is to be used, e.g. usr. Returns -1 to
// signify that the string was invalid.
static inline int getBankedRegisterMask(StringRef RegString) {
  auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
  if (!TheReg)
    return -1;
  return TheReg->Encoding;
}

// The flags here are common to those allowed for apsr in the A class cores and
// those allowed for the special registers in the M class cores. Returns a
// value representing which flags were present, -1 if invalid.
static inline int getMClassFlagsMask(StringRef Flags) {
  return StringSwitch<int>(Flags)
      .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
                     // correct when flags are not permitted
      .Case("g", 0x1)
      .Case("nzcvq", 0x2)
      .Case("nzcvqg", 0x3)
      .Default(-1);
}

// Maps an M Class special register string to its value for use in the
// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
// Returns -1 to signify that the string was invalid.
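// For example (illustrative), on M-profile targets that provide it, the
// string "basepri" maps to the architectural SYSm encoding 17 (0x11).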
static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
  auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
  const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
  if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
    return -1;
  return (int)(TheReg->Encoding & 0xFFF); // SYSm value
}

static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
  // The mask operand contains the special register (R Bit) in bit 4, whether
  // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
  // bits 3-0 contain the fields to be accessed in the special register, set
  // by the flags provided with the register.
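  // For example (illustrative): Reg == "spsr" with Flags == "fc" sets the
  // 'c' (0x1) and 'f' (0x8) field bits plus the R bit (0x10), giving 0x19.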
  int Mask = 0;
  if (Reg == "apsr") {
    // The flags permitted for apsr are the same flags that are allowed in
    // M class registers. We get the flag value and then shift the flags into
    // the correct place to combine with the mask.
    Mask = getMClassFlagsMask(Flags);
    if (Mask == -1)
      return -1;
    return Mask << 2;
  }

  if (Reg != "cpsr" && Reg != "spsr") {
    return -1;
  }

  // This is the same as if the flags were "fc".
  if (Flags.empty() || Flags == "all")
    return Mask | 0x9;

  // Inspect the supplied flags string and set the bits in the mask for
  // the relevant and valid flags allowed for cpsr and spsr.
  for (char Flag : Flags) {
    int FlagVal;
    switch (Flag) {
    case 'c':
      FlagVal = 0x1;
      break;
    case 'x':
      FlagVal = 0x2;
      break;
    case 's':
      FlagVal = 0x4;
      break;
    case 'f':
      FlagVal = 0x8;
      break;
    default:
      FlagVal = 0;
    }

    // This avoids allowing strings where the same flag bit appears twice.
    if (!FlagVal || (Mask & FlagVal))
      return -1;
    Mask |= FlagVal;
  }

  // If the register is spsr then we need to set the R bit.
  if (Reg == "spsr")
    Mask |= 0x10;

  return Mask;
}

// Lower the read_register intrinsic to ARM-specific DAG nodes, using the
// supplied metadata string to select the instruction node to use and the
// registers/masks to construct as operands for the node.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE), then we need to lower to an MRC node (32 bit) or an
    // MRRC node (64 bit); we can make the distinction based on the number of
    // operands we have.
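    // For example (illustrative), the five-field string "cp15:0:c13:c0:3"
    // (TPIDRURO, the user read-only thread ID register, on A-profile cores)
    // yields five integer operands and is therefore lowered to a single MRC.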
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();

  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMRS)
                        .Case("fpexc", ARM::VMRS_FPEXC)
                        .Case("fpsid", ARM::VMRS_FPSID)
                        .Case("mvfr0", ARM::VMRS_MVFR0)
                        .Case("mvfr1", ARM::VMRS_MVFR1)
                        .Case("mvfr2", ARM::VMRS_MVFR2)
                        .Case("fpinst", ARM::VMRS_FPINST)
                        .Case("fpinst2", ARM::VMRS_FPINST2)
                        .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then we need to validate that the register
  // string is an acceptable value, so check that a mask can be constructed
  // from the string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class, so we need to check whether it is
  // one of the remaining possible values, which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  return false;
}

// Lower the write_register intrinsic to ARM-specific DAG nodes, using the
// supplied metadata string to select the instruction node to use and the
// registers/masks to use in the nodes.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE), then we need to lower to an MCR node (32 bit) or an
    // MCRR node (64 bit); we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      Ops.insert(Ops.begin() + 2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
             "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin() + 2, WriteValue, WriteValue + 2);
    }

    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                        .Case("fpscr", ARM::VMSR)
                        .Case("fpexc", ARM::VMSR_FPEXC)
                        .Case("fpsid", ARM::VMSR_FPSID)
                        .Case("fpinst", ARM::VMSR_FPINST)
                        .Case("fpinst2", ARM::VMSR_FPINST2)
                        .Default(0);

  if (Opcode) {
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target is M Class then we need to validate the special register
  // value and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  return false;
}

bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N) {
  std::vector<SDValue> AsmNodeOperands;
  InlineAsm::Flag Flag;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bound to two arbitrary GPRs for the "%r"
  // constraint. However, some instructions (e.g. ldrexd/strexd in ARM mode)
  // require (even/even+1) GPRs and use %n and %Hn to refer to the individual
  // regs respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use the GPRPair reg class for "%r" for 64-bit data. For
  // Thumb, the 64-bit data may be referred to by the H, Q, R modifiers, so we
  // still pack them into a GPRPair.
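  // For example (illustrative), inline asm such as
  //   asm volatile("ldrexd %0, %H0, [%1]" : "=&r"(Val) : "r"(Addr));
  // with a 64-bit Val is what drives the pairing below: the two GPRs backing
  // Val are rewritten into a single GPRPair.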

  SDLoc dl(N);
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();

  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for (unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e;
       ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    if (i < InlineAsm::Op_FirstOperand)
      continue;

    if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))
      Flag = InlineAsm::Flag(C->getZExtValue());
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind::Imm, skip the next operand, and continue.
    if (Flag.isImmKind()) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    const unsigned NumRegs = Flag.getNumOperandRegisters();
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && Flag.isUseOperandTiedToDef(DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind::Mem followed by the input
    // operand. If we get here and we have a Kind::Mem, skip the next operand
    // (so it doesn't get misinterpreted), and continue. We do this here
    // because it's important to update the OpChanged array correctly before
    // moving on.
    if (Flag.isMemKind()) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
        !Flag.isRegDefEarlyClobberKind())
      continue;

    unsigned RC;
    const bool HasRC = Flag.hasRegClassConstraint(RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) ||
        NumRegs != 2)
      continue;

    assert((i + 2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i + 1);
    SDValue V1 = N->getOperand(i + 2);
    Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
    Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N, 0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy =
          CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                 Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 =
          CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, RegCopy);
      SDValue Sub1 =
          CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, RegCopy);
      SDValue T0 =
          CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end() - 1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    } else {
      // For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 =
          CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, Chain.getValue(1));
      SDValue T1 =
          CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    if (PairedReg.getNode()) {
      OpChanged[OpChanged.size() - 1] = true;
      Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum */);
      if (IsTiedToChangedOp)
        Flag.setMatchingOp(DefIdx);
      else
        Flag.setRegClass(ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() - 1] =
          CurDAG->getTargetConstant(Flag, dl, MVT::i32);
      // Add the new register node.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the original two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  SDValue New =
      CurDAG->getNode(N->getOpcode(), SDLoc(N),
                      CurDAG->getVTList(MVT::Other, MVT::Glue),
                      AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}

bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::ConstraintCode::m:
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::Q:
  case InlineAsm::ConstraintCode::Um:
  case InlineAsm::ConstraintCode::Un:
  case InlineAsm::ConstraintCode::Uq:
  case InlineAsm::ConstraintCode::Us:
  case InlineAsm::ConstraintCode::Ut:
  case InlineAsm::ConstraintCode::Uv:
  case InlineAsm::ConstraintCode::Uy:
    // Require the address to be in a register. That is safe for all ARM
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}

/// createARMISelDag - This pass converts a legalized DAG into an
/// ARM-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
                                     CodeGenOptLevel OptLevel) {
  return new ARMDAGToDAGISelLegacy(TM, OptLevel);
}