1
//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the interfaces that VE uses to lower LLVM code into a
10
// selection DAG.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "VEISelLowering.h"
15
#include "MCTargetDesc/VEMCExpr.h"
16
#include "VECustomDAG.h"
17
#include "VEInstrBuilder.h"
18
#include "VEMachineFunctionInfo.h"
19
#include "VERegisterInfo.h"
20
#include "VETargetMachine.h"
21
#include "llvm/ADT/StringSwitch.h"
22
#include "llvm/CodeGen/CallingConvLower.h"
23
#include "llvm/CodeGen/MachineFrameInfo.h"
24
#include "llvm/CodeGen/MachineFunction.h"
25
#include "llvm/CodeGen/MachineInstrBuilder.h"
26
#include "llvm/CodeGen/MachineJumpTableInfo.h"
27
#include "llvm/CodeGen/MachineModuleInfo.h"
28
#include "llvm/CodeGen/MachineRegisterInfo.h"
29
#include "llvm/CodeGen/SelectionDAG.h"
30
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
31
#include "llvm/IR/DerivedTypes.h"
32
#include "llvm/IR/Function.h"
33
#include "llvm/IR/IRBuilder.h"
34
#include "llvm/IR/Module.h"
35
#include "llvm/Support/ErrorHandling.h"
36
#include "llvm/Support/KnownBits.h"
37
using namespace llvm;
38
39
#define DEBUG_TYPE "ve-lower"
40
41
//===----------------------------------------------------------------------===//
42
// Calling Convention Implementation
43
//===----------------------------------------------------------------------===//
44
45
#include "VEGenCallingConv.inc"
46
47
CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
48
switch (CallConv) {
49
default:
50
return RetCC_VE_C;
51
case CallingConv::Fast:
52
return RetCC_VE_Fast;
53
}
54
}
55
56
CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
57
if (IsVarArg)
58
return CC_VE2;
59
switch (CallConv) {
60
default:
61
return CC_VE_C;
62
case CallingConv::Fast:
63
return CC_VE_Fast;
64
}
65
}
66
67
bool VETargetLowering::CanLowerReturn(
68
CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
69
const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
70
CCAssignFn *RetCC = getReturnCC(CallConv);
71
SmallVector<CCValAssign, 16> RVLocs;
72
CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
73
return CCInfo.CheckReturn(Outs, RetCC);
74
}
75
76
static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
77
MVT::v256f32, MVT::v512f32, MVT::v256f64};
78
79
static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};
80
81
static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
82
83
void VETargetLowering::initRegisterClasses() {
84
// Set up the register classes.
85
addRegisterClass(MVT::i32, &VE::I32RegClass);
86
addRegisterClass(MVT::i64, &VE::I64RegClass);
87
addRegisterClass(MVT::f32, &VE::F32RegClass);
88
addRegisterClass(MVT::f64, &VE::I64RegClass);
89
addRegisterClass(MVT::f128, &VE::F128RegClass);
90
91
if (Subtarget->enableVPU()) {
92
for (MVT VecVT : AllVectorVTs)
93
addRegisterClass(VecVT, &VE::V64RegClass);
94
addRegisterClass(MVT::v256i1, &VE::VMRegClass);
95
addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
96
}
97
}
98
99
void VETargetLowering::initSPUActions() {
100
const auto &TM = getTargetMachine();
101
/// Load & Store {
102
103
// VE doesn't have i1 sign extending load.
104
for (MVT VT : MVT::integer_valuetypes()) {
105
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
106
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
107
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
108
setTruncStoreAction(VT, MVT::i1, Expand);
109
}
110
111
// VE doesn't have floating point extload/truncstore, so expand them.
112
for (MVT FPVT : MVT::fp_valuetypes()) {
113
for (MVT OtherFPVT : MVT::fp_valuetypes()) {
114
setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
115
setTruncStoreAction(FPVT, OtherFPVT, Expand);
116
}
117
}
118
119
// VE doesn't have fp128 load/store, so expand them in custom lowering.
120
setOperationAction(ISD::LOAD, MVT::f128, Custom);
121
setOperationAction(ISD::STORE, MVT::f128, Custom);
122
123
/// } Load & Store
124
125
// Custom legalize address nodes into LO/HI parts.
126
MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
127
setOperationAction(ISD::BlockAddress, PtrVT, Custom);
128
setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
129
setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
130
setOperationAction(ISD::ConstantPool, PtrVT, Custom);
131
setOperationAction(ISD::JumpTable, PtrVT, Custom);
132
133
/// VAARG handling {
134
setOperationAction(ISD::VASTART, MVT::Other, Custom);
135
// VAARG needs to be lowered to an access with 8-byte alignment.
136
setOperationAction(ISD::VAARG, MVT::Other, Custom);
137
// Use the default implementation.
138
setOperationAction(ISD::VACOPY, MVT::Other, Expand);
139
setOperationAction(ISD::VAEND, MVT::Other, Expand);
140
/// } VAARG handling
141
142
/// Stack {
143
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
144
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
145
146
// Use the default implementation.
147
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
148
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
149
/// } Stack
150
151
/// Branch {
152
153
// VE doesn't have BRCOND
154
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
155
156
// BR_JT is not implemented yet.
157
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
158
159
/// } Branch
160
161
/// Int Ops {
162
for (MVT IntVT : {MVT::i32, MVT::i64}) {
163
// VE has no REM or DIVREM operations.
164
setOperationAction(ISD::UREM, IntVT, Expand);
165
setOperationAction(ISD::SREM, IntVT, Expand);
166
setOperationAction(ISD::SDIVREM, IntVT, Expand);
167
setOperationAction(ISD::UDIVREM, IntVT, Expand);
168
169
// VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
170
setOperationAction(ISD::SHL_PARTS, IntVT, Expand);
171
setOperationAction(ISD::SRA_PARTS, IntVT, Expand);
172
setOperationAction(ISD::SRL_PARTS, IntVT, Expand);
173
174
// VE has no MULHU/S or U/SMUL_LOHI operations.
175
// TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
176
setOperationAction(ISD::MULHU, IntVT, Expand);
177
setOperationAction(ISD::MULHS, IntVT, Expand);
178
setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);
179
setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);
180
181
// VE has no CTTZ, ROTL, ROTR operations.
182
setOperationAction(ISD::CTTZ, IntVT, Expand);
183
setOperationAction(ISD::ROTL, IntVT, Expand);
184
setOperationAction(ISD::ROTR, IntVT, Expand);
185
186
// VE has a 64-bit instruction which works as an i64 BSWAP operation. This
187
// instruction also works as an i32 BSWAP operation with an additional
188
// parameter. Use isel patterns to lower BSWAP.
189
setOperationAction(ISD::BSWAP, IntVT, Legal);
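// For example, both of the following are selected directly by those
// patterns rather than being expanded (illustrative IR, not from this file):
//   %a = call i64 @llvm.bswap.i64(i64 %x)
//   %b = call i32 @llvm.bswap.i32(i32 %y)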
190
191
// VE has only 64-bit instructions which work as i64 BITREVERSE/CTLZ/CTPOP
192
// operations. Use isel patterns for i64, promote for i32.
193
LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
194
setOperationAction(ISD::BITREVERSE, IntVT, Act);
195
setOperationAction(ISD::CTLZ, IntVT, Act);
196
setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);
197
setOperationAction(ISD::CTPOP, IntVT, Act);
198
199
// VE has only 64-bit instructions which work as i64 AND/OR/XOR operations.
200
// Use isel patterns for i64, promote for i32.
201
setOperationAction(ISD::AND, IntVT, Act);
202
setOperationAction(ISD::OR, IntVT, Act);
203
setOperationAction(ISD::XOR, IntVT, Act);
204
205
// Legal smax and smin
206
setOperationAction(ISD::SMAX, IntVT, Legal);
207
setOperationAction(ISD::SMIN, IntVT, Legal);
208
}
209
/// } Int Ops
210
211
/// Conversion {
212
// VE doesn't have instructions for fp<->uint, so let LLVM expand/promote them
213
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
214
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
215
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
216
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
217
218
// fp16 not supported
219
for (MVT FPVT : MVT::fp_valuetypes()) {
220
setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
221
setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
222
}
223
/// } Conversion
224
225
/// Floating-point Ops {
226
/// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
227
/// and fcmp.
228
229
// VE doesn't have the following floating-point operations.
230
for (MVT VT : MVT::fp_valuetypes()) {
231
setOperationAction(ISD::FNEG, VT, Expand);
232
setOperationAction(ISD::FREM, VT, Expand);
233
}
234
235
// VE doesn't have fdiv of f128.
236
setOperationAction(ISD::FDIV, MVT::f128, Expand);
237
238
for (MVT FPVT : {MVT::f32, MVT::f64}) {
239
// f32 and f64 use ConstantFP. f128 uses ConstantPool.
240
setOperationAction(ISD::ConstantFP, FPVT, Legal);
241
}
242
/// } Floating-point Ops
243
244
/// Floating-point math functions {
245
246
// VE doesn't have the following floating-point math functions.
247
for (MVT VT : MVT::fp_valuetypes()) {
248
setOperationAction(ISD::FABS, VT, Expand);
249
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
250
setOperationAction(ISD::FCOS, VT, Expand);
251
setOperationAction(ISD::FMA, VT, Expand);
252
setOperationAction(ISD::FPOW, VT, Expand);
253
setOperationAction(ISD::FSIN, VT, Expand);
254
setOperationAction(ISD::FSQRT, VT, Expand);
255
}
256
257
// VE has single- and double-precision FMINNUM and FMAXNUM
258
for (MVT VT : {MVT::f32, MVT::f64}) {
259
setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, VT, Legal);
260
}
261
262
/// } Floating-point math functions
263
264
/// Atomic instructions {
265
266
setMaxAtomicSizeInBitsSupported(64);
267
setMinCmpXchgSizeInBits(32);
268
setSupportsUnalignedAtomics(false);
269
270
// Use custom inserter for ATOMIC_FENCE.
271
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
272
273
// Other atomic instructions.
274
for (MVT VT : MVT::integer_valuetypes()) {
275
// Support i8/i16 atomic swap.
276
setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
277
278
// FIXME: Support "atmam" instructions.
279
setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
280
setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
281
setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
282
setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
283
284
// VE doesn't have the following instructions.
285
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
286
setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
287
setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
288
setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
289
setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
290
setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
291
setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
292
setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
293
}
294
295
/// } Atomic instructions
296
297
/// SJLJ instructions {
298
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
299
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
300
setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
301
if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
302
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
303
/// } SJLJ instructions
304
305
// Intrinsic instructions
306
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
307
}
308
309
void VETargetLowering::initVPUActions() {
310
for (MVT LegalMaskVT : AllMaskVTs)
311
setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);
312
313
for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
314
setOperationAction(Opc, MVT::v512i1, Custom);
315
316
for (MVT LegalVecVT : AllVectorVTs) {
317
setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
318
setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
319
setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
320
// Translate all vector instructions with legal element types to VVP_*
321
// nodes.
322
// TODO We will custom-widen into VVP_* nodes in the future. While we are
323
// building the infrastructure for this, we only do this for legal vector
324
// VTs.
325
#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME) \
326
setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
327
#define ADD_VVP_OP(VVP_NAME, ISD_NAME) \
328
setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
329
setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, LegalVecVT, Custom);
330
setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_STORE, LegalVecVT, Custom);
331
#include "VVPNodes.def"
332
}
333
334
for (MVT LegalPackedVT : AllPackedVTs) {
335
setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
336
setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
337
}
338
339
// vNt32, vNt64 ops (legal element types)
340
for (MVT VT : MVT::vector_valuetypes()) {
341
MVT ElemVT = VT.getVectorElementType();
342
unsigned ElemBits = ElemVT.getScalarSizeInBits();
343
if (ElemBits != 32 && ElemBits != 64)
344
continue;
345
346
for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
347
setOperationAction(MemOpc, VT, Custom);
348
349
const ISD::NodeType IntReductionOCs[] = {
350
ISD::VECREDUCE_ADD, ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND,
351
ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMIN,
352
ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX};
353
354
for (unsigned IntRedOpc : IntReductionOCs)
355
setOperationAction(IntRedOpc, VT, Custom);
356
}
357
358
// v256i1 and v512i1 ops
359
for (MVT MaskVT : AllMaskVTs) {
360
// Custom lower mask ops
361
setOperationAction(ISD::STORE, MaskVT, Custom);
362
setOperationAction(ISD::LOAD, MaskVT, Custom);
363
}
364
}
365
366
SDValue
367
VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
368
bool IsVarArg,
369
const SmallVectorImpl<ISD::OutputArg> &Outs,
370
const SmallVectorImpl<SDValue> &OutVals,
371
const SDLoc &DL, SelectionDAG &DAG) const {
372
// CCValAssign - represent the assignment of the return value to locations.
373
SmallVector<CCValAssign, 16> RVLocs;
374
375
// CCState - Info about the registers and stack slot.
376
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
377
*DAG.getContext());
378
379
// Analyze return values.
380
CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));
381
382
SDValue Glue;
383
SmallVector<SDValue, 4> RetOps(1, Chain);
384
385
// Copy the result values into the output registers.
386
for (unsigned i = 0; i != RVLocs.size(); ++i) {
387
CCValAssign &VA = RVLocs[i];
388
assert(VA.isRegLoc() && "Can only return in registers!");
389
assert(!VA.needsCustom() && "Unexpected custom lowering");
390
SDValue OutVal = OutVals[i];
391
392
// Integer return values must be sign or zero extended by the callee.
393
switch (VA.getLocInfo()) {
394
case CCValAssign::Full:
395
break;
396
case CCValAssign::SExt:
397
OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
398
break;
399
case CCValAssign::ZExt:
400
OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
401
break;
402
case CCValAssign::AExt:
403
OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
404
break;
405
case CCValAssign::BCvt: {
406
// Convert a float return value to i64 with padding.
407
// 63 31 0
408
// +------+------+
409
// | float| 0 |
410
// +------+------+
411
assert(VA.getLocVT() == MVT::i64);
412
assert(VA.getValVT() == MVT::f32);
413
SDValue Undef = SDValue(
414
DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
415
SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
416
OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
417
MVT::i64, Undef, OutVal, Sub_f32),
418
0);
419
break;
420
}
421
default:
422
llvm_unreachable("Unknown loc info!");
423
}
424
425
Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Glue);
426
427
// Guarantee that all emitted copies are stuck together with flags.
428
Glue = Chain.getValue(1);
429
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
430
}
431
432
RetOps[0] = Chain; // Update chain.
433
434
// Add the glue if we have it.
435
if (Glue.getNode())
436
RetOps.push_back(Glue);
437
438
return DAG.getNode(VEISD::RET_GLUE, DL, MVT::Other, RetOps);
439
}
440
441
SDValue VETargetLowering::LowerFormalArguments(
442
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
443
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
444
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
445
MachineFunction &MF = DAG.getMachineFunction();
446
447
// Get the base offset of the incoming arguments stack space.
448
unsigned ArgsBaseOffset = Subtarget->getRsaSize();
449
// Get the size of the preserved arguments area
450
unsigned ArgsPreserved = 64;
451
452
// Analyze arguments according to CC_VE.
453
SmallVector<CCValAssign, 16> ArgLocs;
454
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
455
*DAG.getContext());
456
// Allocate the preserved area first.
457
CCInfo.AllocateStack(ArgsPreserved, Align(8));
458
// We already allocated the preserved area, so the stack offset computed
459
// by CC_VE would be correct now.
460
CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));
461
462
for (const CCValAssign &VA : ArgLocs) {
463
assert(!VA.needsCustom() && "Unexpected custom lowering");
464
if (VA.isRegLoc()) {
465
// This argument is passed in a register.
466
// All integer register arguments are promoted by the caller to i64.
467
468
// Create a virtual register for the promoted live-in value.
469
Register VReg =
470
MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
471
SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
472
473
// The caller promoted the argument, so insert an Assert?ext SDNode so we
474
// won't promote the value again in this function.
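// For example (illustrative), an i32 argument passed sign-extended in a
// 64-bit register arrives roughly as:
//   t1: i64 = CopyFromReg %s0
//   t2: i64 = AssertSext t1, ValueType:i32
// and is truncated back down to i32 below.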
475
switch (VA.getLocInfo()) {
476
case CCValAssign::SExt:
477
Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
478
DAG.getValueType(VA.getValVT()));
479
break;
480
case CCValAssign::ZExt:
481
Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
482
DAG.getValueType(VA.getValVT()));
483
break;
484
case CCValAssign::BCvt: {
485
// Extract a float argument from i64 with padding.
486
// 63 31 0
487
// +------+------+
488
// | float| 0 |
489
// +------+------+
490
assert(VA.getLocVT() == MVT::i64);
491
assert(VA.getValVT() == MVT::f32);
492
SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
493
Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
494
MVT::f32, Arg, Sub_f32),
495
0);
496
break;
497
}
498
default:
499
break;
500
}
501
502
// Truncate the register down to the argument type.
503
if (VA.isExtInLoc())
504
Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
505
506
InVals.push_back(Arg);
507
continue;
508
}
509
510
// The registers are exhausted. This argument was passed on the stack.
511
assert(VA.isMemLoc());
512
// The CC_VE_Full/Half functions compute stack offsets relative to the
513
// beginning of the arguments area at %fp + the size of reserved area.
514
unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
515
unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
516
517
// Adjust the offset of a float argument by adding 4, since the argument is
518
// stored in an 8-byte buffer laid out as below. LLVM generates a
519
// 4-byte load instruction, so the offset needs adjusting here. This
520
// adjustment is required only in LowerFormalArguments. In LowerCall,
521
// a float argument is converted to i64 first and stored as 8 bytes of
522
// data, as required by the ABI, so no adjustment is needed.
523
// 0 4
524
// +------+------+
525
// | empty| float|
526
// +------+------+
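// For example, a float assigned to the first stack slot (LocMemOffset 0)
// is loaded from ArgsBaseOffset + 4 rather than ArgsBaseOffset + 0.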
527
if (VA.getValVT() == MVT::f32)
528
Offset += 4;
529
530
int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
531
InVals.push_back(
532
DAG.getLoad(VA.getValVT(), DL, Chain,
533
DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
534
MachinePointerInfo::getFixedStack(MF, FI)));
535
}
536
537
if (!IsVarArg)
538
return Chain;
539
540
// This function takes variable arguments, some of which may have been passed
541
// in registers %s0-%s8.
542
//
543
// The va_start intrinsic needs to know the offset to the first variable
544
// argument.
545
// TODO: need to calculate offset correctly once we support f128.
546
unsigned ArgOffset = ArgLocs.size() * 8;
547
VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
548
// Skip the reserved area at the top of stack.
549
FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
550
551
return Chain;
552
}
553
554
// FIXME? Maybe this could be a TableGen attribute on some registers and
555
// this table could be generated automatically from RegInfo.
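// This is typically reached via the named-register intrinsics, e.g.
// (illustrative IR):
//   %sp = call i64 @llvm.read_register.i64(metadata !0)  ; !0 = !{!"sp"}
// which resolves "sp" to VE::SX11 below.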
556
Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
557
const MachineFunction &MF) const {
558
Register Reg = StringSwitch<Register>(RegName)
559
.Case("sp", VE::SX11) // Stack pointer
560
.Case("fp", VE::SX9) // Frame pointer
561
.Case("sl", VE::SX8) // Stack limit
562
.Case("lr", VE::SX10) // Link register
563
.Case("tp", VE::SX14) // Thread pointer
564
.Case("outer", VE::SX12) // Outer regiser
565
.Case("info", VE::SX17) // Info area register
566
.Case("got", VE::SX15) // Global offset table register
567
.Case("plt", VE::SX16) // Procedure linkage table register
568
.Default(0);
569
570
if (Reg)
571
return Reg;
572
573
report_fatal_error("Invalid register name global variable");
574
}
575
576
//===----------------------------------------------------------------------===//
577
// TargetLowering Implementation
578
//===----------------------------------------------------------------------===//
579
580
SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
581
SmallVectorImpl<SDValue> &InVals) const {
582
SelectionDAG &DAG = CLI.DAG;
583
SDLoc DL = CLI.DL;
584
SDValue Chain = CLI.Chain;
585
auto PtrVT = getPointerTy(DAG.getDataLayout());
586
587
// VE target does not yet support tail call optimization.
588
CLI.IsTailCall = false;
589
590
// Get the base offset of the outgoing arguments stack space.
591
unsigned ArgsBaseOffset = Subtarget->getRsaSize();
592
// Get the size of the preserved arguments area
593
unsigned ArgsPreserved = 8 * 8u;
594
595
// Analyze operands of the call, assigning locations to each operand.
596
SmallVector<CCValAssign, 16> ArgLocs;
597
CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
598
*DAG.getContext());
599
// Allocate the preserved area first.
600
CCInfo.AllocateStack(ArgsPreserved, Align(8));
601
// We already allocated the preserved area, so the stack offset computed
602
// by CC_VE would be correct now.
603
CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));
604
605
// VE requires using both registers and the stack for varargs or unprototyped
606
// functions.
607
bool UseBoth = CLI.IsVarArg;
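// For example (illustrative), for a variadic call such as printf("%d", 1),
// each register argument (including the variadic one) is both copied into
// its argument register and stored to its stack slot (per ArgLocs2), so
// va_arg in the callee can find it on the stack.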
608
609
// Analyze operands again if it is required to store BOTH.
610
SmallVector<CCValAssign, 16> ArgLocs2;
611
CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
612
ArgLocs2, *DAG.getContext());
613
if (UseBoth)
614
CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));
615
616
// Get the size of the outgoing arguments stack space requirement.
617
unsigned ArgsSize = CCInfo.getStackSize();
618
619
// Keep stack frames 16-byte aligned.
620
ArgsSize = alignTo(ArgsSize, 16);
621
622
// Adjust the stack pointer to make room for the arguments.
623
// FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
624
// with more than 6 arguments.
625
Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
626
627
// Collect the set of registers to pass to the function and their values.
628
// This will be emitted as a sequence of CopyToReg nodes glued to the call
629
// instruction.
630
SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
631
632
// Collect chains from all the memory operations that copy arguments to the
633
// stack. They must follow the stack pointer adjustment above and precede the
634
// call instruction itself.
635
SmallVector<SDValue, 8> MemOpChains;
636
637
// VE needs the address of the callee function in a register,
638
// so prepare to copy it to SX12 here.
639
640
// If the callee is a GlobalAddress node (quite common, every direct call is)
641
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
642
// Likewise ExternalSymbol -> TargetExternalSymbol.
643
SDValue Callee = CLI.Callee;
644
645
bool IsPICCall = isPositionIndependent();
646
647
// PC-relative references to external symbols should go through $stub.
648
// If so, we need to prepare GlobalBaseReg first.
649
const TargetMachine &TM = DAG.getTarget();
650
const GlobalValue *GV = nullptr;
651
auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
652
if (CalleeG)
653
GV = CalleeG->getGlobal();
654
bool Local = TM.shouldAssumeDSOLocal(GV);
655
bool UsePlt = !Local;
656
MachineFunction &MF = DAG.getMachineFunction();
657
658
// Turn GlobalAddress/ExternalSymbol node into a value node
659
// containing the address of them here.
660
if (CalleeG) {
661
if (IsPICCall) {
662
if (UsePlt)
663
Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
664
Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
665
Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
666
} else {
667
Callee =
668
makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
669
}
670
} else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
671
if (IsPICCall) {
672
if (UsePlt)
673
Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
674
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
675
Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
676
} else {
677
Callee =
678
makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
679
}
680
}
681
682
RegsToPass.push_back(std::make_pair(VE::SX12, Callee));
683
684
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
685
CCValAssign &VA = ArgLocs[i];
686
SDValue Arg = CLI.OutVals[i];
687
688
// Promote the value if needed.
689
switch (VA.getLocInfo()) {
690
default:
691
llvm_unreachable("Unknown location info!");
692
case CCValAssign::Full:
693
break;
694
case CCValAssign::SExt:
695
Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
696
break;
697
case CCValAssign::ZExt:
698
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
699
break;
700
case CCValAssign::AExt:
701
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
702
break;
703
case CCValAssign::BCvt: {
704
// Convert a float argument to i64 with padding.
705
// 63 31 0
706
// +------+------+
707
// | float| 0 |
708
// +------+------+
709
assert(VA.getLocVT() == MVT::i64);
710
assert(VA.getValVT() == MVT::f32);
711
SDValue Undef = SDValue(
712
DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
713
SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
714
Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
715
MVT::i64, Undef, Arg, Sub_f32),
716
0);
717
break;
718
}
719
}
720
721
if (VA.isRegLoc()) {
722
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
723
if (!UseBoth)
724
continue;
725
VA = ArgLocs2[i];
726
}
727
728
assert(VA.isMemLoc());
729
730
// Create a store off the stack pointer for this argument.
731
SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
732
// The argument area starts at %fp/%sp + the size of reserved area.
733
SDValue PtrOff =
734
DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
735
PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
736
MemOpChains.push_back(
737
DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
738
}
739
740
// Emit all stores, make sure they occur before the call.
741
if (!MemOpChains.empty())
742
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
743
744
// Build a sequence of CopyToReg nodes glued together with token chain and
745
// glue operands which copy the outgoing args into registers. The InGlue is
746
// necessary since all emitted instructions must be stuck together in order
747
// to pass the live physical registers.
748
SDValue InGlue;
749
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
750
Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
751
RegsToPass[i].second, InGlue);
752
InGlue = Chain.getValue(1);
753
}
754
755
// Build the operands for the call instruction itself.
756
SmallVector<SDValue, 8> Ops;
757
Ops.push_back(Chain);
758
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
759
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
760
RegsToPass[i].second.getValueType()));
761
762
// Add a register mask operand representing the call-preserved registers.
763
const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
764
const uint32_t *Mask =
765
TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
766
assert(Mask && "Missing call preserved mask for calling convention");
767
Ops.push_back(DAG.getRegisterMask(Mask));
768
769
// Make sure the CopyToReg nodes are glued to the call instruction which
770
// consumes the registers.
771
if (InGlue.getNode())
772
Ops.push_back(InGlue);
773
774
// Now the call itself.
775
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
776
Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
777
InGlue = Chain.getValue(1);
778
779
// Revert the stack pointer immediately after the call.
780
Chain = DAG.getCALLSEQ_END(Chain, ArgsSize, 0, InGlue, DL);
781
InGlue = Chain.getValue(1);
782
783
// Now extract the return values. This is more or less the same as
784
// LowerFormalArguments.
785
786
// Assign locations to each value returned by this call.
787
SmallVector<CCValAssign, 16> RVLocs;
788
CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
789
*DAG.getContext());
790
791
// Set inreg flag manually for codegen generated library calls that
792
// return float.
793
if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
794
CLI.Ins[0].Flags.setInReg();
795
796
RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));
797
798
// Copy all of the result registers out of their specified physreg.
799
for (unsigned i = 0; i != RVLocs.size(); ++i) {
800
CCValAssign &VA = RVLocs[i];
801
assert(!VA.needsCustom() && "Unexpected custom lowering");
802
Register Reg = VA.getLocReg();
803
804
// When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
805
// reside in the same register in the high and low bits. Reuse the
806
// CopyFromReg previous node to avoid duplicate copies.
807
SDValue RV;
808
if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
809
if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
810
RV = Chain.getValue(0);
811
812
// But usually we'll create a new CopyFromReg for a different register.
813
if (!RV.getNode()) {
814
RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
815
Chain = RV.getValue(1);
816
InGlue = Chain.getValue(2);
817
}
818
819
// The callee promoted the return value, so insert an Assert?ext SDNode so
820
// we won't promote the value again in this function.
821
switch (VA.getLocInfo()) {
822
case CCValAssign::SExt:
823
RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
824
DAG.getValueType(VA.getValVT()));
825
break;
826
case CCValAssign::ZExt:
827
RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
828
DAG.getValueType(VA.getValVT()));
829
break;
830
case CCValAssign::BCvt: {
831
// Extract a float return value from i64 with padding.
832
// 63 31 0
833
// +------+------+
834
// | float| 0 |
835
// +------+------+
836
assert(VA.getLocVT() == MVT::i64);
837
assert(VA.getValVT() == MVT::f32);
838
SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
839
RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
840
MVT::f32, RV, Sub_f32),
841
0);
842
break;
843
}
844
default:
845
break;
846
}
847
848
// Truncate the register down to the return value type.
849
if (VA.isExtInLoc())
850
RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
851
852
InVals.push_back(RV);
853
}
854
855
return Chain;
856
}
857
858
bool VETargetLowering::isOffsetFoldingLegal(
859
const GlobalAddressSDNode *GA) const {
860
// VE uses 64 bit addressing, so we need multiple instructions to generate
861
// an address. Folding address with offset increases the number of
862
// instructions, so we disable it here. Offsets will be folded in
863
// the DAG combine later if it is worth doing so.
864
return false;
865
}
866
867
/// isFPImmLegal - Returns true if the target can instruction select the
868
/// specified FP immediate natively. If false, the legalizer will
869
/// materialize the FP immediate as a load from a constant pool.
870
bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
871
bool ForCodeSize) const {
872
return VT == MVT::f32 || VT == MVT::f64;
873
}
874
875
/// Determine if the target supports unaligned memory accesses.
876
///
877
/// This function returns true if the target allows unaligned memory accesses
878
/// of the specified type in the given address space. If true, it also returns
879
/// whether the unaligned memory access is "fast" in the last argument by
880
/// reference. This is used, for example, in situations where an array
881
/// copy/move/set is converted to a sequence of store operations. Its use
882
/// helps to ensure that such replacements don't generate code that causes an
883
/// alignment error (trap) on the target machine.
884
bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
885
unsigned AddrSpace,
886
Align A,
887
MachineMemOperand::Flags,
888
unsigned *Fast) const {
889
if (Fast) {
890
// It's always fast on VE
891
*Fast = 1;
892
}
893
return true;
894
}
895
896
VETargetLowering::VETargetLowering(const TargetMachine &TM,
897
const VESubtarget &STI)
898
: TargetLowering(TM), Subtarget(&STI) {
899
// Instructions which use registers as conditionals examine all the
900
// bits (as does the pseudo SELECT_CC expansion). I don't think it
901
// matters much whether it's ZeroOrOneBooleanContent, or
902
// ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
903
// former.
904
setBooleanContents(ZeroOrOneBooleanContent);
905
setBooleanVectorContents(ZeroOrOneBooleanContent);
906
907
initRegisterClasses();
908
initSPUActions();
909
initVPUActions();
910
911
setStackPointerRegisterToSaveRestore(VE::SX11);
912
913
// We have target-specific dag combine patterns for the following nodes:
914
setTargetDAGCombine(ISD::TRUNCATE);
915
setTargetDAGCombine(ISD::SELECT);
916
setTargetDAGCombine(ISD::SELECT_CC);
917
918
// Set function alignment to 16 bytes
919
setMinFunctionAlignment(Align(16));
920
921
// VE stores all arguments with 8-byte alignment
922
setMinStackArgumentAlignment(Align(8));
923
924
computeRegisterProperties(Subtarget->getRegisterInfo());
925
}
926
927
const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
928
#define TARGET_NODE_CASE(NAME) \
929
case VEISD::NAME: \
930
return "VEISD::" #NAME;
931
switch ((VEISD::NodeType)Opcode) {
932
case VEISD::FIRST_NUMBER:
933
break;
934
TARGET_NODE_CASE(CMPI)
935
TARGET_NODE_CASE(CMPU)
936
TARGET_NODE_CASE(CMPF)
937
TARGET_NODE_CASE(CMPQ)
938
TARGET_NODE_CASE(CMOV)
939
TARGET_NODE_CASE(CALL)
940
TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
941
TARGET_NODE_CASE(EH_SJLJ_SETJMP)
942
TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
943
TARGET_NODE_CASE(GETFUNPLT)
944
TARGET_NODE_CASE(GETSTACKTOP)
945
TARGET_NODE_CASE(GETTLSADDR)
946
TARGET_NODE_CASE(GLOBAL_BASE_REG)
947
TARGET_NODE_CASE(Hi)
948
TARGET_NODE_CASE(Lo)
949
TARGET_NODE_CASE(RET_GLUE)
950
TARGET_NODE_CASE(TS1AM)
951
TARGET_NODE_CASE(VEC_UNPACK_LO)
952
TARGET_NODE_CASE(VEC_UNPACK_HI)
953
TARGET_NODE_CASE(VEC_PACK)
954
TARGET_NODE_CASE(VEC_BROADCAST)
955
TARGET_NODE_CASE(REPL_I32)
956
TARGET_NODE_CASE(REPL_F32)
957
958
TARGET_NODE_CASE(LEGALAVL)
959
960
// Register the VVP_* SDNodes.
961
#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
962
#include "VVPNodes.def"
963
}
964
#undef TARGET_NODE_CASE
965
return nullptr;
966
}
967
968
EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
969
EVT VT) const {
970
return MVT::i32;
971
}
972
973
// Convert to a target node and set target flags.
974
SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
975
SelectionDAG &DAG) const {
976
if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
977
return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
978
GA->getValueType(0), GA->getOffset(), TF);
979
980
if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
981
return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
982
0, TF);
983
984
if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
985
return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
986
CP->getAlign(), CP->getOffset(), TF);
987
988
if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
989
return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
990
TF);
991
992
if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
993
return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);
994
995
llvm_unreachable("Unhandled address SDNode");
996
}
997
998
// Split Op into high and low parts according to HiTF and LoTF.
999
// Return an ADD node combining the parts.
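// For VK_VE_HI32/VK_VE_LO32 this typically materializes as something like
// (cf. the sequences documented in makeAddress() below):
//   lea     %reg, sym@lo
//   and     %reg, %reg, (32)0
//   lea.sl  %reg, sym@hi(, %reg)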
1000
SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
1001
SelectionDAG &DAG) const {
1002
SDLoc DL(Op);
1003
EVT VT = Op.getValueType();
1004
SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
1005
SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
1006
return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
1007
}
1008
1009
// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
1010
// or ExternalSymbol SDNode.
1011
SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
1012
SDLoc DL(Op);
1013
EVT PtrVT = Op.getValueType();
1014
1015
// Handle PIC mode first. VE needs a got load for every variable!
1016
if (isPositionIndependent()) {
1017
auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);
1018
1019
if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
1020
(GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
1021
// Create the following instructions for local-linkage PIC code.
1022
// lea %reg, label@gotoff_lo
1023
// and %reg, %reg, (32)0
1024
// lea.sl %reg, label@gotoff_hi(%reg, %got)
1025
SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
1026
VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1027
SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
1028
return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
1029
}
1030
// Create the following instructions for non-local-linkage PIC code.
1031
// lea %reg, label@got_lo
1032
// and %reg, %reg, (32)0
1033
// lea.sl %reg, label@got_hi(%reg)
1034
// ld %reg, (%reg, %got)
1035
SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
1036
VEMCExpr::VK_VE_GOT_LO32, DAG);
1037
SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
1038
SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
1039
return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
1040
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
1041
}
1042
1043
// This is one of the absolute code models.
1044
switch (getTargetMachine().getCodeModel()) {
1045
default:
1046
llvm_unreachable("Unsupported absolute code model");
1047
case CodeModel::Small:
1048
case CodeModel::Medium:
1049
case CodeModel::Large:
1050
// abs64.
1051
return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
1052
}
1053
}
1054
1055
/// Custom Lower {
1056
1057
// The mappings for emitLeading/TrailingFence for VE are designed following
1058
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
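// In short, the mapping implemented by the two functions below is:
//   ordering    leading fence        trailing fence
//   monotonic   none                 none
//   acquire     none                 fence acquire
//   release     fence release        none
//   acq_rel     fence release        fence acquire
//   seq_cst     fence seq_cst (*)    fence seq_cst
// (*) emitted only when the instruction has an atomic store.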
1059
Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
1060
Instruction *Inst,
1061
AtomicOrdering Ord) const {
1062
switch (Ord) {
1063
case AtomicOrdering::NotAtomic:
1064
case AtomicOrdering::Unordered:
1065
llvm_unreachable("Invalid fence: unordered/non-atomic");
1066
case AtomicOrdering::Monotonic:
1067
case AtomicOrdering::Acquire:
1068
return nullptr; // Nothing to do
1069
case AtomicOrdering::Release:
1070
case AtomicOrdering::AcquireRelease:
1071
return Builder.CreateFence(AtomicOrdering::Release);
1072
case AtomicOrdering::SequentiallyConsistent:
1073
if (!Inst->hasAtomicStore())
1074
return nullptr; // Nothing to do
1075
return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
1076
}
1077
llvm_unreachable("Unknown fence ordering in emitLeadingFence");
1078
}
1079
1080
Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
1081
Instruction *Inst,
1082
AtomicOrdering Ord) const {
1083
switch (Ord) {
1084
case AtomicOrdering::NotAtomic:
1085
case AtomicOrdering::Unordered:
1086
llvm_unreachable("Invalid fence: unordered/not-atomic");
1087
case AtomicOrdering::Monotonic:
1088
case AtomicOrdering::Release:
1089
return nullptr; // Nothing to do
1090
case AtomicOrdering::Acquire:
1091
case AtomicOrdering::AcquireRelease:
1092
return Builder.CreateFence(AtomicOrdering::Acquire);
1093
case AtomicOrdering::SequentiallyConsistent:
1094
return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
1095
}
1096
llvm_unreachable("Unknown fence ordering in emitTrailingFence");
1097
}
1098
1099
SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
1100
SelectionDAG &DAG) const {
1101
SDLoc DL(Op);
1102
AtomicOrdering FenceOrdering =
1103
static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
1104
SyncScope::ID FenceSSID =
1105
static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
1106
1107
// VE uses Release consistency, so we need a fence instruction if it is a
1108
// cross-thread fence.
1109
if (FenceSSID == SyncScope::System) {
1110
switch (FenceOrdering) {
1111
case AtomicOrdering::NotAtomic:
1112
case AtomicOrdering::Unordered:
1113
case AtomicOrdering::Monotonic:
1114
// No need to generate fencem instruction here.
1115
break;
1116
case AtomicOrdering::Acquire:
1117
// Generate "fencem 2" as acquire fence.
1118
return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1119
DAG.getTargetConstant(2, DL, MVT::i32),
1120
Op.getOperand(0)),
1121
0);
1122
case AtomicOrdering::Release:
1123
// Generate "fencem 1" as release fence.
1124
return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1125
DAG.getTargetConstant(1, DL, MVT::i32),
1126
Op.getOperand(0)),
1127
0);
1128
case AtomicOrdering::AcquireRelease:
1129
case AtomicOrdering::SequentiallyConsistent:
1130
// Generate "fencem 3" as acq_rel and seq_cst fence.
1131
// FIXME: "fencem 3" doesn't wait for PCIe deveices accesses,
1132
// so seq_cst may require more instructions for them.
1133
return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1134
DAG.getTargetConstant(3, DL, MVT::i32),
1135
Op.getOperand(0)),
1136
0);
1137
}
1138
}
1139
1140
// MEMBARRIER is a compiler barrier; it codegens to a no-op.
1141
return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1142
}
1143
1144
TargetLowering::AtomicExpansionKind
1145
VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
1146
// We have TS1AM implementation for i8/i16/i32/i64, so use it.
1147
if (AI->getOperation() == AtomicRMWInst::Xchg) {
1148
return AtomicExpansionKind::None;
1149
}
1150
// FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.
1151
1152
// Otherwise, expand it using a compare-and-exchange instruction to avoid
1153
// calling __sync_fetch_and_* functions.
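// For example (illustrative IR):
//   atomicrmw xchg ptr %p, i32 %v seq_cst   ; kept, lowered via swap/TS1AM
//   atomicrmw add  ptr %p, i32 %v seq_cst   ; expanded to a cmpxchg loop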
1154
return AtomicExpansionKind::CmpXChg;
1155
}
1156
1157
static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
1158
SDValue &Bits) {
1159
SDLoc DL(Op);
1160
AtomicSDNode *N = cast<AtomicSDNode>(Op);
1161
SDValue Ptr = N->getOperand(1);
1162
SDValue Val = N->getOperand(2);
1163
EVT PtrVT = Ptr.getValueType();
1164
bool Byte = N->getMemoryVT() == MVT::i8;
1165
// Remainder = AND Ptr, 3
1166
// Flag = 1 << Remainder ; If Byte is true (1 byte swap flag)
1167
// Flag = 3 << Remainder ; If Byte is false (2 bytes swap flag)
1168
// Bits = Remainder << 3
1169
// NewVal = Val << Bits
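// For example, an i8 swap at an address with (Ptr & 3) == 2 gives
// Flag = 1 << 2 = 4, Bits = 2 << 3 = 16, and NewVal = Val << 16,
// i.e. the byte ends up in bits [23:16] of the aligned 4-byte word.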
1170
SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
1171
SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
1172
SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
1173
: DAG.getConstant(3, DL, MVT::i32);
1174
Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
1175
Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
1176
return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
1177
}
1178
1179
static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
1180
SDValue Bits) {
1181
SDLoc DL(Op);
1182
EVT VT = Data.getValueType();
1183
bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
1184
// NewData = Data >> Bits
1185
// Result = NewData & 0xff ; If Byte is true (1 byte)
1186
// Result = NewData & 0xffff ; If Byte is false (2 bytes)
1187
1188
SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
1189
return DAG.getNode(ISD::AND, DL, VT,
1190
{NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
1191
}
1192
1193
SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
1194
SelectionDAG &DAG) const {
1195
SDLoc DL(Op);
1196
AtomicSDNode *N = cast<AtomicSDNode>(Op);
1197
1198
if (N->getMemoryVT() == MVT::i8) {
1199
// For i8, use "ts1am"
1200
// Input:
1201
// ATOMIC_SWAP Ptr, Val, Order
1202
//
1203
// Output:
1204
// Remainder = AND Ptr, 3
1205
// Flag = 1 << Remainder ; 1 byte swap flag for TS1AM inst.
1206
// Bits = Remainder << 3
1207
// NewVal = Val << Bits
1208
//
1209
// Aligned = AND Ptr, -4
1210
// Data = TS1AM Aligned, Flag, NewVal
1211
//
1212
// NewData = Data >> Bits
1213
// Result = NewData & 0xff ; 1 byte result
1214
SDValue Flag;
1215
SDValue Bits;
1216
SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1217
1218
SDValue Ptr = N->getOperand(1);
1219
SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1220
{Ptr, DAG.getConstant(-4, DL, MVT::i64)});
1221
SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
1222
DAG.getVTList(Op.getNode()->getValueType(0),
1223
Op.getNode()->getValueType(1)),
1224
{N->getChain(), Aligned, Flag, NewVal},
1225
N->getMemOperand());
1226
1227
SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
1228
SDValue Chain = TS1AM.getValue(1);
1229
return DAG.getMergeValues({Result, Chain}, DL);
1230
}
1231
if (N->getMemoryVT() == MVT::i16) {
1232
// For i16, use "ts1am"
1233
SDValue Flag;
1234
SDValue Bits;
1235
SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1236
1237
SDValue Ptr = N->getOperand(1);
1238
SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1239
{Ptr, DAG.getConstant(-4, DL, MVT::i64)});
1240
SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
1241
DAG.getVTList(Op.getNode()->getValueType(0),
1242
Op.getNode()->getValueType(1)),
1243
{N->getChain(), Aligned, Flag, NewVal},
1244
N->getMemOperand());
1245
1246
SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
1247
SDValue Chain = TS1AM.getValue(1);
1248
return DAG.getMergeValues({Result, Chain}, DL);
1249
}
1250
// Otherwise, let llvm legalize it.
1251
return Op;
1252
}
1253
1254
SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
1255
SelectionDAG &DAG) const {
1256
return makeAddress(Op, DAG);
1257
}
1258
1259
SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
1260
SelectionDAG &DAG) const {
1261
return makeAddress(Op, DAG);
1262
}
1263
1264
SDValue VETargetLowering::lowerConstantPool(SDValue Op,
1265
SelectionDAG &DAG) const {
1266
return makeAddress(Op, DAG);
1267
}
1268
1269
SDValue
1270
VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
1271
SelectionDAG &DAG) const {
1272
SDLoc DL(Op);
1273
1274
// Generate the following code:
1275
// t1: ch,glue = callseq_start t0, 0, 0
1276
// t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
1277
// t3: ch,glue = callseq_end t2, 0, 0, t2:2
1278
// t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
1279
SDValue Label = withTargetFlags(Op, 0, DAG);
1280
EVT PtrVT = Op.getValueType();
1281
1282
// Lowering the machine isd will make sure everything is in the right
1283
// location.
1284
SDValue Chain = DAG.getEntryNode();
1285
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1286
const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
1287
DAG.getMachineFunction(), CallingConv::C);
1288
Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
1289
SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
1290
Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
1291
Chain = DAG.getCALLSEQ_END(Chain, 64, 0, Chain.getValue(1), DL);
1292
Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));
1293
1294
// GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
1295
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1296
MFI.setHasCalls(true);
1297
1298
// Also generate code to prepare a GOT register if it is PIC.
1299
if (isPositionIndependent()) {
1300
MachineFunction &MF = DAG.getMachineFunction();
1301
Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
1302
}
1303
1304
return Chain;
1305
}
1306
1307
SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
1308
SelectionDAG &DAG) const {
1309
// The current implementation of nld (2.26) doesn't allow local exec model
1310
// code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
1311
// generate the general dynamic model code sequence.
1312
//
1313
// *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
1314
return lowerToTLSGeneralDynamicModel(Op, DAG);
1315
}
1316
1317
SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1318
return makeAddress(Op, DAG);
1319
}
1320
1321
// Lower a f128 load into two f64 loads.
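// The two halves are read from 0(addr) and 8(addr) and reassembled into an
// f128 register pair with INSERT_SUBREG, roughly:
//   Lo64 = load f64, 0(addr)           ; -> sub_odd
//   Hi64 = load f64, 8(addr)           ; -> sub_even
//   f128 = INSERT_SUBREG(INSERT_SUBREG(IMPLICIT_DEF, Hi64, sub_even),
//                        Lo64, sub_odd)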
1322
static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
1323
SDLoc DL(Op);
1324
LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
1325
assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1326
Align Alignment = LdNode->getAlign();
1327
if (Alignment > 8)
1328
Alignment = Align(8);
1329
1330
SDValue Lo64 =
1331
DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
1332
LdNode->getPointerInfo(), Alignment,
1333
LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1334
: MachineMemOperand::MONone);
1335
EVT AddrVT = LdNode->getBasePtr().getValueType();
1336
SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
1337
DAG.getConstant(8, DL, AddrVT));
1338
SDValue Hi64 =
1339
DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
1340
LdNode->getPointerInfo(), Alignment,
1341
LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1342
: MachineMemOperand::MONone);
1343
1344
SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1345
SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1346
1347
// VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1348
SDNode *InFP128 =
1349
DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
1350
InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1351
SDValue(InFP128, 0), Hi64, SubRegEven);
1352
InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1353
SDValue(InFP128, 0), Lo64, SubRegOdd);
1354
SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
1355
SDValue(Hi64.getNode(), 1)};
1356
SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1357
SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
1358
return DAG.getMergeValues(Ops, DL);
1359
}
1360
1361
// Lower a vXi1 load into the following instructions:
1362
// LDrii %1, (,%addr)
1363
// LVMxir %vm, 0, %1
1364
// LDrii %2, 8(,%addr)
1365
// LVMxir %vm, 0, %2
1366
// ...
1367
static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
1368
SDLoc DL(Op);
1369
LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
1370
assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1371
1372
SDValue BasePtr = LdNode->getBasePtr();
1373
Align Alignment = LdNode->getAlign();
1374
if (Alignment > 8)
1375
Alignment = Align(8);
1376
1377
EVT AddrVT = BasePtr.getValueType();
1378
EVT MemVT = LdNode->getMemoryVT();
1379
if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1380
SDValue OutChains[4];
1381
SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
1382
for (int i = 0; i < 4; ++i) {
1383
// Generate load dag and prepare chains.
1384
SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1385
DAG.getConstant(8 * i, DL, AddrVT));
1386
SDValue Val =
1387
DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
1388
LdNode->getPointerInfo(), Alignment,
1389
LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1390
: MachineMemOperand::MONone);
1391
OutChains[i] = SDValue(Val.getNode(), 1);
1392
1393
VM = DAG.getMachineNode(VE::LVMir_m, DL, MVT::i64,
1394
DAG.getTargetConstant(i, DL, MVT::i64), Val,
1395
SDValue(VM, 0));
1396
}
1397
SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1398
SDValue Ops[2] = {SDValue(VM, 0), OutChain};
1399
return DAG.getMergeValues(Ops, DL);
1400
} else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1401
SDValue OutChains[8];
1402
SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
1403
for (int i = 0; i < 8; ++i) {
1404
// Generate load dag and prepare chains.
1405
SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1406
DAG.getConstant(8 * i, DL, AddrVT));
1407
SDValue Val =
1408
DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
1409
LdNode->getPointerInfo(), Alignment,
1410
LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1411
: MachineMemOperand::MONone);
1412
OutChains[i] = SDValue(Val.getNode(), 1);
1413
1414
VM = DAG.getMachineNode(VE::LVMyir_y, DL, MVT::i64,
1415
DAG.getTargetConstant(i, DL, MVT::i64), Val,
1416
SDValue(VM, 0));
1417
}
1418
SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1419
SDValue Ops[2] = {SDValue(VM, 0), OutChain};
1420
return DAG.getMergeValues(Ops, DL);
1421
} else {
1422
// Otherwise, ask llvm to expand it.
1423
return SDValue();
1424
}
1425
}
1426
1427
SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1428
LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
1429
EVT MemVT = LdNode->getMemoryVT();
1430
1431
// If VPU is enabled, always expand non-mask vector loads to VVP
1432
if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
1433
return lowerToVVP(Op, DAG);
1434
1435
SDValue BasePtr = LdNode->getBasePtr();
1436
if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1437
// Do not expand load instructions with a frame index here because of
1438
// dependency problems. We expand it later in eliminateFrameIndex().
1439
return Op;
1440
}
1441
1442
if (MemVT == MVT::f128)
1443
return lowerLoadF128(Op, DAG);
1444
if (isMaskType(MemVT))
1445
return lowerLoadI1(Op, DAG);
1446
1447
return Op;
1448
}
1449
1450
// Lower a f128 store into two f64 stores.
1451
static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
1452
SDLoc DL(Op);
1453
StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
1454
assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1455
1456
SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1457
SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1458
1459
SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1460
StNode->getValue(), SubRegEven);
1461
SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1462
StNode->getValue(), SubRegOdd);
1463
1464
Align Alignment = StNode->getAlign();
1465
if (Alignment > 8)
1466
Alignment = Align(8);
1467
1468
// VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1469
SDValue OutChains[2];
1470
OutChains[0] =
1471
DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0),
1472
StNode->getBasePtr(), MachinePointerInfo(), Alignment,
1473
StNode->isVolatile() ? MachineMemOperand::MOVolatile
1474
: MachineMemOperand::MONone);
1475
EVT AddrVT = StNode->getBasePtr().getValueType();
1476
SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(),
1477
DAG.getConstant(8, DL, AddrVT));
1478
OutChains[1] =
1479
DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr,
1480
MachinePointerInfo(), Alignment,
1481
StNode->isVolatile() ? MachineMemOperand::MOVolatile
1482
: MachineMemOperand::MONone);
1483
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1484
}
1485
1486
// Lower a vXi1 store into the following instructions:
1487
// SVMi %1, %vm, 0
1488
// STrii %1, (,%addr)
1489
// SVMi %2, %vm, 1
1490
// STrii %2, 8(,%addr)
1491
// ...
1492
static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
1493
SDLoc DL(Op);
1494
StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
1495
assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1496
1497
SDValue BasePtr = StNode->getBasePtr();
1498
Align Alignment = StNode->getAlign();
1499
if (Alignment > 8)
1500
Alignment = Align(8);
1501
EVT AddrVT = BasePtr.getValueType();
1502
EVT MemVT = StNode->getMemoryVT();
1503
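// A v256i1 mask is split into four 64-bit chunks (a v512i1 mask into eight);
// SVM moves each chunk out of the mask register so it can be stored as a
// plain i64.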
if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1504
SDValue OutChains[4];
1505
for (int i = 0; i < 4; ++i) {
1506
SDNode *V =
1507
DAG.getMachineNode(VE::SVMmi, DL, MVT::i64, StNode->getValue(),
1508
DAG.getTargetConstant(i, DL, MVT::i64));
1509
SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1510
DAG.getConstant(8 * i, DL, AddrVT));
1511
OutChains[i] =
1512
DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
1513
MachinePointerInfo(), Alignment,
1514
StNode->isVolatile() ? MachineMemOperand::MOVolatile
1515
: MachineMemOperand::MONone);
1516
}
1517
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1518
} else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1519
SDValue OutChains[8];
1520
for (int i = 0; i < 8; ++i) {
1521
SDNode *V =
1522
DAG.getMachineNode(VE::SVMyi, DL, MVT::i64, StNode->getValue(),
1523
DAG.getTargetConstant(i, DL, MVT::i64));
1524
SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1525
DAG.getConstant(8 * i, DL, AddrVT));
1526
OutChains[i] =
1527
DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
1528
MachinePointerInfo(), Alignment,
1529
StNode->isVolatile() ? MachineMemOperand::MOVolatile
1530
: MachineMemOperand::MONone);
1531
}
1532
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1533
} else {
1534
// Otherwise, ask llvm to expand it.
1535
return SDValue();
1536
}
1537
}
1538
1539
SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1540
StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
1541
assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1542
EVT MemVT = StNode->getMemoryVT();
1543
1544
// If VPU is enabled, always expand non-mask vector stores to VVP
1545
if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
1546
return lowerToVVP(Op, DAG);
1547
1548
SDValue BasePtr = StNode->getBasePtr();
1549
if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1550
// Do not expand store instruction with frame index here because of
1551
// dependency problems. We expand it later in eliminateFrameIndex().
1552
return Op;
1553
}
1554
1555
if (MemVT == MVT::f128)
1556
return lowerStoreF128(Op, DAG);
1557
if (isMaskType(MemVT))
1558
return lowerStoreI1(Op, DAG);
1559
1560
// Otherwise, ask llvm to expand it.
1561
return SDValue();
1562
}
1563
1564
SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1565
MachineFunction &MF = DAG.getMachineFunction();
1566
VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
1567
auto PtrVT = getPointerTy(DAG.getDataLayout());
1568
1569
// Need frame address to find the address of VarArgsFrameIndex.
1570
MF.getFrameInfo().setFrameAddressIsTaken(true);
1571
1572
// vastart just stores the address of the VarArgsFrameIndex slot into the
1573
// memory location argument.
1574
SDLoc DL(Op);
1575
SDValue Offset =
1576
DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
1577
DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
1578
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1579
return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
1580
MachinePointerInfo(SV));
1581
}
1582
1583
SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
1584
SDNode *Node = Op.getNode();
1585
EVT VT = Node->getValueType(0);
1586
SDValue InChain = Node->getOperand(0);
1587
SDValue VAListPtr = Node->getOperand(1);
1588
EVT PtrVT = VAListPtr.getValueType();
1589
const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
1590
SDLoc DL(Node);
1591
SDValue VAList =
1592
DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
1593
SDValue Chain = VAList.getValue(1);
1594
SDValue NextPtr;
1595
1596
if (VT == MVT::f128) {
// VE f128 values must be stored with 16-byte alignment. We don't know the
// actual alignment of VAList, so we align it up dynamically.
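// Round VAList up to a 16-byte boundary: VAList = (VAList + 15) & -16.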
int Align = 16;
1601
VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1602
DAG.getConstant(Align - 1, DL, PtrVT));
1603
VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
1604
DAG.getConstant(-Align, DL, PtrVT));
1605
// Increment the pointer, VAList, by 16 to the next vaarg.
1606
NextPtr =
1607
DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL));
1608
} else if (VT == MVT::f32) {
1609
// float --> need special handling like below.
//    0      4
//    +------+------+
//    | empty| float|
//    +------+------+
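// The float occupies the high-addressed half of its 8-byte va slot, so the
// value is loaded from VAList + 4 while NextPtr still advances by 8.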
// Increment the pointer, VAList, by 8 to the next vaarg.
1615
NextPtr =
1616
DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1617
// Then, adjust VAList.
1618
unsigned InternalOffset = 4;
1619
VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1620
DAG.getConstant(InternalOffset, DL, PtrVT));
1621
} else {
1622
// Increment the pointer, VAList, by 8 to the next vaarg.
1623
NextPtr =
1624
DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1625
}
1626
1627
// Store the incremented VAList to the legalized pointer.
1628
InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));
1629
1630
// Load the actual argument out of the pointer VAList.
1631
// We can't count on greater alignment than the word size.
1632
return DAG.getLoad(
1633
VT, DL, InChain, VAList, MachinePointerInfo(),
1634
Align(std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8));
1635
}
1636
1637
SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
1638
SelectionDAG &DAG) const {
1639
// Generate the following code.
//   (void)__ve_grow_stack(size);  // __ve_grow_stack_align when over-aligned
//   ret = GETSTACKTOP;            // pseudo instruction
1642
SDLoc DL(Op);
1643
1644
// Get the inputs.
1645
SDNode *Node = Op.getNode();
1646
SDValue Chain = Op.getOperand(0);
1647
SDValue Size = Op.getOperand(1);
1648
MaybeAlign Alignment(Op.getConstantOperandVal(2));
1649
EVT VT = Node->getValueType(0);
1650
1651
// Chain the dynamic stack allocation so that it doesn't modify the stack
1652
// pointer when other instructions are using the stack.
1653
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
1654
1655
const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
1656
Align StackAlign = TFI.getStackAlign();
1657
bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
1658
1659
// Prepare arguments
1660
TargetLowering::ArgListTy Args;
1661
TargetLowering::ArgListEntry Entry;
1662
Entry.Node = Size;
1663
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1664
Args.push_back(Entry);
1665
if (NeedsAlign) {
1666
Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
1667
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1668
Args.push_back(Entry);
1669
}
1670
Type *RetTy = Type::getVoidTy(*DAG.getContext());
1671
1672
EVT PtrVT = Op.getValueType();
1673
SDValue Callee;
1674
if (NeedsAlign) {
1675
Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
1676
} else {
1677
Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
1678
}
1679
1680
TargetLowering::CallLoweringInfo CLI(DAG);
1681
CLI.setDebugLoc(DL)
1682
.setChain(Chain)
1683
.setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
1684
.setDiscardResult(true);
1685
std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
1686
Chain = pair.second;
1687
SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
1688
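// Re-align the returned stack top to the requested alignment:
//   Result = (Result + Align - 1) & ~(Align - 1)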
if (NeedsAlign) {
1689
Result = DAG.getNode(ISD::ADD, DL, VT, Result,
1690
DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
1691
Result = DAG.getNode(ISD::AND, DL, VT, Result,
1692
DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
1693
}
1694
// Chain = Result.getValue(1);
1695
Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), DL);
1696
1697
SDValue Ops[2] = {Result, Chain};
1698
return DAG.getMergeValues(Ops, DL);
1699
}
1700
1701
SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
1702
SelectionDAG &DAG) const {
1703
SDLoc DL(Op);
1704
return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0),
1705
Op.getOperand(1));
1706
}
1707
1708
SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
1709
SelectionDAG &DAG) const {
1710
SDLoc DL(Op);
1711
return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL,
1712
DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
1713
Op.getOperand(1));
1714
}
1715
1716
SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
1717
SelectionDAG &DAG) const {
1718
SDLoc DL(Op);
1719
return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
1720
Op.getOperand(0));
1721
}
1722
1723
static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
1724
const VETargetLowering &TLI,
1725
const VESubtarget *Subtarget) {
1726
SDLoc DL(Op);
1727
MachineFunction &MF = DAG.getMachineFunction();
1728
EVT PtrVT = TLI.getPointerTy(MF.getDataLayout());
1729
1730
MachineFrameInfo &MFI = MF.getFrameInfo();
1731
MFI.setFrameAddressIsTaken(true);
1732
1733
unsigned Depth = Op.getConstantOperandVal(0);
1734
const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
1735
Register FrameReg = RegInfo->getFrameRegister(MF);
1736
SDValue FrameAddr =
1737
DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT);
1738
while (Depth--)
1739
FrameAddr = DAG.getLoad(Op.getValueType(), DL, DAG.getEntryNode(),
1740
FrameAddr, MachinePointerInfo());
1741
return FrameAddr;
1742
}
1743
1744
static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
1745
const VETargetLowering &TLI,
1746
const VESubtarget *Subtarget) {
1747
MachineFunction &MF = DAG.getMachineFunction();
1748
MachineFrameInfo &MFI = MF.getFrameInfo();
1749
MFI.setReturnAddressIsTaken(true);
1750
1751
if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
1752
return SDValue();
1753
1754
SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
1755
1756
SDLoc DL(Op);
1757
EVT VT = Op.getValueType();
1758
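// The return address is saved at offset 8 from the frame pointer, so load
// it from FrameAddr + 8.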
SDValue Offset = DAG.getConstant(8, DL, VT);
1759
return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1760
DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1761
MachinePointerInfo());
1762
}
1763
1764
SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1765
SelectionDAG &DAG) const {
1766
SDLoc DL(Op);
1767
unsigned IntNo = Op.getConstantOperandVal(0);
1768
switch (IntNo) {
1769
default: // Don't custom lower most intrinsics.
1770
return SDValue();
1771
case Intrinsic::eh_sjlj_lsda: {
1772
MachineFunction &MF = DAG.getMachineFunction();
1773
MVT VT = Op.getSimpleValueType();
1774
const VETargetMachine *TM =
1775
static_cast<const VETargetMachine *>(&DAG.getTarget());
1776
1777
// Create the GCC_except_tableXX string. The real symbol for it will be
// generated in EHStreamer::emitExceptionTable() later, so we just
// borrow its name here.
1780
TM->getStrList()->push_back(std::string(
1781
(Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
1782
SDValue Addr =
1783
DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), VT, 0);
1784
if (isPositionIndependent()) {
1785
Addr = makeHiLoPair(Addr, VEMCExpr::VK_VE_GOTOFF_HI32,
1786
VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1787
SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT);
1788
return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Addr);
1789
}
1790
return makeHiLoPair(Addr, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
1791
}
1792
}
1793
}
1794
1795
static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
1796
if (!isa<BuildVectorSDNode>(N))
1797
return false;
1798
const auto *BVN = cast<BuildVectorSDNode>(N);
1799
1800
// Find first non-undef insertion.
1801
unsigned Idx;
1802
for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
1803
auto ElemV = BVN->getOperand(Idx);
1804
if (!ElemV->isUndef())
1805
break;
1806
}
1807
// Catch the (hypothetical) all-undef case.
1808
if (Idx == BVN->getNumOperands())
1809
return false;
1810
// Remember insertion.
1811
UniqueIdx = Idx++;
1812
// Verify that all other insertions are undef.
1813
for (; Idx < BVN->getNumOperands(); ++Idx) {
1814
auto ElemV = BVN->getOperand(Idx);
1815
if (!ElemV->isUndef())
1816
return false;
1817
}
1818
return true;
1819
}
1820
1821
static SDValue getSplatValue(SDNode *N) {
1822
if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
1823
return BuildVec->getSplatValue();
1824
}
1825
return SDValue();
1826
}
1827
1828
SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
1829
SelectionDAG &DAG) const {
1830
VECustomDAG CDAG(DAG, Op);
1831
MVT ResultVT = Op.getSimpleValueType();
1832
1833
// If there is just one element, expand to INSERT_VECTOR_ELT.
1834
unsigned UniqueIdx;
1835
if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
1836
SDValue AccuV = CDAG.getUNDEF(Op.getValueType());
1837
auto ElemV = Op->getOperand(UniqueIdx);
1838
SDValue IdxV = CDAG.getConstant(UniqueIdx, MVT::i64);
1839
return CDAG.getNode(ISD::INSERT_VECTOR_ELT, ResultVT, {AccuV, ElemV, IdxV});
1840
}
1841
1842
// Else emit a broadcast.
1843
if (SDValue ScalarV = getSplatValue(Op.getNode())) {
1844
unsigned NumEls = ResultVT.getVectorNumElements();
1845
auto AVL = CDAG.getConstant(NumEls, MVT::i32);
1846
return CDAG.getBroadcast(ResultVT, ScalarV, AVL);
1847
}
1848
1849
// Expand
1850
return SDValue();
1851
}
1852
1853
TargetLowering::LegalizeAction
1854
VETargetLowering::getCustomOperationAction(SDNode &Op) const {
1855
// Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize
// these operations (transform nodes such that their AVL parameter refers to
// packs of 64 bits instead of the number of elements).
1858
1859
// Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
1860
// re-visit them.
1861
if (isPackingSupportOpcode(Op.getOpcode()))
1862
return Legal;
1863
1864
// Custom lower to legalize AVL for packed mode.
1865
if (isVVPOrVEC(Op.getOpcode()))
1866
return Custom;
1867
return Legal;
1868
}
1869
1870
SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1871
LLVM_DEBUG(dbgs() << "::LowerOperation "; Op.dump(&DAG));
1872
unsigned Opcode = Op.getOpcode();
1873
1874
/// Scalar isel.
1875
switch (Opcode) {
1876
case ISD::ATOMIC_FENCE:
1877
return lowerATOMIC_FENCE(Op, DAG);
1878
case ISD::ATOMIC_SWAP:
1879
return lowerATOMIC_SWAP(Op, DAG);
1880
case ISD::BlockAddress:
1881
return lowerBlockAddress(Op, DAG);
1882
case ISD::ConstantPool:
1883
return lowerConstantPool(Op, DAG);
1884
case ISD::DYNAMIC_STACKALLOC:
1885
return lowerDYNAMIC_STACKALLOC(Op, DAG);
1886
case ISD::EH_SJLJ_LONGJMP:
1887
return lowerEH_SJLJ_LONGJMP(Op, DAG);
1888
case ISD::EH_SJLJ_SETJMP:
1889
return lowerEH_SJLJ_SETJMP(Op, DAG);
1890
case ISD::EH_SJLJ_SETUP_DISPATCH:
1891
return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
1892
case ISD::FRAMEADDR:
1893
return lowerFRAMEADDR(Op, DAG, *this, Subtarget);
1894
case ISD::GlobalAddress:
1895
return lowerGlobalAddress(Op, DAG);
1896
case ISD::GlobalTLSAddress:
1897
return lowerGlobalTLSAddress(Op, DAG);
1898
case ISD::INTRINSIC_WO_CHAIN:
1899
return lowerINTRINSIC_WO_CHAIN(Op, DAG);
1900
case ISD::JumpTable:
1901
return lowerJumpTable(Op, DAG);
1902
case ISD::LOAD:
1903
return lowerLOAD(Op, DAG);
1904
case ISD::RETURNADDR:
1905
return lowerRETURNADDR(Op, DAG, *this, Subtarget);
1906
case ISD::BUILD_VECTOR:
1907
return lowerBUILD_VECTOR(Op, DAG);
1908
case ISD::STORE:
1909
return lowerSTORE(Op, DAG);
1910
case ISD::VASTART:
1911
return lowerVASTART(Op, DAG);
1912
case ISD::VAARG:
1913
return lowerVAARG(Op, DAG);
1914
1915
case ISD::INSERT_VECTOR_ELT:
1916
return lowerINSERT_VECTOR_ELT(Op, DAG);
1917
case ISD::EXTRACT_VECTOR_ELT:
1918
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1919
}
1920
1921
/// Vector isel.
1922
if (ISD::isVPOpcode(Opcode))
1923
return lowerToVVP(Op, DAG);
1924
1925
switch (Opcode) {
1926
default:
1927
llvm_unreachable("Should not custom lower this!");
1928
1929
// Legalize the AVL of this internal node.
1930
case VEISD::VEC_BROADCAST:
1931
#define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
1932
#include "VVPNodes.def"
1933
// AVL already legalized.
1934
if (getAnnotatedNodeAVL(Op).second)
1935
return Op;
1936
return legalizeInternalVectorOp(Op, DAG);
1937
1938
// Translate into a VEC_*/VVP_* layer operation.
1939
case ISD::MLOAD:
1940
case ISD::MSTORE:
1941
#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
1942
#include "VVPNodes.def"
1943
if (isMaskArithmetic(Op) && isPackedVectorType(Op.getValueType()))
1944
return splitMaskArithmetic(Op, DAG);
1945
return lowerToVVP(Op, DAG);
1946
}
1947
}
1948
/// } Custom Lower
1949
1950
void VETargetLowering::ReplaceNodeResults(SDNode *N,
1951
SmallVectorImpl<SDValue> &Results,
1952
SelectionDAG &DAG) const {
1953
switch (N->getOpcode()) {
1954
case ISD::ATOMIC_SWAP:
1955
// Let LLVM expand atomic swap instruction through LowerOperation.
1956
return;
1957
default:
1958
LLVM_DEBUG(N->dumpr(&DAG));
1959
llvm_unreachable("Do not know how to custom type legalize this operation!");
1960
}
1961
}
1962
1963
/// JumpTable for VE.
1964
///
1965
/// VE cannot generate a relocatable symbol in a jump table entry. It cannot
/// generate expressions that use symbols from both the text segment and the
/// data segment, like below.
///     .4byte  .LBB0_2-.LJTI0_0
/// So, we instead generate the offset from the top of the function, like
/// below, as a custom label.
///     .4byte  .LBB0_2-<function name>
1972
1973
unsigned VETargetLowering::getJumpTableEncoding() const {
1974
// Use custom label for PIC.
1975
if (isPositionIndependent())
1976
return MachineJumpTableInfo::EK_Custom32;
1977
1978
// Otherwise, use the normal jump table encoding heuristics.
1979
return TargetLowering::getJumpTableEncoding();
1980
}
1981
1982
const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
1983
const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
1984
unsigned Uid, MCContext &Ctx) const {
1985
assert(isPositionIndependent());
1986
1987
// Generate custom label for PIC like below.
1988
// .4bytes .LBB0_2-<function name>
1989
const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
1990
MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data());
1991
const auto *Base = MCSymbolRefExpr::create(Sym, Ctx);
1992
return MCBinaryExpr::createSub(Value, Base, Ctx);
1993
}
1994
1995
SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
1996
SelectionDAG &DAG) const {
1997
assert(isPositionIndependent());
1998
SDLoc DL(Table);
1999
Function *Function = &DAG.getMachineFunction().getFunction();
2000
assert(Function != nullptr);
2001
auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace());
2002
2003
// In the jump table, we have the following values in PIC mode.
//     .4bytes  .LBB0_2-<function name>
// We need to add this value to the address of this function to recover the
// .LBB0_2 label correctly under PIC mode. So, we want to generate the
// following instructions:
2008
// lea %reg, fun@gotoff_lo
2009
// and %reg, %reg, (32)0
2010
// lea.sl %reg, fun@gotoff_hi(%reg, %got)
2011
// In order to do so, we need to generate a correctly marked DAG node using
// makeHiLoPair.
2013
SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy);
2014
SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
2015
VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
2016
SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy);
2017
return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo);
2018
}
2019
2020
Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
2021
MachineBasicBlock::iterator I,
2022
MachineBasicBlock *TargetBB,
2023
const DebugLoc &DL) const {
2024
MachineFunction *MF = MBB.getParent();
2025
MachineRegisterInfo &MRI = MF->getRegInfo();
2026
const VEInstrInfo *TII = Subtarget->getInstrInfo();
2027
2028
const TargetRegisterClass *RC = &VE::I64RegClass;
2029
Register Tmp1 = MRI.createVirtualRegister(RC);
2030
Register Tmp2 = MRI.createVirtualRegister(RC);
2031
Register Result = MRI.createVirtualRegister(RC);
2032
2033
if (isPositionIndependent()) {
2034
// Create following instructions for local linkage PIC code.
2035
// lea %Tmp1, TargetBB@gotoff_lo
2036
// and %Tmp2, %Tmp1, (32)0
2037
// lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2038
BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2039
.addImm(0)
2040
.addImm(0)
2041
.addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_LO32);
2042
BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2043
.addReg(Tmp1, getKillRegState(true))
2044
.addImm(M0(32));
2045
BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2046
.addReg(VE::SX15)
2047
.addReg(Tmp2, getKillRegState(true))
2048
.addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_HI32);
2049
} else {
2050
// Create following instructions for non-PIC code.
2051
// lea %Tmp1, TargetBB@lo
2052
// and %Tmp2, %Tmp1, (32)0
2053
// lea.sl %Result, TargetBB@hi(%Tmp2)
2054
BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2055
.addImm(0)
2056
.addImm(0)
2057
.addMBB(TargetBB, VEMCExpr::VK_VE_LO32);
2058
BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2059
.addReg(Tmp1, getKillRegState(true))
2060
.addImm(M0(32));
2061
BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2062
.addReg(Tmp2, getKillRegState(true))
2063
.addImm(0)
2064
.addMBB(TargetBB, VEMCExpr::VK_VE_HI32);
2065
}
2066
return Result;
2067
}
2068
2069
Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
2070
MachineBasicBlock::iterator I,
2071
StringRef Symbol, const DebugLoc &DL,
2072
bool IsLocal = false,
2073
bool IsCall = false) const {
2074
MachineFunction *MF = MBB.getParent();
2075
MachineRegisterInfo &MRI = MF->getRegInfo();
2076
const VEInstrInfo *TII = Subtarget->getInstrInfo();
2077
2078
const TargetRegisterClass *RC = &VE::I64RegClass;
2079
Register Result = MRI.createVirtualRegister(RC);
2080
2081
if (isPositionIndependent()) {
2082
if (IsCall && !IsLocal) {
2083
// Create the following instructions for non-local linkage PIC function
// calls. These instructions use the IC and the magic number -24, so we
// expand them from the GETFUNPLT pseudo instruction in VEAsmPrinter.cpp.
2086
// lea %Reg, Symbol@plt_lo(-24)
2087
// and %Reg, %Reg, (32)0
2088
// sic %s16
2089
// lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
2090
BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result)
2091
.addExternalSymbol("abort");
2092
} else if (IsLocal) {
2093
Register Tmp1 = MRI.createVirtualRegister(RC);
2094
Register Tmp2 = MRI.createVirtualRegister(RC);
2095
// Create following instructions for local linkage PIC code.
2096
// lea %Tmp1, Symbol@gotoff_lo
2097
// and %Tmp2, %Tmp1, (32)0
2098
// lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2099
BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2100
.addImm(0)
2101
.addImm(0)
2102
.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_LO32);
2103
BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2104
.addReg(Tmp1, getKillRegState(true))
2105
.addImm(M0(32));
2106
BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2107
.addReg(VE::SX15)
2108
.addReg(Tmp2, getKillRegState(true))
2109
.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_HI32);
2110
} else {
2111
Register Tmp1 = MRI.createVirtualRegister(RC);
2112
Register Tmp2 = MRI.createVirtualRegister(RC);
2113
// Create the following instructions for non-local linkage PIC code.
2114
// lea %Tmp1, Symbol@got_lo
2115
// and %Tmp2, %Tmp1, (32)0
2116
// lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2117
// ld %Result, 0(%Tmp3)
2118
Register Tmp3 = MRI.createVirtualRegister(RC);
2119
BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2120
.addImm(0)
2121
.addImm(0)
2122
.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_LO32);
2123
BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2124
.addReg(Tmp1, getKillRegState(true))
2125
.addImm(M0(32));
2126
BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3)
2127
.addReg(VE::SX15)
2128
.addReg(Tmp2, getKillRegState(true))
2129
.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_HI32);
2130
BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result)
2131
.addReg(Tmp3, getKillRegState(true))
2132
.addImm(0)
2133
.addImm(0);
2134
}
2135
} else {
2136
Register Tmp1 = MRI.createVirtualRegister(RC);
2137
Register Tmp2 = MRI.createVirtualRegister(RC);
2138
// Create following instructions for non-PIC code.
2139
// lea %Tmp1, Symbol@lo
2140
// and %Tmp2, %Tmp1, (32)0
2141
// lea.sl %Result, Symbol@hi(%Tmp2)
2142
BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2143
.addImm(0)
2144
.addImm(0)
2145
.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_LO32);
2146
BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2147
.addReg(Tmp1, getKillRegState(true))
2148
.addImm(M0(32));
2149
BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2150
.addReg(Tmp2, getKillRegState(true))
2151
.addImm(0)
2152
.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_HI32);
2153
}
2154
return Result;
2155
}
2156
2157
void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
2158
MachineBasicBlock *MBB,
2159
MachineBasicBlock *DispatchBB,
2160
int FI, int Offset) const {
2161
DebugLoc DL = MI.getDebugLoc();
2162
const VEInstrInfo *TII = Subtarget->getInstrInfo();
2163
2164
Register LabelReg =
2165
prepareMBB(*MBB, MachineBasicBlock::iterator(MI), DispatchBB, DL);
2166
2167
// Store the address of DispatchBB into the given jmpbuf[1], which holds the
// next IC referenced by longjmp (throw) later.
2169
MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2170
addFrameReference(MIB, FI, Offset); // jmpbuf[1]
2171
MIB.addReg(LabelReg, getKillRegState(true));
2172
}
2173
2174
MachineBasicBlock *
2175
VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
2176
MachineBasicBlock *MBB) const {
2177
DebugLoc DL = MI.getDebugLoc();
2178
MachineFunction *MF = MBB->getParent();
2179
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2180
const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
2181
MachineRegisterInfo &MRI = MF->getRegInfo();
2182
2183
const BasicBlock *BB = MBB->getBasicBlock();
2184
MachineFunction::iterator I = ++MBB->getIterator();
2185
2186
// Memory Reference.
2187
SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
2188
MI.memoperands_end());
2189
Register BufReg = MI.getOperand(1).getReg();
2190
2191
Register DstReg;
2192
2193
DstReg = MI.getOperand(0).getReg();
2194
const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
2195
assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
2196
(void)TRI;
2197
Register MainDestReg = MRI.createVirtualRegister(RC);
2198
Register RestoreDestReg = MRI.createVirtualRegister(RC);
2199
2200
// For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate following
2201
// instructions. SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
2202
//
2203
// ThisMBB:
2204
// buf[3] = %s17 iff %s17 is used as BP
2205
// buf[1] = RestoreMBB as IC after longjmp
2206
// # SjLjSetup RestoreMBB
2207
//
2208
// MainMBB:
2209
// v_main = 0
2210
//
2211
// SinkMBB:
2212
// v = phi(v_main, MainMBB, v_restore, RestoreMBB)
2213
// ...
2214
//
2215
// RestoreMBB:
2216
// %s17 = buf[3] iff %s17 is used as BP
2217
// v_restore = 1
2218
// goto SinkMBB
2219
2220
MachineBasicBlock *ThisMBB = MBB;
2221
MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
2222
MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
2223
MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
2224
MF->insert(I, MainMBB);
2225
MF->insert(I, SinkMBB);
2226
MF->push_back(RestoreMBB);
2227
RestoreMBB->setMachineBlockAddressTaken();
2228
2229
// Transfer the remainder of BB and its successor edges to SinkMBB.
2230
SinkMBB->splice(SinkMBB->begin(), MBB,
2231
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
2232
SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
2233
2234
// ThisMBB:
2235
Register LabelReg =
2236
prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL);
2237
2238
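// jmpbuf layout used here and in emitEHSjLjLongJmp: buf[0]=FP, buf[1]=IC,
// buf[2]=SP, buf[3]=BP, each slot 8 bytes wide.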
// Store BP in buf[3] iff this function is using BP.
2239
const VEFrameLowering *TFI = Subtarget->getFrameLowering();
2240
if (TFI->hasBP(*MF)) {
2241
MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2242
MIB.addReg(BufReg);
2243
MIB.addImm(0);
2244
MIB.addImm(24);
2245
MIB.addReg(VE::SX17);
2246
MIB.setMemRefs(MMOs);
2247
}
2248
2249
// Store IP in buf[1].
2250
MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2251
MIB.add(MI.getOperand(1)); // we can preserve the kill flags here.
2252
MIB.addImm(0);
2253
MIB.addImm(8);
2254
MIB.addReg(LabelReg, getKillRegState(true));
2255
MIB.setMemRefs(MMOs);
2256
2257
// SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
2258
2259
// Insert setup.
2260
MIB =
2261
BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);
2262
2263
const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2264
MIB.addRegMask(RegInfo->getNoPreservedMask());
2265
ThisMBB->addSuccessor(MainMBB);
2266
ThisMBB->addSuccessor(RestoreMBB);
2267
2268
// MainMBB:
2269
BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)
2270
.addImm(0)
2271
.addImm(0)
2272
.addImm(0);
2273
MainMBB->addSuccessor(SinkMBB);
2274
2275
// SinkMBB:
2276
BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)
2277
.addReg(MainDestReg)
2278
.addMBB(MainMBB)
2279
.addReg(RestoreDestReg)
2280
.addMBB(RestoreMBB);
2281
2282
// RestoreMBB:
2283
// Restore BP from buf[3] iff this function is using BP. The address of
2284
// buf is in SX10.
2285
// FIXME: Better to not use SX10 here
2286
if (TFI->hasBP(*MF)) {
2287
MachineInstrBuilder MIB =
2288
BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);
2289
MIB.addReg(VE::SX10);
2290
MIB.addImm(0);
2291
MIB.addImm(24);
2292
MIB.setMemRefs(MMOs);
2293
}
2294
BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)
2295
.addImm(0)
2296
.addImm(0)
2297
.addImm(1);
2298
BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);
2299
RestoreMBB->addSuccessor(SinkMBB);
2300
2301
MI.eraseFromParent();
2302
return SinkMBB;
2303
}
2304
2305
MachineBasicBlock *
2306
VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
2307
MachineBasicBlock *MBB) const {
2308
DebugLoc DL = MI.getDebugLoc();
2309
MachineFunction *MF = MBB->getParent();
2310
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2311
MachineRegisterInfo &MRI = MF->getRegInfo();
2312
2313
// Memory Reference.
2314
SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
2315
MI.memoperands_end());
2316
Register BufReg = MI.getOperand(0).getReg();
2317
2318
Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass);
2319
// Since FP is only updated here but NOT referenced, it's treated as GPR.
2320
Register FP = VE::SX9;
2321
Register SP = VE::SX11;
2322
2323
MachineInstrBuilder MIB;
2324
2325
MachineBasicBlock *ThisMBB = MBB;
2326
2327
// For `call @llvm.eh.sjlj.longjmp(buf)`, we generate following instructions.
2328
//
2329
// ThisMBB:
2330
// %fp = load buf[0]
2331
// %jmp = load buf[1]
2332
// %s10 = buf ; Store an address of buf to SX10 for RestoreMBB
2333
// %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2334
// jmp %jmp
2335
2336
// Reload FP.
2337
MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP);
2338
MIB.addReg(BufReg);
2339
MIB.addImm(0);
2340
MIB.addImm(0);
2341
MIB.setMemRefs(MMOs);
2342
2343
// Reload IP.
2344
MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp);
2345
MIB.addReg(BufReg);
2346
MIB.addImm(0);
2347
MIB.addImm(8);
2348
MIB.setMemRefs(MMOs);
2349
2350
// Copy BufReg to SX10 for later use in setjmp.
2351
// FIXME: Better to not use SX10 here
2352
BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10)
2353
.addReg(BufReg)
2354
.addImm(0);
2355
2356
// Reload SP.
2357
MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP);
2358
MIB.add(MI.getOperand(0)); // we can preserve the kill flags here.
2359
MIB.addImm(0);
2360
MIB.addImm(16);
2361
MIB.setMemRefs(MMOs);
2362
2363
// Jump.
2364
BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t))
2365
.addReg(Tmp, getKillRegState(true))
2366
.addImm(0);
2367
2368
MI.eraseFromParent();
2369
return ThisMBB;
2370
}
2371
2372
MachineBasicBlock *
2373
VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
2374
MachineBasicBlock *BB) const {
2375
DebugLoc DL = MI.getDebugLoc();
2376
MachineFunction *MF = BB->getParent();
2377
MachineFrameInfo &MFI = MF->getFrameInfo();
2378
MachineRegisterInfo &MRI = MF->getRegInfo();
2379
const VEInstrInfo *TII = Subtarget->getInstrInfo();
2380
int FI = MFI.getFunctionContextIndex();
2381
2382
// Get a mapping of the call site numbers to all of the landing pads they're
2383
// associated with.
2384
DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
2385
unsigned MaxCSNum = 0;
2386
for (auto &MBB : *MF) {
2387
if (!MBB.isEHPad())
2388
continue;
2389
2390
MCSymbol *Sym = nullptr;
2391
for (const auto &MI : MBB) {
2392
if (MI.isDebugInstr())
2393
continue;
2394
2395
assert(MI.isEHLabel() && "expected EH_LABEL");
2396
Sym = MI.getOperand(0).getMCSymbol();
2397
break;
2398
}
2399
2400
if (!MF->hasCallSiteLandingPad(Sym))
2401
continue;
2402
2403
for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
2404
CallSiteNumToLPad[CSI].push_back(&MBB);
2405
MaxCSNum = std::max(MaxCSNum, CSI);
2406
}
2407
}
2408
2409
// Get an ordered list of the machine basic blocks for the jump table.
2410
std::vector<MachineBasicBlock *> LPadList;
2411
SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
2412
LPadList.reserve(CallSiteNumToLPad.size());
2413
2414
for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
2415
for (auto &LP : CallSiteNumToLPad[CSI]) {
2416
LPadList.push_back(LP);
2417
InvokeBBs.insert(LP->pred_begin(), LP->pred_end());
2418
}
2419
}
2420
2421
assert(!LPadList.empty() &&
2422
"No landing pad destinations for the dispatch jump table!");
2423
2424
// The %fn_context is allocated like below (from --print-after=sjljehprepare):
2425
// %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
2426
//
2427
// This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.
2428
// First `i64` is callsite, so callsite is FI+8.
2429
static const int OffsetIC = 72;
2430
static const int OffsetCS = 8;
2431
2432
// Create the MBBs for the dispatch code like following:
2433
//
2434
// ThisMBB:
2435
// Prepare DispatchBB address and store it to buf[1].
2436
// ...
2437
//
2438
// DispatchBB:
2439
// %s15 = GETGOT iff isPositionIndependent
2440
// %callsite = load callsite
2441
// brgt.l.t #size of callsites, %callsite, DispContBB
2442
//
2443
// TrapBB:
2444
// Call abort.
2445
//
2446
// DispContBB:
2447
// %breg = address of jump table
2448
// %pc = load and calculate next pc from %breg and %callsite
2449
// jmp %pc
2450
2451
// Shove the dispatch's address into the return slot in the function context.
2452
MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
2453
DispatchBB->setIsEHPad(true);
2454
2455
// TrapBB causes a trap like `assert(0)`.
2456
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
2457
DispatchBB->addSuccessor(TrapBB);
2458
2459
MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
2460
DispatchBB->addSuccessor(DispContBB);
2461
2462
// Insert MBBs.
2463
MF->push_back(DispatchBB);
2464
MF->push_back(DispContBB);
2465
MF->push_back(TrapBB);
2466
2467
// Insert code to call abort in the TrapBB.
2468
Register Abort = prepareSymbol(*TrapBB, TrapBB->end(), "abort", DL,
2469
/* Local */ false, /* Call */ true);
2470
BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10)
2471
.addReg(Abort, getKillRegState(true))
2472
.addImm(0)
2473
.addImm(0);
2474
2475
// Insert code into the entry block that creates and registers the function
2476
// context.
2477
setupEntryBlockForSjLj(MI, BB, DispatchBB, FI, OffsetIC);
2478
2479
// Create the jump table and associated information
2480
unsigned JTE = getJumpTableEncoding();
2481
MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE);
2482
unsigned MJTI = JTI->createJumpTableIndex(LPadList);
2483
2484
const VERegisterInfo &RI = TII->getRegisterInfo();
2485
// Add a register mask with no preserved registers. This results in all
2486
// registers being marked as clobbered.
2487
BuildMI(DispatchBB, DL, TII->get(VE::NOP))
2488
.addRegMask(RI.getNoPreservedMask());
2489
2490
if (isPositionIndependent()) {
2491
// Force generation of GETGOT, since the current implementation doesn't
// preserve the GOT register.
2493
BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15);
2494
}
2495
2496
// IReg is used as an index in a memory operand and therefore can't be SP
2497
const TargetRegisterClass *RC = &VE::I64RegClass;
2498
Register IReg = MRI.createVirtualRegister(RC);
2499
addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI,
2500
OffsetCS);
2501
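// A landing-pad count below 64 fits the branch's small immediate field
// (presumably simm7); larger counts are first materialized into a register.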
if (LPadList.size() < 64) {
2502
BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t))
2503
.addImm(VECC::CC_ILE)
2504
.addImm(LPadList.size())
2505
.addReg(IReg)
2506
.addMBB(TrapBB);
2507
} else {
2508
assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
2509
Register TmpReg = MRI.createVirtualRegister(RC);
2510
BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg)
2511
.addImm(0)
2512
.addImm(0)
2513
.addImm(LPadList.size());
2514
BuildMI(DispatchBB, DL, TII->get(VE::BRCFLrr_t))
2515
.addImm(VECC::CC_ILE)
2516
.addReg(TmpReg, getKillRegState(true))
2517
.addReg(IReg)
2518
.addMBB(TrapBB);
2519
}
2520
2521
Register BReg = MRI.createVirtualRegister(RC);
2522
Register Tmp1 = MRI.createVirtualRegister(RC);
2523
Register Tmp2 = MRI.createVirtualRegister(RC);
2524
2525
if (isPositionIndependent()) {
2526
// Create following instructions for local linkage PIC code.
2527
// lea %Tmp1, .LJTI0_0@gotoff_lo
2528
// and %Tmp2, %Tmp1, (32)0
2529
// lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2530
BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2531
.addImm(0)
2532
.addImm(0)
2533
.addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32);
2534
BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2535
.addReg(Tmp1, getKillRegState(true))
2536
.addImm(M0(32));
2537
BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg)
2538
.addReg(VE::SX15)
2539
.addReg(Tmp2, getKillRegState(true))
2540
.addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32);
2541
} else {
2542
// Create following instructions for non-PIC code.
2543
// lea %Tmp1, .LJTI0_0@lo
2544
// and %Tmp2, %Tmp1, (32)0
2545
// lea.sl %BReg, .LJTI0_0@hi(%Tmp2)
2546
BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2547
.addImm(0)
2548
.addImm(0)
2549
.addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32);
2550
BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2551
.addReg(Tmp1, getKillRegState(true))
2552
.addImm(M0(32));
2553
BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg)
2554
.addReg(Tmp2, getKillRegState(true))
2555
.addImm(0)
2556
.addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32);
2557
}
2558
2559
switch (JTE) {
2560
case MachineJumpTableInfo::EK_BlockAddress: {
2561
// Generate simple block address code for no-PIC model.
2562
// sll %Tmp1, %IReg, 3
2563
// lds %TReg, 0(%Tmp1, %BReg)
2564
// bcfla %TReg
2565
2566
Register TReg = MRI.createVirtualRegister(RC);
2567
Register Tmp1 = MRI.createVirtualRegister(RC);
2568
2569
BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2570
.addReg(IReg, getKillRegState(true))
2571
.addImm(3);
2572
BuildMI(DispContBB, DL, TII->get(VE::LDrri), TReg)
2573
.addReg(BReg, getKillRegState(true))
2574
.addReg(Tmp1, getKillRegState(true))
2575
.addImm(0);
2576
BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2577
.addReg(TReg, getKillRegState(true))
2578
.addImm(0);
2579
break;
2580
}
2581
case MachineJumpTableInfo::EK_Custom32: {
2582
// Generate block address code using differences from the function pointer
2583
// for PIC model.
2584
// sll %Tmp1, %IReg, 2
2585
// ldl.zx %OReg, 0(%Tmp1, %BReg)
2586
// Prepare function address in BReg2.
2587
// adds.l %TReg, %BReg2, %OReg
2588
// bcfla %TReg
2589
2590
assert(isPositionIndependent());
2591
Register OReg = MRI.createVirtualRegister(RC);
2592
Register TReg = MRI.createVirtualRegister(RC);
2593
Register Tmp1 = MRI.createVirtualRegister(RC);
2594
2595
BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2596
.addReg(IReg, getKillRegState(true))
2597
.addImm(2);
2598
BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg)
2599
.addReg(BReg, getKillRegState(true))
2600
.addReg(Tmp1, getKillRegState(true))
2601
.addImm(0);
2602
Register BReg2 =
2603
prepareSymbol(*DispContBB, DispContBB->end(),
2604
DispContBB->getParent()->getName(), DL, /* Local */ true);
2605
BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg)
2606
.addReg(OReg, getKillRegState(true))
2607
.addReg(BReg2, getKillRegState(true));
2608
BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2609
.addReg(TReg, getKillRegState(true))
2610
.addImm(0);
2611
break;
2612
}
2613
default:
2614
llvm_unreachable("Unexpected jump table encoding");
2615
}
2616
2617
// Add the jump table entries as successors to the MBB.
2618
SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
2619
for (auto &LP : LPadList)
2620
if (SeenMBBs.insert(LP).second)
2621
DispContBB->addSuccessor(LP);
2622
2623
// N.B. the order the invoke BBs are processed in doesn't matter here.
2624
SmallVector<MachineBasicBlock *, 64> MBBLPads;
2625
const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
2626
for (MachineBasicBlock *MBB : InvokeBBs) {
2627
// Remove the landing pad successor from the invoke block and replace it
2628
// with the new dispatch block.
2629
// Keep a copy of Successors since it's modified inside the loop.
2630
SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
2631
MBB->succ_rend());
2632
// FIXME: Avoid quadratic complexity.
2633
for (auto *MBBS : Successors) {
2634
if (MBBS->isEHPad()) {
2635
MBB->removeSuccessor(MBBS);
2636
MBBLPads.push_back(MBBS);
2637
}
2638
}
2639
2640
MBB->addSuccessor(DispatchBB);
2641
2642
// Find the invoke call and mark all of the callee-saved registers as
2643
// 'implicit defined' so that they're spilled. This prevents code from
2644
// moving instructions to before the EH block, where they will never be
2645
// executed.
2646
for (auto &II : reverse(*MBB)) {
2647
if (!II.isCall())
2648
continue;
2649
2650
DenseMap<Register, bool> DefRegs;
2651
for (auto &MOp : II.operands())
2652
if (MOp.isReg())
2653
DefRegs[MOp.getReg()] = true;
2654
2655
MachineInstrBuilder MIB(*MF, &II);
2656
for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
2657
Register Reg = SavedRegs[RI];
2658
if (!DefRegs[Reg])
2659
MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
2660
}
2661
2662
break;
2663
}
2664
}
2665
2666
// Mark all former landing pads as non-landing pads. The dispatch is the only
2667
// landing pad now.
2668
for (auto &LP : MBBLPads)
2669
LP->setIsEHPad(false);
2670
2671
// The instruction is gone now.
2672
MI.eraseFromParent();
2673
return BB;
2674
}
2675
2676
MachineBasicBlock *
2677
VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2678
MachineBasicBlock *BB) const {
2679
switch (MI.getOpcode()) {
2680
default:
2681
llvm_unreachable("Unknown Custom Instruction!");
2682
case VE::EH_SjLj_LongJmp:
2683
return emitEHSjLjLongJmp(MI, BB);
2684
case VE::EH_SjLj_SetJmp:
2685
return emitEHSjLjSetJmp(MI, BB);
2686
case VE::EH_SjLj_Setup_Dispatch:
2687
return emitSjLjDispatchBlock(MI, BB);
2688
}
2689
}
2690
2691
static bool isSimm7(SDValue V) {
2692
EVT VT = V.getValueType();
2693
if (VT.isVector())
2694
return false;
2695
2696
if (VT.isInteger()) {
2697
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
2698
return isInt<7>(C->getSExtValue());
2699
} else if (VT.isFloatingPoint()) {
2700
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
2701
if (VT == MVT::f32 || VT == MVT::f64) {
2702
const APInt &Imm = C->getValueAPF().bitcastToAPInt();
2703
uint64_t Val = Imm.getSExtValue();
2704
if (Imm.getBitWidth() == 32)
2705
Val <<= 32; // On VE, a float immediate is placed in the upper 32 bits.
2706
return isInt<7>(Val);
2707
}
2708
}
2709
}
2710
return false;
2711
}
2712
2713
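// MImm is VE's mask-immediate form: (m)0 is m leading zeros followed by
// ones and (m)1 is m leading ones followed by zeros, within 64 bits.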
static bool isMImm(SDValue V) {
2714
EVT VT = V.getValueType();
2715
if (VT.isVector())
2716
return false;
2717
2718
if (VT.isInteger()) {
2719
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
2720
return isMImmVal(getImmVal(C));
2721
} else if (VT.isFloatingPoint()) {
2722
if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
2723
if (VT == MVT::f32) {
2724
// The float value is placed in the upper 32 bits, so ignore the lower 32 bits.
2725
return isMImm32Val(getFpImmVal(C) >> 32);
2726
} else if (VT == MVT::f64) {
2727
return isMImmVal(getFpImmVal(C));
2728
}
2729
}
2730
}
2731
return false;
2732
}
2733
2734
static unsigned decideComp(EVT SrcVT, ISD::CondCode CC) {
2735
if (SrcVT.isFloatingPoint()) {
2736
if (SrcVT == MVT::f128)
2737
return VEISD::CMPQ;
2738
return VEISD::CMPF;
2739
}
2740
return isSignedIntSetCC(CC) ? VEISD::CMPI : VEISD::CMPU;
2741
}
2742
2743
static EVT decideCompType(EVT SrcVT) {
2744
if (SrcVT == MVT::f128)
2745
return MVT::f64;
2746
return SrcVT;
2747
}
2748
2749
static bool safeWithoutCompWithNull(EVT SrcVT, ISD::CondCode CC,
2750
bool WithCMov) {
2751
if (SrcVT.isFloatingPoint()) {
2752
// For floating-point setcc, only unordered comparisons or general
// comparisons with the -enable-no-nans-fp-math option reach here, so it
// is safe even if values are NaN. Only f128 is unsafe since VE uses the
// f64 result of the f128 comparison.
2756
return SrcVT != MVT::f128;
2757
}
2758
if (isIntEqualitySetCC(CC)) {
2759
// For the case of equal or not equal, it is safe without comparison with 0.
2760
return true;
2761
}
2762
if (WithCMov) {
2763
// For integer setcc with cmov, all signed comparisons with 0 are safe.
2765
return isSignedIntSetCC(CC);
2766
}
2767
// For integer setcc, only a signed 64-bit comparison is safe. For unsigned,
// "CMPU 0x80000000, 0" has to be greater than 0, but it becomes less than 0
// without CMPU. For 32-bit values, the other half of the 64-bit register is
// not well-defined, so it is not safe either without CMPI.
2771
return isSignedIntSetCC(CC) && SrcVT == MVT::i64;
2772
}
2773
2774
static SDValue generateComparison(EVT VT, SDValue LHS, SDValue RHS,
2775
ISD::CondCode CC, bool WithCMov,
2776
const SDLoc &DL, SelectionDAG &DAG) {
2777
// Compare values. If RHS is 0 and it is safe to calculate without
2778
// comparison, we don't generate an instruction for comparison.
2779
EVT CompVT = decideCompType(VT);
2780
if (CompVT == VT && safeWithoutCompWithNull(VT, CC, WithCMov) &&
2781
(isNullConstant(RHS) || isNullFPConstant(RHS))) {
2782
return LHS;
2783
}
2784
return DAG.getNode(decideComp(VT, CC), DL, CompVT, LHS, RHS);
2785
}
2786
2787
SDValue VETargetLowering::combineSelect(SDNode *N,
2788
DAGCombinerInfo &DCI) const {
2789
assert(N->getOpcode() == ISD::SELECT &&
2790
"Should be called with a SELECT node");
2791
ISD::CondCode CC = ISD::CondCode::SETNE;
2792
SDValue Cond = N->getOperand(0);
2793
SDValue True = N->getOperand(1);
2794
SDValue False = N->getOperand(2);
2795
2796
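// Lower a scalar SELECT into VEISD::CMOV: the condition value itself is
// tested against zero with the chosen condition code (NE, or EQ if the
// clauses are swapped below).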
// We handle only scalar SELECT.
2797
EVT VT = N->getValueType(0);
2798
if (VT.isVector())
2799
return SDValue();
2800
2801
// Perform combineSelect after legalizing the DAG.
2802
if (!DCI.isAfterLegalizeDAG())
2803
return SDValue();
2804
2805
EVT VT0 = Cond.getValueType();
2806
if (isMImm(True)) {
2807
// VE's condition move can handle MImm in True clause, so nothing to do.
2808
} else if (isMImm(False)) {
2809
// VE's conditional move can handle MImm only in the True clause, so swap the
// True and False clauses if False holds an MImm value, and update the
// condition code accordingly.
2811
std::swap(True, False);
2812
CC = getSetCCInverse(CC, VT0);
2813
}
2814
2815
SDLoc DL(N);
2816
SelectionDAG &DAG = DCI.DAG;
2817
VECC::CondCode VECCVal;
2818
if (VT0.isFloatingPoint()) {
2819
VECCVal = fpCondCode2Fcc(CC);
2820
} else {
2821
VECCVal = intCondCode2Icc(CC);
2822
}
2823
SDValue Ops[] = {Cond, True, False,
2824
DAG.getConstant(VECCVal, DL, MVT::i32)};
2825
return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
2826
}
2827
2828
SDValue VETargetLowering::combineSelectCC(SDNode *N,
2829
DAGCombinerInfo &DCI) const {
2830
assert(N->getOpcode() == ISD::SELECT_CC &&
2831
"Should be called with a SELECT_CC node");
2832
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
2833
SDValue LHS = N->getOperand(0);
2834
SDValue RHS = N->getOperand(1);
2835
SDValue True = N->getOperand(2);
2836
SDValue False = N->getOperand(3);
2837
2838
// We handle only scalar SELECT_CC.
2839
EVT VT = N->getValueType(0);
2840
if (VT.isVector())
2841
return SDValue();
2842
2843
// Perform combineSelectCC after legalizing the DAG.
2844
if (!DCI.isAfterLegalizeDAG())
2845
return SDValue();
2846
2847
// We handle only i32/i64/f32/f64/f128 comparisons.
2848
EVT LHSVT = LHS.getValueType();
2849
assert(LHSVT == RHS.getValueType());
2850
switch (LHSVT.getSimpleVT().SimpleTy) {
2851
case MVT::i32:
2852
case MVT::i64:
2853
case MVT::f32:
2854
case MVT::f64:
2855
case MVT::f128:
2856
break;
2857
default:
2858
// Return SDValue to let llvm handle other types.
2859
return SDValue();
2860
}
2861
2862
if (isMImm(RHS)) {
2863
// VE's comparison can handle MImm in RHS, so nothing to do.
2864
} else if (isSimm7(RHS)) {
2865
// VE's comparison can handle Simm7 in LHS, so swap LHS and RHS, and
2866
// update condition code.
2867
std::swap(LHS, RHS);
2868
CC = getSetCCSwappedOperands(CC);
2869
}
2870
if (isMImm(True)) {
2871
// VE's condition move can handle MImm in True clause, so nothing to do.
2872
} else if (isMImm(False)) {
2873
// VE's conditional move can handle MImm only in the True clause, so swap the
// True and False clauses if False holds an MImm value, and update the
// condition code accordingly.
2875
std::swap(True, False);
2876
CC = getSetCCInverse(CC, LHSVT);
2877
}
2878
2879
SDLoc DL(N);
2880
SelectionDAG &DAG = DCI.DAG;
2881
2882
bool WithCMov = true;
2883
SDValue CompNode = generateComparison(LHSVT, LHS, RHS, CC, WithCMov, DL, DAG);
2884
2885
VECC::CondCode VECCVal;
2886
if (LHSVT.isFloatingPoint()) {
2887
VECCVal = fpCondCode2Fcc(CC);
2888
} else {
2889
VECCVal = intCondCode2Icc(CC);
2890
}
2891
SDValue Ops[] = {CompNode, True, False,
2892
DAG.getConstant(VECCVal, DL, MVT::i32)};
2893
return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
2894
}
2895
2896
static bool isI32InsnAllUses(const SDNode *User, const SDNode *N);
2897
static bool isI32Insn(const SDNode *User, const SDNode *N) {
2898
switch (User->getOpcode()) {
2899
default:
2900
return false;
2901
case ISD::ADD:
2902
case ISD::SUB:
2903
case ISD::MUL:
2904
case ISD::SDIV:
2905
case ISD::UDIV:
2906
case ISD::SETCC:
2907
case ISD::SMIN:
2908
case ISD::SMAX:
2909
case ISD::SHL:
2910
case ISD::SRA:
2911
case ISD::BSWAP:
2912
case ISD::SINT_TO_FP:
2913
case ISD::UINT_TO_FP:
2914
case ISD::BR_CC:
2915
case ISD::BITCAST:
2916
case ISD::ATOMIC_CMP_SWAP:
2917
case ISD::ATOMIC_SWAP:
2918
case VEISD::CMPU:
2919
case VEISD::CMPI:
2920
return true;
2921
case ISD::SRL:
2922
if (N->getOperand(0).getOpcode() != ISD::SRL)
2923
return true;
2924
// (srl (trunc (srl ...))) may be optimized by combining the srls, so
// don't optimize the trunc here.
2926
return false;
2927
case ISD::SELECT_CC:
2928
if (User->getOperand(2).getNode() != N &&
2929
User->getOperand(3).getNode() != N)
2930
return true;
2931
return isI32InsnAllUses(User, N);
2932
case VEISD::CMOV:
2933
// CMOV in (cmov (trunc ...), true, false, int-comparison) is safe.
2934
// However, trunc in true or false clauses is not safe.
2935
if (User->getOperand(1).getNode() != N &&
2936
User->getOperand(2).getNode() != N &&
2937
isa<ConstantSDNode>(User->getOperand(3))) {
2938
VECC::CondCode VECCVal =
2939
static_cast<VECC::CondCode>(User->getConstantOperandVal(3));
2940
return isIntVECondCode(VECCVal);
2941
}
2942
[[fallthrough]];
2943
case ISD::AND:
2944
case ISD::OR:
2945
case ISD::XOR:
2946
case ISD::SELECT:
2947
case ISD::CopyToReg:
2948
// Check all uses of selections, bit operations, and copies. If all of them
// are safe, optimize the truncate to extract_subreg.
2950
return isI32InsnAllUses(User, N);
2951
}
2952
}
2953
2954
static bool isI32InsnAllUses(const SDNode *User, const SDNode *N) {
2955
// Check all uses of the User node. If all of them are safe, optimize the
// truncate to extract_subreg.
2957
for (const SDNode *U : User->uses()) {
2958
switch (U->getOpcode()) {
2959
default:
2960
// If the use is an instruction which treats the source operand as i32,
2961
// it is safe to avoid truncate here.
2962
if (isI32Insn(U, N))
2963
continue;
2964
break;
2965
case ISD::ANY_EXTEND:
2966
case ISD::SIGN_EXTEND:
2967
case ISD::ZERO_EXTEND: {
2968
// Special optimization for the combination of ext and trunc.
// (ext ... (select ... (trunc ...))) can safely avoid the truncate here
// since the truncate would only clear the upper 32 bits, which are filled
// by one of the ext instructions later anyway.
2972
assert(N->getValueType(0) == MVT::i32 &&
2973
"find truncate to not i32 integer");
2974
if (User->getOpcode() == ISD::SELECT_CC ||
2975
User->getOpcode() == ISD::SELECT || User->getOpcode() == VEISD::CMOV)
2976
continue;
2977
break;
2978
}
2979
}
2980
return false;
2981
}
2982
return true;
2983
}
2984
2985
// Optimize TRUNCATE in DAG combining. Optimizing it in custom lowering is
// sometimes too early, and optimizing it in DAG pattern matching in
// VEInstrInfo.td is sometimes too late, so we do it here.
2988
SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
2989
DAGCombinerInfo &DCI) const {
2990
assert(N->getOpcode() == ISD::TRUNCATE &&
2991
"Should be called with a TRUNCATE node");
2992
2993
SelectionDAG &DAG = DCI.DAG;
2994
SDLoc DL(N);
2995
EVT VT = N->getValueType(0);
2996
2997
// We prefer to do this when all types are legal.
2998
if (!DCI.isAfterLegalizeDAG())
2999
return SDValue();
3000
3001
// Skip combining TRUNCATE for now if the operand of TRUNCATE might be a constant.
3002
if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC &&
3003
isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) &&
3004
isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
3005
return SDValue();
3006
3007
// Check all uses of this TRUNCATE.
3008
for (const SDNode *User : N->uses()) {
3009
// Make sure that we're not going to replace TRUNCATE for non-i32
// instructions.
3011
//
3012
// FIXME: Although we could sometimes handle this, and it does occur in
3013
// practice that one of the condition inputs to the select is also one of
3014
// the outputs, we currently can't deal with this.
3015
if (isI32Insn(User, N))
3016
continue;
3017
3018
return SDValue();
3019
}
3020
3021
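// Every use treats the value as i32, so the truncate can be replaced by a
// plain sub_i32 subregister extraction.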
SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
3022
return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT,
3023
N->getOperand(0), SubI32),
3024
0);
3025
}
3026
3027
SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
3028
DAGCombinerInfo &DCI) const {
3029
switch (N->getOpcode()) {
3030
default:
3031
break;
3032
case ISD::SELECT:
3033
return combineSelect(N, DCI);
3034
case ISD::SELECT_CC:
3035
return combineSelectCC(N, DCI);
3036
case ISD::TRUNCATE:
3037
return combineTRUNCATE(N, DCI);
3038
}
3039
3040
return SDValue();
3041
}
3042
3043
//===----------------------------------------------------------------------===//
3044
// VE Inline Assembly Support
3045
//===----------------------------------------------------------------------===//
3046
3047
VETargetLowering::ConstraintType
3048
VETargetLowering::getConstraintType(StringRef Constraint) const {
3049
if (Constraint.size() == 1) {
3050
switch (Constraint[0]) {
3051
default:
3052
break;
3053
case 'v': // vector registers
3054
return C_RegisterClass;
3055
}
3056
}
3057
return TargetLowering::getConstraintType(Constraint);
3058
}

std::pair<unsigned, const TargetRegisterClass *>
VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                               StringRef Constraint,
                                               MVT VT) const {
  const TargetRegisterClass *RC = nullptr;
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
    case 'r':
      RC = &VE::I64RegClass;
      break;
    case 'v':
      RC = &VE::V64RegClass;
      break;
    }
    return std::make_pair(0U, RC);
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
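
// A hypothetical usage sketch (ours, not taken from this file): a GCC-style
// inline asm operand written with the "v" constraint, e.g.
//   asm("..." : "=v"(out) : "v"(in));
// is assigned a register from VE::V64RegClass by the constraint handling
// above, while "r" operands get a register from VE::I64RegClass.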

//===----------------------------------------------------------------------===//
// VE Target Optimization Support
//===----------------------------------------------------------------------===//

unsigned VETargetLowering::getMinimumJumpTableEntries() const {
  // Specify 8 for the PIC model to reduce the impact of PIC load instructions.
  if (isJumpTableRelative())
    return 8;

  return TargetLowering::getMinimumJumpTableEntries();
}

bool VETargetLowering::hasAndNot(SDValue Y) const {
  EVT VT = Y.getValueType();

  // VE doesn't have a vector and-not instruction.
  if (VT.isVector())
    return false;

  // VE allows different immediate values for X and Y where ~X & Y.
  // Only simm7 works for X, and only mimm works for Y on VE.  However, this
  // function is used to check whether an immediate value is OK for an and-not
  // instruction as both X and Y.  Generating an additional instruction to
  // retrieve an immediate value is no good since the purpose of this
  // function is to convert a series of 3 instructions to another series of
  // 3 instructions with better parallelism.  Therefore, we return false
  // for all immediate values for now.
  // FIXME: Change hasAndNot function to have two operands to make it work
  // correctly with Aurora VE.
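  //
  // For reference, our reading (not stated in the original comment) of the
  // generic combine that consults hasAndNot: DAGCombiner's masked-merge
  // unfold rewrites
  //   ((X ^ Y) & M) ^ Y   into   (X & M) | (Y & ~M)
  // i.e. three dependent operations into three operations with more
  // parallelism.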
  if (isa<ConstantSDNode>(Y))
    return false;

  // It's ok for generic registers.
  return true;
}

SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
  MVT VT = Op.getOperand(0).getSimpleValueType();

  // Special treatment for packed V64 types.
  assert(VT == MVT::v512i32 || VT == MVT::v512f32);
  (void)VT;
  // Example code:
  //   %packed_v = extractelt %vr, %idx / 2
  //   %v = %packed_v >> (((%idx & 1) ^ 1) * 32)
  //   %res = %v & 0xffffffff
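  //
  // A worked example (our illustration): for %idx = 2 the element lives in
  // 64-bit lane 1 (= 2 >> 1); the index is even, so the shift computed below
  // is ((2 & 1) ^ 1) << 5 = 32 and the upper half of the lane is extracted.
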
  SDValue Vec = Op.getOperand(0);
  SDValue Idx = Op.getOperand(1);
  SDLoc DL(Op);
  SDValue Result = Op;
  if (false /* Idx->isConstant() */) {
    // TODO: optimized implementation using constant values
  } else {
    SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
    SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
    SDValue PackedElt =
        SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
    SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
    SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
    SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
    Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
    PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
    SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64);
    PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
    SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
    Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                        MVT::i32, PackedElt, SubI32),
                     0);

    if (Op.getSimpleValueType() == MVT::f32) {
      Result = DAG.getBitcast(MVT::f32, Result);
    } else {
      assert(Op.getSimpleValueType() == MVT::i32);
    }
  }
  return Result;
}

SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
  MVT VT = Op.getOperand(0).getSimpleValueType();

  // Special treatment for packed V64 types.
  assert(VT == MVT::v512i32 || VT == MVT::v512f32);
  (void)VT;
  // The v512i32 and v512f32 types start from the upper bits (0..31).  These
  // "upper bits" require `val << 32` from the C implementation's point of
  // view.
  //
  // Example code:
  //   %packed_elt = extractelt %vr, (%idx >> 1)
  //   %shift = ((%idx & 1) ^ 1) << 5
  //   %packed_elt &= 0xffffffff00000000 >> shift
  //   %packed_elt |= (zext %val) << shift
  //   %vr = insertelt %vr, %packed_elt, (%idx >> 1)
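  //
  // A worked example (our illustration): inserting at %idx = 3 targets
  // 64-bit lane 1 (= 3 >> 1); the index is odd, so %shift = ((3 & 1) ^ 1) << 5
  // = 0, the mask keeps the upper 32 bits of the lane, and the new value lands
  // in the lower 32 bits.
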
  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);
  if (Idx.getSimpleValueType() == MVT::i32)
    Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
  if (Val.getSimpleValueType() == MVT::f32)
    Val = DAG.getBitcast(MVT::i32, Val);
  assert(Val.getSimpleValueType() == MVT::i32);
  Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);

  SDValue Result = Op;
  if (false /* Idx->isConstant()*/) {
    // TODO: optimized implementation using constant values
  } else {
    SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
    SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
    SDValue PackedElt =
        SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
    SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
    SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
    SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
    Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
    SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64);
    Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
    PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
    Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
    PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
    Result =
        SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
                                   {HalfIdx, PackedElt, Vec}),
                0);
  }
  return Result;
}