// Path: blob/main/contrib/llvm-project/llvm/lib/Target/VE/VEISelLowering.cpp
//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file implements the interfaces that VE uses to lower LLVM code into a9// selection DAG.10//11//===----------------------------------------------------------------------===//1213#include "VEISelLowering.h"14#include "MCTargetDesc/VEMCExpr.h"15#include "VECustomDAG.h"16#include "VEInstrBuilder.h"17#include "VEMachineFunctionInfo.h"18#include "VERegisterInfo.h"19#include "VETargetMachine.h"20#include "llvm/ADT/StringSwitch.h"21#include "llvm/CodeGen/CallingConvLower.h"22#include "llvm/CodeGen/MachineFrameInfo.h"23#include "llvm/CodeGen/MachineFunction.h"24#include "llvm/CodeGen/MachineInstrBuilder.h"25#include "llvm/CodeGen/MachineJumpTableInfo.h"26#include "llvm/CodeGen/MachineModuleInfo.h"27#include "llvm/CodeGen/MachineRegisterInfo.h"28#include "llvm/CodeGen/SelectionDAG.h"29#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"30#include "llvm/IR/DerivedTypes.h"31#include "llvm/IR/Function.h"32#include "llvm/IR/IRBuilder.h"33#include "llvm/IR/Module.h"34#include "llvm/Support/ErrorHandling.h"35#include "llvm/Support/KnownBits.h"36using namespace llvm;3738#define DEBUG_TYPE "ve-lower"3940//===----------------------------------------------------------------------===//41// Calling Convention Implementation42//===----------------------------------------------------------------------===//4344#include "VEGenCallingConv.inc"4546CCAssignFn *getReturnCC(CallingConv::ID CallConv) {47switch (CallConv) {48default:49return RetCC_VE_C;50case CallingConv::Fast:51return RetCC_VE_Fast;52}53}5455CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {56if (IsVarArg)57return CC_VE2;58switch (CallConv) 
{59default:60return CC_VE_C;61case CallingConv::Fast:62return CC_VE_Fast;63}64}6566bool VETargetLowering::CanLowerReturn(67CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,68const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {69CCAssignFn *RetCC = getReturnCC(CallConv);70SmallVector<CCValAssign, 16> RVLocs;71CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);72return CCInfo.CheckReturn(Outs, RetCC);73}7475static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,76MVT::v256f32, MVT::v512f32, MVT::v256f64};7778static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};7980static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};8182void VETargetLowering::initRegisterClasses() {83// Set up the register classes.84addRegisterClass(MVT::i32, &VE::I32RegClass);85addRegisterClass(MVT::i64, &VE::I64RegClass);86addRegisterClass(MVT::f32, &VE::F32RegClass);87addRegisterClass(MVT::f64, &VE::I64RegClass);88addRegisterClass(MVT::f128, &VE::F128RegClass);8990if (Subtarget->enableVPU()) {91for (MVT VecVT : AllVectorVTs)92addRegisterClass(VecVT, &VE::V64RegClass);93addRegisterClass(MVT::v256i1, &VE::VMRegClass);94addRegisterClass(MVT::v512i1, &VE::VM512RegClass);95}96}9798void VETargetLowering::initSPUActions() {99const auto &TM = getTargetMachine();100/// Load & Store {101102// VE doesn't have i1 sign extending load.103for (MVT VT : MVT::integer_valuetypes()) {104setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);105setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);106setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);107setTruncStoreAction(VT, MVT::i1, Expand);108}109110// VE doesn't have floating point extload/truncstore, so expand them.111for (MVT FPVT : MVT::fp_valuetypes()) {112for (MVT OtherFPVT : MVT::fp_valuetypes()) {113setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);114setTruncStoreAction(FPVT, OtherFPVT, Expand);115}116}117118// VE doesn't have fp128 load/store, so expand them in custom 
lower.119setOperationAction(ISD::LOAD, MVT::f128, Custom);120setOperationAction(ISD::STORE, MVT::f128, Custom);121122/// } Load & Store123124// Custom legalize address nodes into LO/HI parts.125MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));126setOperationAction(ISD::BlockAddress, PtrVT, Custom);127setOperationAction(ISD::GlobalAddress, PtrVT, Custom);128setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);129setOperationAction(ISD::ConstantPool, PtrVT, Custom);130setOperationAction(ISD::JumpTable, PtrVT, Custom);131132/// VAARG handling {133setOperationAction(ISD::VASTART, MVT::Other, Custom);134// VAARG needs to be lowered to access with 8 bytes alignment.135setOperationAction(ISD::VAARG, MVT::Other, Custom);136// Use the default implementation.137setOperationAction(ISD::VACOPY, MVT::Other, Expand);138setOperationAction(ISD::VAEND, MVT::Other, Expand);139/// } VAARG handling140141/// Stack {142setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);143setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);144145// Use the default implementation.146setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);147setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);148/// } Stack149150/// Branch {151152// VE doesn't have BRCOND153setOperationAction(ISD::BRCOND, MVT::Other, Expand);154155// BR_JT is not implemented yet.156setOperationAction(ISD::BR_JT, MVT::Other, Expand);157158/// } Branch159160/// Int Ops {161for (MVT IntVT : {MVT::i32, MVT::i64}) {162// VE has no REM or DIVREM operations.163setOperationAction(ISD::UREM, IntVT, Expand);164setOperationAction(ISD::SREM, IntVT, Expand);165setOperationAction(ISD::SDIVREM, IntVT, Expand);166setOperationAction(ISD::UDIVREM, IntVT, Expand);167168// VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.169setOperationAction(ISD::SHL_PARTS, IntVT, Expand);170setOperationAction(ISD::SRA_PARTS, IntVT, Expand);171setOperationAction(ISD::SRL_PARTS, IntVT, Expand);172173// VE has no MULHU/S or U/SMUL_LOHI 
operations.174// TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.175setOperationAction(ISD::MULHU, IntVT, Expand);176setOperationAction(ISD::MULHS, IntVT, Expand);177setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);178setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);179180// VE has no CTTZ, ROTL, ROTR operations.181setOperationAction(ISD::CTTZ, IntVT, Expand);182setOperationAction(ISD::ROTL, IntVT, Expand);183setOperationAction(ISD::ROTR, IntVT, Expand);184185// VE has 64 bits instruction which works as i64 BSWAP operation. This186// instruction works fine as i32 BSWAP operation with an additional187// parameter. Use isel patterns to lower BSWAP.188setOperationAction(ISD::BSWAP, IntVT, Legal);189190// VE has only 64 bits instructions which work as i64 BITREVERSE/CTLZ/CTPOP191// operations. Use isel patterns for i64, promote for i32.192LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;193setOperationAction(ISD::BITREVERSE, IntVT, Act);194setOperationAction(ISD::CTLZ, IntVT, Act);195setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);196setOperationAction(ISD::CTPOP, IntVT, Act);197198// VE has only 64 bits instructions which work as i64 AND/OR/XOR operations.199// Use isel patterns for i64, promote for i32.200setOperationAction(ISD::AND, IntVT, Act);201setOperationAction(ISD::OR, IntVT, Act);202setOperationAction(ISD::XOR, IntVT, Act);203204// Legal smax and smin205setOperationAction(ISD::SMAX, IntVT, Legal);206setOperationAction(ISD::SMIN, IntVT, Legal);207}208/// } Int Ops209210/// Conversion {211// VE doesn't have instructions for fp<->uint, so expand them by llvm212setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64213setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64214setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);215setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);216217// fp16 not supported218for (MVT FPVT : MVT::fp_valuetypes()) {219setOperationAction(ISD::FP16_TO_FP, FPVT, 
Expand);220setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);221}222/// } Conversion223224/// Floating-point Ops {225/// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,226/// and fcmp.227228// VE doesn't have following floating point operations.229for (MVT VT : MVT::fp_valuetypes()) {230setOperationAction(ISD::FNEG, VT, Expand);231setOperationAction(ISD::FREM, VT, Expand);232}233234// VE doesn't have fdiv of f128.235setOperationAction(ISD::FDIV, MVT::f128, Expand);236237for (MVT FPVT : {MVT::f32, MVT::f64}) {238// f32 and f64 uses ConstantFP. f128 uses ConstantPool.239setOperationAction(ISD::ConstantFP, FPVT, Legal);240}241/// } Floating-point Ops242243/// Floating-point math functions {244245// VE doesn't have following floating point math functions.246for (MVT VT : MVT::fp_valuetypes()) {247setOperationAction(ISD::FABS, VT, Expand);248setOperationAction(ISD::FCOPYSIGN, VT, Expand);249setOperationAction(ISD::FCOS, VT, Expand);250setOperationAction(ISD::FMA, VT, Expand);251setOperationAction(ISD::FPOW, VT, Expand);252setOperationAction(ISD::FSIN, VT, Expand);253setOperationAction(ISD::FSQRT, VT, Expand);254}255256// VE has single and double FMINNUM and FMAXNUM257for (MVT VT : {MVT::f32, MVT::f64}) {258setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, VT, Legal);259}260261/// } Floating-point math functions262263/// Atomic instructions {264265setMaxAtomicSizeInBitsSupported(64);266setMinCmpXchgSizeInBits(32);267setSupportsUnalignedAtomics(false);268269// Use custom inserter for ATOMIC_FENCE.270setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);271272// Other atomic instructions.273for (MVT VT : MVT::integer_valuetypes()) {274// Support i8/i16 atomic swap.275setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);276277// FIXME: Support "atmam" instructions.278setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);279setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);280setOperationAction(ISD::ATOMIC_LOAD_AND, VT, 
Expand);281setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);282283// VE doesn't have follwing instructions.284setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);285setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);286setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);287setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);288setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);289setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);290setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);291setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);292}293294/// } Atomic instructions295296/// SJLJ instructions {297setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);298setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);299setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);300if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)301setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");302/// } SJLJ instructions303304// Intrinsic instructions305setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);306}307308void VETargetLowering::initVPUActions() {309for (MVT LegalMaskVT : AllMaskVTs)310setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);311312for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})313setOperationAction(Opc, MVT::v512i1, Custom);314315for (MVT LegalVecVT : AllVectorVTs) {316setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);317setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);318setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);319// Translate all vector instructions with legal element types to VVP_*320// nodes.321// TODO We will custom-widen into VVP_* nodes in the future. 
While we are322// buildling the infrastructure for this, we only do this for legal vector323// VTs.324#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME) \325setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);326#define ADD_VVP_OP(VVP_NAME, ISD_NAME) \327setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);328setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, LegalVecVT, Custom);329setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_STORE, LegalVecVT, Custom);330#include "VVPNodes.def"331}332333for (MVT LegalPackedVT : AllPackedVTs) {334setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);335setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);336}337338// vNt32, vNt64 ops (legal element types)339for (MVT VT : MVT::vector_valuetypes()) {340MVT ElemVT = VT.getVectorElementType();341unsigned ElemBits = ElemVT.getScalarSizeInBits();342if (ElemBits != 32 && ElemBits != 64)343continue;344345for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})346setOperationAction(MemOpc, VT, Custom);347348const ISD::NodeType IntReductionOCs[] = {349ISD::VECREDUCE_ADD, ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND,350ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMIN,351ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX};352353for (unsigned IntRedOpc : IntReductionOCs)354setOperationAction(IntRedOpc, VT, Custom);355}356357// v256i1 and v512i1 ops358for (MVT MaskVT : AllMaskVTs) {359// Custom lower mask ops360setOperationAction(ISD::STORE, MaskVT, Custom);361setOperationAction(ISD::LOAD, MaskVT, Custom);362}363}364365SDValue366VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,367bool IsVarArg,368const SmallVectorImpl<ISD::OutputArg> &Outs,369const SmallVectorImpl<SDValue> &OutVals,370const SDLoc &DL, SelectionDAG &DAG) const {371// CCValAssign - represent the assignment of the return value to locations.372SmallVector<CCValAssign, 16> RVLocs;373374// CCState - Info about the registers and stack slot.375CCState 
CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,376*DAG.getContext());377378// Analyze return values.379CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));380381SDValue Glue;382SmallVector<SDValue, 4> RetOps(1, Chain);383384// Copy the result values into the output registers.385for (unsigned i = 0; i != RVLocs.size(); ++i) {386CCValAssign &VA = RVLocs[i];387assert(VA.isRegLoc() && "Can only return in registers!");388assert(!VA.needsCustom() && "Unexpected custom lowering");389SDValue OutVal = OutVals[i];390391// Integer return values must be sign or zero extended by the callee.392switch (VA.getLocInfo()) {393case CCValAssign::Full:394break;395case CCValAssign::SExt:396OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);397break;398case CCValAssign::ZExt:399OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);400break;401case CCValAssign::AExt:402OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);403break;404case CCValAssign::BCvt: {405// Convert a float return value to i64 with padding.406// 63 31 0407// +------+------+408// | float| 0 |409// +------+------+410assert(VA.getLocVT() == MVT::i64);411assert(VA.getValVT() == MVT::f32);412SDValue Undef = SDValue(413DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);414SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);415OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,416MVT::i64, Undef, OutVal, Sub_f32),4170);418break;419}420default:421llvm_unreachable("Unknown loc info!");422}423424Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Glue);425426// Guarantee that all emitted copies are stuck together with flags.427Glue = Chain.getValue(1);428RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));429}430431RetOps[0] = Chain; // Update chain.432433// Add the glue if we have it.434if (Glue.getNode())435RetOps.push_back(Glue);436437return DAG.getNode(VEISD::RET_GLUE, DL, MVT::Other, 
RetOps);438}439440SDValue VETargetLowering::LowerFormalArguments(441SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,442const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,443SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {444MachineFunction &MF = DAG.getMachineFunction();445446// Get the base offset of the incoming arguments stack space.447unsigned ArgsBaseOffset = Subtarget->getRsaSize();448// Get the size of the preserved arguments area449unsigned ArgsPreserved = 64;450451// Analyze arguments according to CC_VE.452SmallVector<CCValAssign, 16> ArgLocs;453CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,454*DAG.getContext());455// Allocate the preserved area first.456CCInfo.AllocateStack(ArgsPreserved, Align(8));457// We already allocated the preserved area, so the stack offset computed458// by CC_VE would be correct now.459CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));460461for (const CCValAssign &VA : ArgLocs) {462assert(!VA.needsCustom() && "Unexpected custom lowering");463if (VA.isRegLoc()) {464// This argument is passed in a register.465// All integer register arguments are promoted by the caller to i64.466467// Create a virtual register for the promoted live-in value.468Register VReg =469MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));470SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());471472// The caller promoted the argument, so insert an Assert?ext SDNode so we473// won't promote the value again in this function.474switch (VA.getLocInfo()) {475case CCValAssign::SExt:476Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,477DAG.getValueType(VA.getValVT()));478break;479case CCValAssign::ZExt:480Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,481DAG.getValueType(VA.getValVT()));482break;483case CCValAssign::BCvt: {484// Extract a float argument from i64 with padding.485// 63 31 0486// +------+------+487// | float| 0 |488// 
+------+------+489assert(VA.getLocVT() == MVT::i64);490assert(VA.getValVT() == MVT::f32);491SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);492Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,493MVT::f32, Arg, Sub_f32),4940);495break;496}497default:498break;499}500501// Truncate the register down to the argument type.502if (VA.isExtInLoc())503Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);504505InVals.push_back(Arg);506continue;507}508509// The registers are exhausted. This argument was passed on the stack.510assert(VA.isMemLoc());511// The CC_VE_Full/Half functions compute stack offsets relative to the512// beginning of the arguments area at %fp + the size of reserved area.513unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;514unsigned ValSize = VA.getValVT().getSizeInBits() / 8;515516// Adjust offset for a float argument by adding 4 since the argument is517// stored in 8 bytes buffer with offset like below. LLVM generates518// 4 bytes load instruction, so need to adjust offset here. This519// adjustment is required in only LowerFormalArguments. 
In LowerCall,520// a float argument is converted to i64 first, and stored as 8 bytes521// data, which is required by ABI, so no need for adjustment.522// 0 4523// +------+------+524// | empty| float|525// +------+------+526if (VA.getValVT() == MVT::f32)527Offset += 4;528529int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);530InVals.push_back(531DAG.getLoad(VA.getValVT(), DL, Chain,532DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),533MachinePointerInfo::getFixedStack(MF, FI)));534}535536if (!IsVarArg)537return Chain;538539// This function takes variable arguments, some of which may have been passed540// in registers %s0-%s8.541//542// The va_start intrinsic needs to know the offset to the first variable543// argument.544// TODO: need to calculate offset correctly once we support f128.545unsigned ArgOffset = ArgLocs.size() * 8;546VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();547// Skip the reserved area at the top of stack.548FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);549550return Chain;551}552553// FIXME? 
Maybe this could be a TableGen attribute on some registers and554// this table could be generated automatically from RegInfo.555Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,556const MachineFunction &MF) const {557Register Reg = StringSwitch<Register>(RegName)558.Case("sp", VE::SX11) // Stack pointer559.Case("fp", VE::SX9) // Frame pointer560.Case("sl", VE::SX8) // Stack limit561.Case("lr", VE::SX10) // Link register562.Case("tp", VE::SX14) // Thread pointer563.Case("outer", VE::SX12) // Outer regiser564.Case("info", VE::SX17) // Info area register565.Case("got", VE::SX15) // Global offset table register566.Case("plt", VE::SX16) // Procedure linkage table register567.Default(0);568569if (Reg)570return Reg;571572report_fatal_error("Invalid register name global variable");573}574575//===----------------------------------------------------------------------===//576// TargetLowering Implementation577//===----------------------------------------------------------------------===//578579SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,580SmallVectorImpl<SDValue> &InVals) const {581SelectionDAG &DAG = CLI.DAG;582SDLoc DL = CLI.DL;583SDValue Chain = CLI.Chain;584auto PtrVT = getPointerTy(DAG.getDataLayout());585586// VE target does not yet support tail call optimization.587CLI.IsTailCall = false;588589// Get the base offset of the outgoing arguments stack space.590unsigned ArgsBaseOffset = Subtarget->getRsaSize();591// Get the size of the preserved arguments area592unsigned ArgsPreserved = 8 * 8u;593594// Analyze operands of the call, assigning locations to each operand.595SmallVector<CCValAssign, 16> ArgLocs;596CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,597*DAG.getContext());598// Allocate the preserved area first.599CCInfo.AllocateStack(ArgsPreserved, Align(8));600// We already allocated the preserved area, so the stack offset computed601// by CC_VE would be correct 
now.602CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));603604// VE requires to use both register and stack for varargs or no-prototyped605// functions.606bool UseBoth = CLI.IsVarArg;607608// Analyze operands again if it is required to store BOTH.609SmallVector<CCValAssign, 16> ArgLocs2;610CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),611ArgLocs2, *DAG.getContext());612if (UseBoth)613CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));614615// Get the size of the outgoing arguments stack space requirement.616unsigned ArgsSize = CCInfo.getStackSize();617618// Keep stack frames 16-byte aligned.619ArgsSize = alignTo(ArgsSize, 16);620621// Adjust the stack pointer to make room for the arguments.622// FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls623// with more than 6 arguments.624Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);625626// Collect the set of registers to pass to the function and their values.627// This will be emitted as a sequence of CopyToReg nodes glued to the call628// instruction.629SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;630631// Collect chains from all the memory opeations that copy arguments to the632// stack. 
They must follow the stack pointer adjustment above and precede the633// call instruction itself.634SmallVector<SDValue, 8> MemOpChains;635636// VE needs to get address of callee function in a register637// So, prepare to copy it to SX12 here.638639// If the callee is a GlobalAddress node (quite common, every direct call is)640// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.641// Likewise ExternalSymbol -> TargetExternalSymbol.642SDValue Callee = CLI.Callee;643644bool IsPICCall = isPositionIndependent();645646// PC-relative references to external symbols should go through $stub.647// If so, we need to prepare GlobalBaseReg first.648const TargetMachine &TM = DAG.getTarget();649const GlobalValue *GV = nullptr;650auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);651if (CalleeG)652GV = CalleeG->getGlobal();653bool Local = TM.shouldAssumeDSOLocal(GV);654bool UsePlt = !Local;655MachineFunction &MF = DAG.getMachineFunction();656657// Turn GlobalAddress/ExternalSymbol node into a value node658// containing the address of them here.659if (CalleeG) {660if (IsPICCall) {661if (UsePlt)662Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);663Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);664Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);665} else {666Callee =667makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);668}669} else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {670if (IsPICCall) {671if (UsePlt)672Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);673Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);674Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);675} else {676Callee =677makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);678}679}680681RegsToPass.push_back(std::make_pair(VE::SX12, Callee));682683for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {684CCValAssign &VA = ArgLocs[i];685SDValue Arg = CLI.OutVals[i];686687// Promote the 
value if needed.688switch (VA.getLocInfo()) {689default:690llvm_unreachable("Unknown location info!");691case CCValAssign::Full:692break;693case CCValAssign::SExt:694Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);695break;696case CCValAssign::ZExt:697Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);698break;699case CCValAssign::AExt:700Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);701break;702case CCValAssign::BCvt: {703// Convert a float argument to i64 with padding.704// 63 31 0705// +------+------+706// | float| 0 |707// +------+------+708assert(VA.getLocVT() == MVT::i64);709assert(VA.getValVT() == MVT::f32);710SDValue Undef = SDValue(711DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);712SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);713Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,714MVT::i64, Undef, Arg, Sub_f32),7150);716break;717}718}719720if (VA.isRegLoc()) {721RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));722if (!UseBoth)723continue;724VA = ArgLocs2[i];725}726727assert(VA.isMemLoc());728729// Create a store off the stack pointer for this argument.730SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);731// The argument area starts at %fp/%sp + the size of reserved area.732SDValue PtrOff =733DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);734PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);735MemOpChains.push_back(736DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));737}738739// Emit all stores, make sure they occur before the call.740if (!MemOpChains.empty())741Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);742743// Build a sequence of CopyToReg nodes glued together with token chain and744// glue operands which copy the outgoing args into registers. 
The InGlue is745// necessary since all emitted instructions must be stuck together in order746// to pass the live physical registers.747SDValue InGlue;748for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {749Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,750RegsToPass[i].second, InGlue);751InGlue = Chain.getValue(1);752}753754// Build the operands for the call instruction itself.755SmallVector<SDValue, 8> Ops;756Ops.push_back(Chain);757for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)758Ops.push_back(DAG.getRegister(RegsToPass[i].first,759RegsToPass[i].second.getValueType()));760761// Add a register mask operand representing the call-preserved registers.762const VERegisterInfo *TRI = Subtarget->getRegisterInfo();763const uint32_t *Mask =764TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);765assert(Mask && "Missing call preserved mask for calling convention");766Ops.push_back(DAG.getRegisterMask(Mask));767768// Make sure the CopyToReg nodes are glued to the call instruction which769// consumes the registers.770if (InGlue.getNode())771Ops.push_back(InGlue);772773// Now the call itself.774SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);775Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);776InGlue = Chain.getValue(1);777778// Revert the stack pointer immediately after the call.779Chain = DAG.getCALLSEQ_END(Chain, ArgsSize, 0, InGlue, DL);780InGlue = Chain.getValue(1);781782// Now extract the return values. 
This is more or less the same as783// LowerFormalArguments.784785// Assign locations to each value returned by this call.786SmallVector<CCValAssign, 16> RVLocs;787CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,788*DAG.getContext());789790// Set inreg flag manually for codegen generated library calls that791// return float.792if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)793CLI.Ins[0].Flags.setInReg();794795RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));796797// Copy all of the result registers out of their specified physreg.798for (unsigned i = 0; i != RVLocs.size(); ++i) {799CCValAssign &VA = RVLocs[i];800assert(!VA.needsCustom() && "Unexpected custom lowering");801Register Reg = VA.getLocReg();802803// When returning 'inreg {i32, i32 }', two consecutive i32 arguments can804// reside in the same register in the high and low bits. Reuse the805// CopyFromReg previous node to avoid duplicate copies.806SDValue RV;807if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))808if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)809RV = Chain.getValue(0);810811// But usually we'll create a new CopyFromReg for a different register.812if (!RV.getNode()) {813RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);814Chain = RV.getValue(1);815InGlue = Chain.getValue(2);816}817818// The callee promoted the return value, so insert an Assert?ext SDNode so819// we won't promote the value again in this function.820switch (VA.getLocInfo()) {821case CCValAssign::SExt:822RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,823DAG.getValueType(VA.getValVT()));824break;825case CCValAssign::ZExt:826RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,827DAG.getValueType(VA.getValVT()));828break;829case CCValAssign::BCvt: {830// Extract a float return value from i64 with padding.831// 63 31 0832// +------+------+833// | float| 0 |834// 
+------+------+835assert(VA.getLocVT() == MVT::i64);836assert(VA.getValVT() == MVT::f32);837SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);838RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,839MVT::f32, RV, Sub_f32),8400);841break;842}843default:844break;845}846847// Truncate the register down to the return value type.848if (VA.isExtInLoc())849RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);850851InVals.push_back(RV);852}853854return Chain;855}856857bool VETargetLowering::isOffsetFoldingLegal(858const GlobalAddressSDNode *GA) const {859// VE uses 64 bit addressing, so we need multiple instructions to generate860// an address. Folding address with offset increases the number of861// instructions, so that we disable it here. Offsets will be folded in862// the DAG combine later if it worth to do so.863return false;864}865866/// isFPImmLegal - Returns true if the target can instruction select the867/// specified FP immediate natively. If false, the legalizer will868/// materialize the FP immediate as a load from a constant pool.869bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,870bool ForCodeSize) const {871return VT == MVT::f32 || VT == MVT::f64;872}873874/// Determine if the target supports unaligned memory accesses.875///876/// This function returns true if the target allows unaligned memory accesses877/// of the specified type in the given address space. If true, it also returns878/// whether the unaligned memory access is "fast" in the last argument by879/// reference. This is used, for example, in situations where an array880/// copy/move/set is converted to a sequence of store operations. 
/// Its use
/// helps to ensure that such replacements don't generate code that causes an
/// alignment error (trap) on the target machine.
bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                      unsigned AddrSpace,
                                                      Align A,
                                                      MachineMemOperand::Flags,
                                                      unsigned *Fast) const {
  if (Fast) {
    // It's fast anytime on VE
    *Fast = 1;
  }
  return true;
}

VETargetLowering::VETargetLowering(const TargetMachine &TM,
                                   const VESubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // Instructions which use registers as conditionals examine all the
  // bits (as does the pseudo SELECT_CC expansion). I don't think it
  // matters much whether it's ZeroOrOneBooleanContent, or
  // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
  // former.
  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent);

  initRegisterClasses();
  initSPUActions();
  initVPUActions();

  setStackPointerRegisterToSaveRestore(VE::SX11);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::TRUNCATE);
  setTargetDAGCombine(ISD::SELECT);
  setTargetDAGCombine(ISD::SELECT_CC);

  // Set function alignment to 16 bytes
  setMinFunctionAlignment(Align(16));

  // VE stores all argument by 8 bytes alignment
  setMinStackArgumentAlignment(Align(8));

  computeRegisterProperties(Subtarget->getRegisterInfo());
}

// Map a VEISD target opcode to its printable name (for DAG dumps).
const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
#define TARGET_NODE_CASE(NAME)                                                 \
  case VEISD::NAME:                                                            \
    return "VEISD::" #NAME;
  switch ((VEISD::NodeType)Opcode) {
  case
VEISD::FIRST_NUMBER:
    break;
    TARGET_NODE_CASE(CMPI)
    TARGET_NODE_CASE(CMPU)
    TARGET_NODE_CASE(CMPF)
    TARGET_NODE_CASE(CMPQ)
    TARGET_NODE_CASE(CMOV)
    TARGET_NODE_CASE(CALL)
    TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
    TARGET_NODE_CASE(EH_SJLJ_SETJMP)
    TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
    TARGET_NODE_CASE(GETFUNPLT)
    TARGET_NODE_CASE(GETSTACKTOP)
    TARGET_NODE_CASE(GETTLSADDR)
    TARGET_NODE_CASE(GLOBAL_BASE_REG)
    TARGET_NODE_CASE(Hi)
    TARGET_NODE_CASE(Lo)
    TARGET_NODE_CASE(RET_GLUE)
    TARGET_NODE_CASE(TS1AM)
    TARGET_NODE_CASE(VEC_UNPACK_LO)
    TARGET_NODE_CASE(VEC_UNPACK_HI)
    TARGET_NODE_CASE(VEC_PACK)
    TARGET_NODE_CASE(VEC_BROADCAST)
    TARGET_NODE_CASE(REPL_I32)
    TARGET_NODE_CASE(REPL_F32)

    TARGET_NODE_CASE(LEGALAVL)

    // Register the VVP_* SDNodes.
#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
#include "VVPNodes.def"
  }
#undef TARGET_NODE_CASE
  return nullptr;
}

EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                         EVT VT) const {
  return MVT::i32;
}

// Convert to a target node and set target flags.
SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
                                          SelectionDAG &DAG) const {
  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
    return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
                                      GA->getValueType(0), GA->getOffset(), TF);

  if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
    return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
                                     0, TF);

  if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
    return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
                                     CP->getAlign(), CP->getOffset(), TF);

  if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
    return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
                                       TF);

  if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
    return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);

  llvm_unreachable("Unhandled address SDNode");
}

// Split Op into high and low parts according to HiTF and LoTF.
// Return an ADD node combining the parts.
SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
                                       SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
  SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
  return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
}

// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
// or ExternalSymbol SDNode.
SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = Op.getValueType();

  // Handle PIC mode first. VE needs a got load for every variable!
  if (isPositionIndependent()) {
    auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);

    if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
        (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
      // Create following instructions for local linkage PIC code.
      //     lea %reg, label@gotoff_lo
      //     and %reg, %reg, (32)0
      //     lea.sl %reg, label@gotoff_hi(%reg, %got)
      SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
                                  VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
      SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
      return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    }
    // Create following instructions for not local linkage PIC code.
    //     lea %reg, label@got_lo
    //     and %reg, %reg, (32)0
    //     lea.sl %reg, label@got_hi(%reg)
    //     ld %reg, (%reg, %got)
    SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
                                VEMCExpr::VK_VE_GOT_LO32, DAG);
    SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
    SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    return
DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
                       MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  // This is one of the absolute code models.
  switch (getTargetMachine().getCodeModel()) {
  default:
    llvm_unreachable("Unsupported absolute code model");
  case CodeModel::Small:
  case CodeModel::Medium:
  case CodeModel::Large:
    // abs64.
    return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
  }
}

/// Custom Lower {

// The mappings for emitLeading/TrailingFence for VE is designed by following
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
                                                Instruction *Inst,
                                                AtomicOrdering Ord) const {
  switch (Ord) {
  case AtomicOrdering::NotAtomic:
  case AtomicOrdering::Unordered:
    llvm_unreachable("Invalid fence: unordered/non-atomic");
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Acquire:
    return nullptr; // Nothing to do
  case AtomicOrdering::Release:
  case AtomicOrdering::AcquireRelease:
    return Builder.CreateFence(AtomicOrdering::Release);
  case AtomicOrdering::SequentiallyConsistent:
    if (!Inst->hasAtomicStore())
      return nullptr; // Nothing to do
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  }
  llvm_unreachable("Unknown fence ordering in emitLeadingFence");
}

Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
                                                 Instruction *Inst,
                                                 AtomicOrdering Ord) const {
  switch (Ord) {
  case AtomicOrdering::NotAtomic:
  case AtomicOrdering::Unordered:
    llvm_unreachable("Invalid fence: unordered/not-atomic");
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Release:
    return nullptr; // Nothing to do
  case AtomicOrdering::Acquire:
  case AtomicOrdering::AcquireRelease:
    return Builder.CreateFence(AtomicOrdering::Acquire);
  case AtomicOrdering::SequentiallyConsistent:
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  }
  llvm_unreachable("Unknown fence ordering in emitTrailingFence");
}

// Lower ATOMIC_FENCE.  Cross-thread (SyncScope::System) fences become a
// "fencem" machine node; any other scope codegens to a MEMBARRIER no-op.
SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicOrdering FenceOrdering =
      static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  // VE uses Release consistency, so need a fence instruction if it is a
  // cross-thread fence.
  if (FenceSSID == SyncScope::System) {
    switch (FenceOrdering) {
    case AtomicOrdering::NotAtomic:
    case AtomicOrdering::Unordered:
    case AtomicOrdering::Monotonic:
      // No need to generate fencem instruction here.
      break;
    case AtomicOrdering::Acquire:
      // Generate "fencem 2" as acquire fence.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(2, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    case AtomicOrdering::Release:
      // Generate "fencem 1" as release fence.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(1, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      // Generate "fencem 3" as acq_rel and seq_cst fence.
      // FIXME: "fencem 3" doesn't wait for PCIe devices accesses,
      // so seq_cst may require more instruction for them.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(3, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    }
  }

  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
  return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}

TargetLowering::AtomicExpansionKind
VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have TS1AM implementation for i8/i16/i32/i64, so use it.
  if (AI->getOperation() == AtomicRMWInst::Xchg)
{
    return AtomicExpansionKind::None;
  }
  // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.

  // Otherwise, expand it using compare and exchange instruction to not call
  // __sync_fetch_and_* functions.
  return AtomicExpansionKind::CmpXChg;
}

// Compute the TS1AM operands for a sub-word atomic swap: the byte-enable
// Flag, the bit shift amount Bits, and the shifted new value (returned).
static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
                            SDValue &Bits) {
  SDLoc DL(Op);
  AtomicSDNode *N = cast<AtomicSDNode>(Op);
  SDValue Ptr = N->getOperand(1);
  SDValue Val = N->getOperand(2);
  EVT PtrVT = Ptr.getValueType();
  bool Byte = N->getMemoryVT() == MVT::i8;
  //   Remainder = AND Ptr, 3
  //   Flag = 1 << Remainder  ; If Byte is true (1 byte swap flag)
  //   Flag = 3 << Remainder  ; If Byte is false (2 bytes swap flag)
  //   Bits = Remainder << 3
  //   NewVal = Val << Bits
  SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
  SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
  SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
                      : DAG.getConstant(3, DL, MVT::i32);
  Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
  Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
  return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
}

// Shift the TS1AM result back down and mask it to the sub-word width.
static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
                             SDValue Bits) {
  SDLoc DL(Op);
  EVT VT = Data.getValueType();
  bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
  //   NewData = Data >> Bits
  //   Result = NewData & 0xff   ; If Byte is true (1 byte)
  //   Result = NewData & 0xffff ; If Byte is false (2 bytes)

  SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
  return DAG.getNode(ISD::AND, DL, VT,
                     {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
}

// Lower i8/i16 ATOMIC_SWAP to the VE "ts1am" instruction on the containing
// 4-byte-aligned word; other widths are left for generic legalization.
SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicSDNode *N = cast<AtomicSDNode>(Op);

  if (N->getMemoryVT() == MVT::i8) {
    // For i8, use "ts1am"
    //   Input:
    //     ATOMIC_SWAP Ptr, Val, Order
    //
    //   Output:
    //     Remainder = AND Ptr, 3
    //     Flag = 1 << Remainder   ; 1 byte swap flag for TS1AM inst.
    //     Bits = Remainder << 3
    //     NewVal = Val << Bits
    //
    //     Aligned = AND Ptr, -4
    //     Data = TS1AM Aligned, Flag, NewVal
    //
    //     NewData = Data >> Bits
    //     Result = NewData & 0xff ; 1 byte result
    SDValue Flag;
    SDValue Bits;
    SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);

    SDValue Ptr = N->getOperand(1);
    SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
                                  {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
    SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
                                  DAG.getVTList(Op.getNode()->getValueType(0),
                                                Op.getNode()->getValueType(1)),
                                  {N->getChain(), Aligned, Flag, NewVal},
                                  N->getMemOperand());

    SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
    SDValue Chain = TS1AM.getValue(1);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  if (N->getMemoryVT() == MVT::i16) {
    // For i16, use "ts1am"
    SDValue Flag;
    SDValue Bits;
    SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);

    SDValue Ptr = N->getOperand(1);
    SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
                                  {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
    SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
                                  DAG.getVTList(Op.getNode()->getValueType(0),
                                                Op.getNode()->getValueType(1)),
                                  {N->getChain(), Aligned, Flag, NewVal},
                                  N->getMemOperand());

    SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
    SDValue Chain = TS1AM.getValue(1);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  // Otherwise, let llvm legalize it.
  return Op;
}

SDValue
VETargetLowering::lowerGlobalAddress(SDValue Op,
                                     SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::lowerConstantPool(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue
VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Generate the following code:
  //   t1: ch,glue = callseq_start t0, 0, 0
  //   t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
  //   t3: ch,glue = callseq_end t2, 0, 0, t2:2
  //   t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
  SDValue Label = withTargetFlags(Op, 0, DAG);
  EVT PtrVT = Op.getValueType();

  // Lowering the machine isd will make sure everything is in the right
  // location.
  SDValue Chain = DAG.getEntryNode();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
      DAG.getMachineFunction(), CallingConv::C);
  Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
  SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
  Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
  Chain = DAG.getCALLSEQ_END(Chain, 64, 0, Chain.getValue(1), DL);
  Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));

  // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setHasCalls(true);

  // Also generate code to prepare a GOT register if it is PIC.
  if (isPositionIndependent()) {
    MachineFunction &MF = DAG.getMachineFunction();
    Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
  }

  return Chain;
}

SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The current implementation of nld (2.26) doesn't allow local exec model
  // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
  // generate the general dynamic model code sequence.
  //
  // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
  return lowerToTLSGeneralDynamicModel(Op, DAG);
}

SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

// Lower a f128 load into two f64 loads.
static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
  assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
  Align Alignment = LdNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  SDValue Lo64 =
      DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
                  LdNode->getPointerInfo(), Alignment,
                  LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                       : MachineMemOperand::MONone);
  EVT AddrVT = LdNode->getBasePtr().getValueType();
  SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
                              DAG.getConstant(8, DL, AddrVT));
  SDValue Hi64 =
      DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
                  LdNode->getPointerInfo(), Alignment,
                  LdNode->isVolatile() ?
MachineMemOperand::MOVolatile
                                       : MachineMemOperand::MONone);

  SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
  SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);

  // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
  SDNode *InFP128 =
      DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
  InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
                               SDValue(InFP128, 0), Hi64, SubRegEven);
  InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
                               SDValue(InFP128, 0), Lo64, SubRegOdd);
  SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
                          SDValue(Hi64.getNode(), 1)};
  SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
  return DAG.getMergeValues(Ops, DL);
}

// Lower a vXi1 load into following instructions
//   LDrii %1, (,%addr)
//   LVMxir  %vm, 0, %1
//   LDrii %2, 8(,%addr)
//   LVMxir  %vm, 0, %2
//   ...
static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
  assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");

  SDValue BasePtr = LdNode->getBasePtr();
  Align Alignment = LdNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  EVT AddrVT = BasePtr.getValueType();
  EVT MemVT = LdNode->getMemoryVT();
  if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
    SDValue OutChains[4];
    SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
    for (int i = 0; i < 4; ++i) {
      // Generate load dag and prepare chains.
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      SDValue Val =
          DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
                      LdNode->getPointerInfo(), Alignment,
                      LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                           : MachineMemOperand::MONone);
      OutChains[i] = SDValue(Val.getNode(), 1);

      VM = DAG.getMachineNode(VE::LVMir_m, DL, MVT::i64,
                              DAG.getTargetConstant(i, DL, MVT::i64), Val,
                              SDValue(VM, 0));
    }
    SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
    SDValue Ops[2] = {SDValue(VM, 0), OutChain};
    return DAG.getMergeValues(Ops, DL);
  } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
    SDValue OutChains[8];
    SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
    for (int i = 0; i < 8; ++i) {
      // Generate load dag and prepare chains.
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      SDValue Val =
          DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
                      LdNode->getPointerInfo(), Alignment,
                      LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                           : MachineMemOperand::MONone);
      OutChains[i] = SDValue(Val.getNode(), 1);

      VM = DAG.getMachineNode(VE::LVMyir_y, DL, MVT::i64,
                              DAG.getTargetConstant(i, DL, MVT::i64), Val,
                              SDValue(VM, 0));
    }
    SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
    SDValue Ops[2] = {SDValue(VM, 0), OutChain};
    return DAG.getMergeValues(Ops, DL);
  } else {
    // Otherwise, ask llvm to expand it.
    return SDValue();
  }
}

SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
  EVT MemVT = LdNode->getMemoryVT();

  // If VPU is enabled, always expand non-mask vector loads to VVP
  if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
    return lowerToVVP(Op, DAG);

  SDValue BasePtr = LdNode->getBasePtr();
  if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
    // Do not expand load instruction with frame index here because of
    // dependency problems.
We expand it later in eliminateFrameIndex().
    return Op;
  }

  if (MemVT == MVT::f128)
    return lowerLoadF128(Op, DAG);
  if (isMaskType(MemVT))
    return lowerLoadI1(Op, DAG);

  return Op;
}

// Lower a f128 store into two f64 stores.
static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
  assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");

  SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
  SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);

  SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
                                    StNode->getValue(), SubRegEven);
  SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
                                    StNode->getValue(), SubRegOdd);

  Align Alignment = StNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
  SDValue OutChains[2];
  OutChains[0] =
      DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0),
                   StNode->getBasePtr(), MachinePointerInfo(), Alignment,
                   StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                        : MachineMemOperand::MONone);
  EVT AddrVT = StNode->getBasePtr().getValueType();
  SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(),
                              DAG.getConstant(8, DL, AddrVT));
  OutChains[1] =
      DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr,
                   MachinePointerInfo(), Alignment,
                   StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                        : MachineMemOperand::MONone);
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
}

// Lower a vXi1 store into following instructions
//   SVMi  %1, %vm, 0
//   STrii %1, (,%addr)
//   SVMi  %2, %vm, 1
//   STrii %2, 8(,%addr)
//   ...
static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
  assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");

  SDValue BasePtr = StNode->getBasePtr();
  Align Alignment = StNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);
  EVT AddrVT = BasePtr.getValueType();
  EVT MemVT = StNode->getMemoryVT();
  if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
    SDValue OutChains[4];
    for (int i = 0; i < 4; ++i) {
      SDNode *V =
          DAG.getMachineNode(VE::SVMmi, DL, MVT::i64, StNode->getValue(),
                             DAG.getTargetConstant(i, DL, MVT::i64));
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      OutChains[i] =
          DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
                       MachinePointerInfo(), Alignment,
                       StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                            : MachineMemOperand::MONone);
    }
    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
    SDValue OutChains[8];
    for (int i = 0; i < 8; ++i) {
      SDNode *V =
          DAG.getMachineNode(VE::SVMyi, DL, MVT::i64, StNode->getValue(),
                             DAG.getTargetConstant(i, DL, MVT::i64));
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      OutChains[i] =
          DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
                       MachinePointerInfo(), Alignment,
                       StNode->isVolatile() ?
MachineMemOperand::MOVolatile
                                            : MachineMemOperand::MONone);
    }
    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  } else {
    // Otherwise, ask llvm to expand it.
    return SDValue();
  }
}

SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
  assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
  EVT MemVT = StNode->getMemoryVT();

  // If VPU is enabled, always expand non-mask vector stores to VVP
  if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
    return lowerToVVP(Op, DAG);

  SDValue BasePtr = StNode->getBasePtr();
  if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
    // Do not expand store instruction with frame index here because of
    // dependency problems.  We expand it later in eliminateFrameIndex().
    return Op;
  }

  if (MemVT == MVT::f128)
    return lowerStoreF128(Op, DAG);
  if (isMaskType(MemVT))
    return lowerStoreI1(Op, DAG);

  // Otherwise, ask llvm to expand it.
  return SDValue();
}

// Lower VASTART: store the address of the variable-arguments area
// (frame pointer SX9 plus VarArgsFrameOffset) to the va_list slot.
SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Need frame address to find the address of VarArgsFrameIndex.
  MF.getFrameInfo().setFrameAddressIsTaken(true);

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDLoc DL(Op);
  SDValue Offset =
      DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
                  DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

// Lower VAARG: read the current va_list pointer, advance it (16-byte
// aligned slot for f128, 8 bytes otherwise; f32 lives in the upper half
// of its 8-byte slot), store the new pointer back, then load the value.
SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  EVT PtrVT = VAListPtr.getValueType();
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc DL(Node);
  SDValue VAList =
      DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
  SDValue Chain = VAList.getValue(1);
  SDValue NextPtr;

  if (VT == MVT::f128) {
    // VE f128 values must be stored with 16 bytes alignment.  We don't
    // know the actual alignment of VAList, so we take alignment of it
    // dynamically.
    int Align = 16;
    VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                         DAG.getConstant(Align - 1, DL, PtrVT));
    VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
                         DAG.getConstant(-Align, DL, PtrVT));
    // Increment the pointer, VAList, by 16 to the next vaarg.
    NextPtr =
        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL));
  } else if (VT == MVT::f32) {
    // float --> need special handling like below.
    //    0      4
    //    +------+------+
    //    | empty| float|
    //    +------+------+
    // Increment the pointer, VAList, by 8 to the next vaarg.
    NextPtr =
        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
    // Then, adjust VAList.
    unsigned InternalOffset = 4;
    VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
                         DAG.getConstant(InternalOffset, DL, PtrVT));
  } else {
    // Increment the pointer, VAList, by 8 to the next vaarg.
    NextPtr =
        DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
  }

  // Store the incremented VAList to the legalized pointer.
  InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));

  // Load the actual argument out of the pointer VAList.
  // We can't count on greater alignment than the word size.
  return DAG.getLoad(
      VT, DL,
InChain, VAList, MachinePointerInfo(),
      Align(std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8));
}

SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
                                                  SelectionDAG &DAG) const {
  // Generate following code.
  //   (void)__llvm_grow_stack(size);
  //   ret = GETSTACKTOP;        // pseudo instruction
  SDLoc DL(Op);

  // Get the inputs.
  SDNode *Node = Op.getNode();
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  MaybeAlign Alignment(Op.getConstantOperandVal(2));
  EVT VT = Node->getValueType(0);

  // Chain the dynamic stack allocation so that it doesn't modify the stack
  // pointer when other instructions are using the stack.
  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);

  const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
  Align StackAlign = TFI.getStackAlign();
  bool NeedsAlign = Alignment.valueOrOne() > StackAlign;

  // Prepare arguments
  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;
  Entry.Node = Size;
  Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
  Args.push_back(Entry);
  if (NeedsAlign) {
    Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Args.push_back(Entry);
  }
  Type *RetTy = Type::getVoidTy(*DAG.getContext());

  EVT PtrVT = Op.getValueType();
  SDValue Callee;
  if (NeedsAlign) {
    Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
  } else {
    Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
  }

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
      .setDiscardResult(true);
  std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
  Chain = pair.second;
  SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
  if (NeedsAlign) {
    Result = DAG.getNode(ISD::ADD, DL, VT, Result,
                         DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
    Result = DAG.getNode(ISD::AND, DL, VT, Result,
                         DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
  }
  //  Chain = Result.getValue(1);
  Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), DL);

  SDValue Ops[2] = {Result, Chain};
  return DAG.getMergeValues(Ops, DL);
}

SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0),
                     Op.getOperand(1));
}

SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL,
                     DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
                     Op.getOperand(1));
}

SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
                                                      SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
                     Op.getOperand(0));
}

static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
                              const VETargetLowering &TLI,
                              const VESubtarget *Subtarget) {
  SDLoc DL(Op);
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = TLI.getPointerTy(MF.getDataLayout());

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  unsigned Depth = Op.getConstantOperandVal(0);
  const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  Register FrameReg = RegInfo->getFrameRegister(MF);
  SDValue FrameAddr =
      DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), DL, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo());
  return FrameAddr;
}

static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
                               const VETargetLowering &TLI,
                               const VESubtarget *Subtarget)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Offset = DAG.getConstant(8, DL, VT);
  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                     MachinePointerInfo());
}

SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  unsigned IntNo = Op.getConstantOperandVal(0);
  switch (IntNo) {
  default: // Don't custom lower most intrinsics.
    return SDValue();
  case Intrinsic::eh_sjlj_lsda: {
    MachineFunction &MF = DAG.getMachineFunction();
    MVT VT = Op.getSimpleValueType();
    const VETargetMachine *TM =
        static_cast<const VETargetMachine *>(&DAG.getTarget());

    // Create GCC_except_tableXX string.  The real symbol for that will be
    // generated in EHStreamer::emitExceptionTable() later.  So, we just
    // borrow its name here.
    TM->getStrList()->push_back(std::string(
        (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
    SDValue Addr =
        DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), VT, 0);
    if (isPositionIndependent()) {
      Addr = makeHiLoPair(Addr, VEMCExpr::VK_VE_GOTOFF_HI32,
                          VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
      SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT);
      return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Addr);
    }
    return makeHiLoPair(Addr, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
  }
  }
}

// Return true (and set UniqueIdx) iff N is a BUILD_VECTOR with exactly one
// non-undef operand.
static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
  if (!isa<BuildVectorSDNode>(N))
    return false;
  const auto *BVN = cast<BuildVectorSDNode>(N);

  // Find first non-undef insertion.
  unsigned Idx;
  for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
    auto ElemV = BVN->getOperand(Idx);
    if (!ElemV->isUndef())
      break;
  }
  // Catch the (hypothetical) all-undef case.
  if (Idx == BVN->getNumOperands())
    return false;
  // Remember insertion.
  UniqueIdx = Idx++;
  // Verify that all other insertions are undef.
  for (; Idx < BVN->getNumOperands(); ++Idx) {
    auto ElemV = BVN->getOperand(Idx);
    if (!ElemV->isUndef())
      return false;
  }
  return true;
}

// Return the splat value of N if it is a splat BUILD_VECTOR, else an empty
// SDValue.
static SDValue getSplatValue(SDNode *N) {
  if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
    return BuildVec->getSplatValue();
  }
  return SDValue();
}

SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                            SelectionDAG &DAG) const {
  VECustomDAG CDAG(DAG, Op);
  MVT ResultVT = Op.getSimpleValueType();

  // If there is just one element, expand to INSERT_VECTOR_ELT.
  unsigned UniqueIdx;
  if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
    SDValue AccuV = CDAG.getUNDEF(Op.getValueType());
    auto ElemV = Op->getOperand(UniqueIdx);
    SDValue IdxV = CDAG.getConstant(UniqueIdx, MVT::i64);
    return
CDAG.getNode(ISD::INSERT_VECTOR_ELT, ResultVT, {AccuV, ElemV, IdxV});1839}18401841// Else emit a broadcast.1842if (SDValue ScalarV = getSplatValue(Op.getNode())) {1843unsigned NumEls = ResultVT.getVectorNumElements();1844auto AVL = CDAG.getConstant(NumEls, MVT::i32);1845return CDAG.getBroadcast(ResultVT, ScalarV, AVL);1846}18471848// Expand1849return SDValue();1850}18511852TargetLowering::LegalizeAction1853VETargetLowering::getCustomOperationAction(SDNode &Op) const {1854// Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize1855// these operations (transform nodes such that their AVL parameter refers to1856// packs of 64bit, instead of number of elements.18571858// Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to1859// re-visit them.1860if (isPackingSupportOpcode(Op.getOpcode()))1861return Legal;18621863// Custom lower to legalize AVL for packed mode.1864if (isVVPOrVEC(Op.getOpcode()))1865return Custom;1866return Legal;1867}18681869SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {1870LLVM_DEBUG(dbgs() << "::LowerOperation "; Op.dump(&DAG));1871unsigned Opcode = Op.getOpcode();18721873/// Scalar isel.1874switch (Opcode) {1875case ISD::ATOMIC_FENCE:1876return lowerATOMIC_FENCE(Op, DAG);1877case ISD::ATOMIC_SWAP:1878return lowerATOMIC_SWAP(Op, DAG);1879case ISD::BlockAddress:1880return lowerBlockAddress(Op, DAG);1881case ISD::ConstantPool:1882return lowerConstantPool(Op, DAG);1883case ISD::DYNAMIC_STACKALLOC:1884return lowerDYNAMIC_STACKALLOC(Op, DAG);1885case ISD::EH_SJLJ_LONGJMP:1886return lowerEH_SJLJ_LONGJMP(Op, DAG);1887case ISD::EH_SJLJ_SETJMP:1888return lowerEH_SJLJ_SETJMP(Op, DAG);1889case ISD::EH_SJLJ_SETUP_DISPATCH:1890return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);1891case ISD::FRAMEADDR:1892return lowerFRAMEADDR(Op, DAG, *this, Subtarget);1893case ISD::GlobalAddress:1894return lowerGlobalAddress(Op, DAG);1895case ISD::GlobalTLSAddress:1896return lowerGlobalTLSAddress(Op, 
DAG);1897case ISD::INTRINSIC_WO_CHAIN:1898return lowerINTRINSIC_WO_CHAIN(Op, DAG);1899case ISD::JumpTable:1900return lowerJumpTable(Op, DAG);1901case ISD::LOAD:1902return lowerLOAD(Op, DAG);1903case ISD::RETURNADDR:1904return lowerRETURNADDR(Op, DAG, *this, Subtarget);1905case ISD::BUILD_VECTOR:1906return lowerBUILD_VECTOR(Op, DAG);1907case ISD::STORE:1908return lowerSTORE(Op, DAG);1909case ISD::VASTART:1910return lowerVASTART(Op, DAG);1911case ISD::VAARG:1912return lowerVAARG(Op, DAG);19131914case ISD::INSERT_VECTOR_ELT:1915return lowerINSERT_VECTOR_ELT(Op, DAG);1916case ISD::EXTRACT_VECTOR_ELT:1917return lowerEXTRACT_VECTOR_ELT(Op, DAG);1918}19191920/// Vector isel.1921if (ISD::isVPOpcode(Opcode))1922return lowerToVVP(Op, DAG);19231924switch (Opcode) {1925default:1926llvm_unreachable("Should not custom lower this!");19271928// Legalize the AVL of this internal node.1929case VEISD::VEC_BROADCAST:1930#define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:1931#include "VVPNodes.def"1932// AVL already legalized.1933if (getAnnotatedNodeAVL(Op).second)1934return Op;1935return legalizeInternalVectorOp(Op, DAG);19361937// Translate into a VEC_*/VVP_* layer operation.1938case ISD::MLOAD:1939case ISD::MSTORE:1940#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:1941#include "VVPNodes.def"1942if (isMaskArithmetic(Op) && isPackedVectorType(Op.getValueType()))1943return splitMaskArithmetic(Op, DAG);1944return lowerToVVP(Op, DAG);1945}1946}1947/// } Custom Lower19481949void VETargetLowering::ReplaceNodeResults(SDNode *N,1950SmallVectorImpl<SDValue> &Results,1951SelectionDAG &DAG) const {1952switch (N->getOpcode()) {1953case ISD::ATOMIC_SWAP:1954// Let LLVM expand atomic swap instruction through LowerOperation.1955return;1956default:1957LLVM_DEBUG(N->dumpr(&DAG));1958llvm_unreachable("Do not know how to custom type legalize this operation!");1959}1960}19611962/// JumpTable for VE.1963///1964/// VE cannot generate relocatable symbol in jump table. 
VE cannot1965/// generate expressions using symbols in both text segment and data1966/// segment like below.1967/// .4byte .LBB0_2-.LJTI0_01968/// So, we generate offset from the top of function like below as1969/// a custom label.1970/// .4byte .LBB0_2-<function name>19711972unsigned VETargetLowering::getJumpTableEncoding() const {1973// Use custom label for PIC.1974if (isPositionIndependent())1975return MachineJumpTableInfo::EK_Custom32;19761977// Otherwise, use the normal jump table encoding heuristics.1978return TargetLowering::getJumpTableEncoding();1979}19801981const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(1982const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,1983unsigned Uid, MCContext &Ctx) const {1984assert(isPositionIndependent());19851986// Generate custom label for PIC like below.1987// .4bytes .LBB0_2-<function name>1988const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);1989MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data());1990const auto *Base = MCSymbolRefExpr::create(Sym, Ctx);1991return MCBinaryExpr::createSub(Value, Base, Ctx);1992}19931994SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,1995SelectionDAG &DAG) const {1996assert(isPositionIndependent());1997SDLoc DL(Table);1998Function *Function = &DAG.getMachineFunction().getFunction();1999assert(Function != nullptr);2000auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace());20012002// In the jump table, we have following values in PIC mode.2003// .4bytes .LBB0_2-<function name>2004// We need to add this value and the address of this function to generate2005// .LBB0_2 label correctly under PIC mode. 
So, we want to generate following2006// instructions:2007// lea %reg, fun@gotoff_lo2008// and %reg, %reg, (32)02009// lea.sl %reg, fun@gotoff_hi(%reg, %got)2010// In order to do so, we need to genarate correctly marked DAG node using2011// makeHiLoPair.2012SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy);2013SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,2014VEMCExpr::VK_VE_GOTOFF_LO32, DAG);2015SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy);2016return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo);2017}20182019Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,2020MachineBasicBlock::iterator I,2021MachineBasicBlock *TargetBB,2022const DebugLoc &DL) const {2023MachineFunction *MF = MBB.getParent();2024MachineRegisterInfo &MRI = MF->getRegInfo();2025const VEInstrInfo *TII = Subtarget->getInstrInfo();20262027const TargetRegisterClass *RC = &VE::I64RegClass;2028Register Tmp1 = MRI.createVirtualRegister(RC);2029Register Tmp2 = MRI.createVirtualRegister(RC);2030Register Result = MRI.createVirtualRegister(RC);20312032if (isPositionIndependent()) {2033// Create following instructions for local linkage PIC code.2034// lea %Tmp1, TargetBB@gotoff_lo2035// and %Tmp2, %Tmp1, (32)02036// lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT2037BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)2038.addImm(0)2039.addImm(0)2040.addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_LO32);2041BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)2042.addReg(Tmp1, getKillRegState(true))2043.addImm(M0(32));2044BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)2045.addReg(VE::SX15)2046.addReg(Tmp2, getKillRegState(true))2047.addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_HI32);2048} else {2049// Create following instructions for non-PIC code.2050// lea %Tmp1, TargetBB@lo2051// and %Tmp2, %Tmp1, (32)02052// lea.sl %Result, TargetBB@hi(%Tmp2)2053BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)2054.addImm(0)2055.addImm(0)2056.addMBB(TargetBB, 
VEMCExpr::VK_VE_LO32);2057BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)2058.addReg(Tmp1, getKillRegState(true))2059.addImm(M0(32));2060BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)2061.addReg(Tmp2, getKillRegState(true))2062.addImm(0)2063.addMBB(TargetBB, VEMCExpr::VK_VE_HI32);2064}2065return Result;2066}20672068Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,2069MachineBasicBlock::iterator I,2070StringRef Symbol, const DebugLoc &DL,2071bool IsLocal = false,2072bool IsCall = false) const {2073MachineFunction *MF = MBB.getParent();2074MachineRegisterInfo &MRI = MF->getRegInfo();2075const VEInstrInfo *TII = Subtarget->getInstrInfo();20762077const TargetRegisterClass *RC = &VE::I64RegClass;2078Register Result = MRI.createVirtualRegister(RC);20792080if (isPositionIndependent()) {2081if (IsCall && !IsLocal) {2082// Create following instructions for non-local linkage PIC code function2083// calls. These instructions uses IC and magic number -24, so we expand2084// them in VEAsmPrinter.cpp from GETFUNPLT pseudo instruction.2085// lea %Reg, Symbol@plt_lo(-24)2086// and %Reg, %Reg, (32)02087// sic %s162088// lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT2089BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result)2090.addExternalSymbol("abort");2091} else if (IsLocal) {2092Register Tmp1 = MRI.createVirtualRegister(RC);2093Register Tmp2 = MRI.createVirtualRegister(RC);2094// Create following instructions for local linkage PIC code.2095// lea %Tmp1, Symbol@gotoff_lo2096// and %Tmp2, %Tmp1, (32)02097// lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT2098BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)2099.addImm(0)2100.addImm(0)2101.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_LO32);2102BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)2103.addReg(Tmp1, getKillRegState(true))2104.addImm(M0(32));2105BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)2106.addReg(VE::SX15)2107.addReg(Tmp2, 
getKillRegState(true))2108.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_HI32);2109} else {2110Register Tmp1 = MRI.createVirtualRegister(RC);2111Register Tmp2 = MRI.createVirtualRegister(RC);2112// Create following instructions for not local linkage PIC code.2113// lea %Tmp1, Symbol@got_lo2114// and %Tmp2, %Tmp1, (32)02115// lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT2116// ld %Result, 0(%Tmp3)2117Register Tmp3 = MRI.createVirtualRegister(RC);2118BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)2119.addImm(0)2120.addImm(0)2121.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_LO32);2122BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)2123.addReg(Tmp1, getKillRegState(true))2124.addImm(M0(32));2125BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3)2126.addReg(VE::SX15)2127.addReg(Tmp2, getKillRegState(true))2128.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_HI32);2129BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result)2130.addReg(Tmp3, getKillRegState(true))2131.addImm(0)2132.addImm(0);2133}2134} else {2135Register Tmp1 = MRI.createVirtualRegister(RC);2136Register Tmp2 = MRI.createVirtualRegister(RC);2137// Create following instructions for non-PIC code.2138// lea %Tmp1, Symbol@lo2139// and %Tmp2, %Tmp1, (32)02140// lea.sl %Result, Symbol@hi(%Tmp2)2141BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)2142.addImm(0)2143.addImm(0)2144.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_LO32);2145BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)2146.addReg(Tmp1, getKillRegState(true))2147.addImm(M0(32));2148BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)2149.addReg(Tmp2, getKillRegState(true))2150.addImm(0)2151.addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_HI32);2152}2153return Result;2154}21552156void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,2157MachineBasicBlock *MBB,2158MachineBasicBlock *DispatchBB,2159int FI, int Offset) const {2160DebugLoc DL = MI.getDebugLoc();2161const VEInstrInfo *TII = 
Subtarget->getInstrInfo();21622163Register LabelReg =2164prepareMBB(*MBB, MachineBasicBlock::iterator(MI), DispatchBB, DL);21652166// Store an address of DispatchBB to a given jmpbuf[1] where has next IC2167// referenced by longjmp (throw) later.2168MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));2169addFrameReference(MIB, FI, Offset); // jmpbuf[1]2170MIB.addReg(LabelReg, getKillRegState(true));2171}21722173MachineBasicBlock *2174VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,2175MachineBasicBlock *MBB) const {2176DebugLoc DL = MI.getDebugLoc();2177MachineFunction *MF = MBB->getParent();2178const TargetInstrInfo *TII = Subtarget->getInstrInfo();2179const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();2180MachineRegisterInfo &MRI = MF->getRegInfo();21812182const BasicBlock *BB = MBB->getBasicBlock();2183MachineFunction::iterator I = ++MBB->getIterator();21842185// Memory Reference.2186SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),2187MI.memoperands_end());2188Register BufReg = MI.getOperand(1).getReg();21892190Register DstReg;21912192DstReg = MI.getOperand(0).getReg();2193const TargetRegisterClass *RC = MRI.getRegClass(DstReg);2194assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");2195(void)TRI;2196Register MainDestReg = MRI.createVirtualRegister(RC);2197Register RestoreDestReg = MRI.createVirtualRegister(RC);21982199// For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate following2200// instructions. 
SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.2201//2202// ThisMBB:2203// buf[3] = %s17 iff %s17 is used as BP2204// buf[1] = RestoreMBB as IC after longjmp2205// # SjLjSetup RestoreMBB2206//2207// MainMBB:2208// v_main = 02209//2210// SinkMBB:2211// v = phi(v_main, MainMBB, v_restore, RestoreMBB)2212// ...2213//2214// RestoreMBB:2215// %s17 = buf[3] = iff %s17 is used as BP2216// v_restore = 12217// goto SinkMBB22182219MachineBasicBlock *ThisMBB = MBB;2220MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);2221MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);2222MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);2223MF->insert(I, MainMBB);2224MF->insert(I, SinkMBB);2225MF->push_back(RestoreMBB);2226RestoreMBB->setMachineBlockAddressTaken();22272228// Transfer the remainder of BB and its successor edges to SinkMBB.2229SinkMBB->splice(SinkMBB->begin(), MBB,2230std::next(MachineBasicBlock::iterator(MI)), MBB->end());2231SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);22322233// ThisMBB:2234Register LabelReg =2235prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL);22362237// Store BP in buf[3] iff this function is using BP.2238const VEFrameLowering *TFI = Subtarget->getFrameLowering();2239if (TFI->hasBP(*MF)) {2240MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));2241MIB.addReg(BufReg);2242MIB.addImm(0);2243MIB.addImm(24);2244MIB.addReg(VE::SX17);2245MIB.setMemRefs(MMOs);2246}22472248// Store IP in buf[1].2249MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));2250MIB.add(MI.getOperand(1)); // we can preserve the kill flags here.2251MIB.addImm(0);2252MIB.addImm(8);2253MIB.addReg(LabelReg, getKillRegState(true));2254MIB.setMemRefs(MMOs);22552256// SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.22572258// Insert setup.2259MIB =2260BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);22612262const VERegisterInfo *RegInfo = 
Subtarget->getRegisterInfo();2263MIB.addRegMask(RegInfo->getNoPreservedMask());2264ThisMBB->addSuccessor(MainMBB);2265ThisMBB->addSuccessor(RestoreMBB);22662267// MainMBB:2268BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)2269.addImm(0)2270.addImm(0)2271.addImm(0);2272MainMBB->addSuccessor(SinkMBB);22732274// SinkMBB:2275BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)2276.addReg(MainDestReg)2277.addMBB(MainMBB)2278.addReg(RestoreDestReg)2279.addMBB(RestoreMBB);22802281// RestoreMBB:2282// Restore BP from buf[3] iff this function is using BP. The address of2283// buf is in SX10.2284// FIXME: Better to not use SX10 here2285if (TFI->hasBP(*MF)) {2286MachineInstrBuilder MIB =2287BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);2288MIB.addReg(VE::SX10);2289MIB.addImm(0);2290MIB.addImm(24);2291MIB.setMemRefs(MMOs);2292}2293BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)2294.addImm(0)2295.addImm(0)2296.addImm(1);2297BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);2298RestoreMBB->addSuccessor(SinkMBB);22992300MI.eraseFromParent();2301return SinkMBB;2302}23032304MachineBasicBlock *2305VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,2306MachineBasicBlock *MBB) const {2307DebugLoc DL = MI.getDebugLoc();2308MachineFunction *MF = MBB->getParent();2309const TargetInstrInfo *TII = Subtarget->getInstrInfo();2310MachineRegisterInfo &MRI = MF->getRegInfo();23112312// Memory Reference.2313SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),2314MI.memoperands_end());2315Register BufReg = MI.getOperand(0).getReg();23162317Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass);2318// Since FP is only updated here but NOT referenced, it's treated as GPR.2319Register FP = VE::SX9;2320Register SP = VE::SX11;23212322MachineInstrBuilder MIB;23232324MachineBasicBlock *ThisMBB = MBB;23252326// For `call @llvm.eh.sjlj.longjmp(buf)`, we generate following instructions.2327//2328// ThisMBB:2329// %fp = load 
buf[0]2330// %jmp = load buf[1]2331// %s10 = buf ; Store an address of buf to SX10 for RestoreMBB2332// %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.2333// jmp %jmp23342335// Reload FP.2336MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP);2337MIB.addReg(BufReg);2338MIB.addImm(0);2339MIB.addImm(0);2340MIB.setMemRefs(MMOs);23412342// Reload IP.2343MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp);2344MIB.addReg(BufReg);2345MIB.addImm(0);2346MIB.addImm(8);2347MIB.setMemRefs(MMOs);23482349// Copy BufReg to SX10 for later use in setjmp.2350// FIXME: Better to not use SX10 here2351BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10)2352.addReg(BufReg)2353.addImm(0);23542355// Reload SP.2356MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP);2357MIB.add(MI.getOperand(0)); // we can preserve the kill flags here.2358MIB.addImm(0);2359MIB.addImm(16);2360MIB.setMemRefs(MMOs);23612362// Jump.2363BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t))2364.addReg(Tmp, getKillRegState(true))2365.addImm(0);23662367MI.eraseFromParent();2368return ThisMBB;2369}23702371MachineBasicBlock *2372VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,2373MachineBasicBlock *BB) const {2374DebugLoc DL = MI.getDebugLoc();2375MachineFunction *MF = BB->getParent();2376MachineFrameInfo &MFI = MF->getFrameInfo();2377MachineRegisterInfo &MRI = MF->getRegInfo();2378const VEInstrInfo *TII = Subtarget->getInstrInfo();2379int FI = MFI.getFunctionContextIndex();23802381// Get a mapping of the call site numbers to all of the landing pads they're2382// associated with.2383DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;2384unsigned MaxCSNum = 0;2385for (auto &MBB : *MF) {2386if (!MBB.isEHPad())2387continue;23882389MCSymbol *Sym = nullptr;2390for (const auto &MI : MBB) {2391if (MI.isDebugInstr())2392continue;23932394assert(MI.isEHLabel() && "expected EH_LABEL");2395Sym = MI.getOperand(0).getMCSymbol();2396break;2397}23982399if 
(!MF->hasCallSiteLandingPad(Sym))2400continue;24012402for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {2403CallSiteNumToLPad[CSI].push_back(&MBB);2404MaxCSNum = std::max(MaxCSNum, CSI);2405}2406}24072408// Get an ordered list of the machine basic blocks for the jump table.2409std::vector<MachineBasicBlock *> LPadList;2410SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;2411LPadList.reserve(CallSiteNumToLPad.size());24122413for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {2414for (auto &LP : CallSiteNumToLPad[CSI]) {2415LPadList.push_back(LP);2416InvokeBBs.insert(LP->pred_begin(), LP->pred_end());2417}2418}24192420assert(!LPadList.empty() &&2421"No landing pad destinations for the dispatch jump table!");24222423// The %fn_context is allocated like below (from --print-after=sjljehprepare):2424// %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }2425//2426// This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.2427// First `i64` is callsite, so callsite is FI+8.2428static const int OffsetIC = 72;2429static const int OffsetCS = 8;24302431// Create the MBBs for the dispatch code like following:2432//2433// ThisMBB:2434// Prepare DispatchBB address and store it to buf[1].2435// ...2436//2437// DispatchBB:2438// %s15 = GETGOT iff isPositionIndependent2439// %callsite = load callsite2440// brgt.l.t #size of callsites, %callsite, DispContBB2441//2442// TrapBB:2443// Call abort.2444//2445// DispContBB:2446// %breg = address of jump table2447// %pc = load and calculate next pc from %breg and %callsite2448// jmp %pc24492450// Shove the dispatch's address into the return slot in the function context.2451MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();2452DispatchBB->setIsEHPad(true);24532454// Trap BB will causes trap like `assert(0)`.2455MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();2456DispatchBB->addSuccessor(TrapBB);24572458MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();2459DispatchBB->addSuccessor(DispContBB);24602461// 
Insert MBBs.2462MF->push_back(DispatchBB);2463MF->push_back(DispContBB);2464MF->push_back(TrapBB);24652466// Insert code to call abort in the TrapBB.2467Register Abort = prepareSymbol(*TrapBB, TrapBB->end(), "abort", DL,2468/* Local */ false, /* Call */ true);2469BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10)2470.addReg(Abort, getKillRegState(true))2471.addImm(0)2472.addImm(0);24732474// Insert code into the entry block that creates and registers the function2475// context.2476setupEntryBlockForSjLj(MI, BB, DispatchBB, FI, OffsetIC);24772478// Create the jump table and associated information2479unsigned JTE = getJumpTableEncoding();2480MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE);2481unsigned MJTI = JTI->createJumpTableIndex(LPadList);24822483const VERegisterInfo &RI = TII->getRegisterInfo();2484// Add a register mask with no preserved registers. This results in all2485// registers being marked as clobbered.2486BuildMI(DispatchBB, DL, TII->get(VE::NOP))2487.addRegMask(RI.getNoPreservedMask());24882489if (isPositionIndependent()) {2490// Force to generate GETGOT, since current implementation doesn't store GOT2491// register.2492BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15);2493}24942495// IReg is used as an index in a memory operand and therefore can't be SP2496const TargetRegisterClass *RC = &VE::I64RegClass;2497Register IReg = MRI.createVirtualRegister(RC);2498addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI,2499OffsetCS);2500if (LPadList.size() < 64) {2501BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t))2502.addImm(VECC::CC_ILE)2503.addImm(LPadList.size())2504.addReg(IReg)2505.addMBB(TrapBB);2506} else {2507assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");2508Register TmpReg = MRI.createVirtualRegister(RC);2509BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg)2510.addImm(0)2511.addImm(0)2512.addImm(LPadList.size());2513BuildMI(DispatchBB, DL, 
TII->get(VE::BRCFLrr_t))2514.addImm(VECC::CC_ILE)2515.addReg(TmpReg, getKillRegState(true))2516.addReg(IReg)2517.addMBB(TrapBB);2518}25192520Register BReg = MRI.createVirtualRegister(RC);2521Register Tmp1 = MRI.createVirtualRegister(RC);2522Register Tmp2 = MRI.createVirtualRegister(RC);25232524if (isPositionIndependent()) {2525// Create following instructions for local linkage PIC code.2526// lea %Tmp1, .LJTI0_0@gotoff_lo2527// and %Tmp2, %Tmp1, (32)02528// lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT2529BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)2530.addImm(0)2531.addImm(0)2532.addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32);2533BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)2534.addReg(Tmp1, getKillRegState(true))2535.addImm(M0(32));2536BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg)2537.addReg(VE::SX15)2538.addReg(Tmp2, getKillRegState(true))2539.addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32);2540} else {2541// Create following instructions for non-PIC code.2542// lea %Tmp1, .LJTI0_0@lo2543// and %Tmp2, %Tmp1, (32)02544// lea.sl %BReg, .LJTI0_0@hi(%Tmp2)2545BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)2546.addImm(0)2547.addImm(0)2548.addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32);2549BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)2550.addReg(Tmp1, getKillRegState(true))2551.addImm(M0(32));2552BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg)2553.addReg(Tmp2, getKillRegState(true))2554.addImm(0)2555.addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32);2556}25572558switch (JTE) {2559case MachineJumpTableInfo::EK_BlockAddress: {2560// Generate simple block address code for no-PIC model.2561// sll %Tmp1, %IReg, 32562// lds %TReg, 0(%Tmp1, %BReg)2563// bcfla %TReg25642565Register TReg = MRI.createVirtualRegister(RC);2566Register Tmp1 = MRI.createVirtualRegister(RC);25672568BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)2569.addReg(IReg, getKillRegState(true))2570.addImm(3);2571BuildMI(DispContBB, DL, 
TII->get(VE::LDrri), TReg)2572.addReg(BReg, getKillRegState(true))2573.addReg(Tmp1, getKillRegState(true))2574.addImm(0);2575BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))2576.addReg(TReg, getKillRegState(true))2577.addImm(0);2578break;2579}2580case MachineJumpTableInfo::EK_Custom32: {2581// Generate block address code using differences from the function pointer2582// for PIC model.2583// sll %Tmp1, %IReg, 22584// ldl.zx %OReg, 0(%Tmp1, %BReg)2585// Prepare function address in BReg2.2586// adds.l %TReg, %BReg2, %OReg2587// bcfla %TReg25882589assert(isPositionIndependent());2590Register OReg = MRI.createVirtualRegister(RC);2591Register TReg = MRI.createVirtualRegister(RC);2592Register Tmp1 = MRI.createVirtualRegister(RC);25932594BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)2595.addReg(IReg, getKillRegState(true))2596.addImm(2);2597BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg)2598.addReg(BReg, getKillRegState(true))2599.addReg(Tmp1, getKillRegState(true))2600.addImm(0);2601Register BReg2 =2602prepareSymbol(*DispContBB, DispContBB->end(),2603DispContBB->getParent()->getName(), DL, /* Local */ true);2604BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg)2605.addReg(OReg, getKillRegState(true))2606.addReg(BReg2, getKillRegState(true));2607BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))2608.addReg(TReg, getKillRegState(true))2609.addImm(0);2610break;2611}2612default:2613llvm_unreachable("Unexpected jump table encoding");2614}26152616// Add the jump table entries as successors to the MBB.2617SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;2618for (auto &LP : LPadList)2619if (SeenMBBs.insert(LP).second)2620DispContBB->addSuccessor(LP);26212622// N.B. 
// the order the invoke BBs are processed in doesn't matter here.
SmallVector<MachineBasicBlock *, 64> MBBLPads;
const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
for (MachineBasicBlock *MBB : InvokeBBs) {
  // Remove the landing pad successor from the invoke block and replace it
  // with the new dispatch block.
  // Keep a copy of Successors since it's modified inside the loop.
  SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
                                                 MBB->succ_rend());
  // FIXME: Avoid quadratic complexity.
  for (auto *MBBS : Successors) {
    if (MBBS->isEHPad()) {
      MBB->removeSuccessor(MBBS);
      MBBLPads.push_back(MBBS);
    }
  }

  // Every former invoke block now branches to the single dispatch block.
  MBB->addSuccessor(DispatchBB);

  // Find the invoke call and mark all of the callee-saved registers as
  // 'implicit defined' so that they're spilled.  This prevents code from
  // moving instructions to before the EH block, where they will never be
  // executed.
  for (auto &II : reverse(*MBB)) {
    if (!II.isCall())
      continue;

    // Registers already mentioned on the call don't need an extra
    // implicit-def operand.
    DenseMap<Register, bool> DefRegs;
    for (auto &MOp : II.operands())
      if (MOp.isReg())
        DefRegs[MOp.getReg()] = true;

    MachineInstrBuilder MIB(*MF, &II);
    for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
      Register Reg = SavedRegs[RI];
      if (!DefRegs[Reg])
        MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
    }

    // Only the last call in the block is the invoke; stop after it.
    break;
  }
}

// Mark all former landing pads as non-landing pads.  The dispatch is the
// only landing pad now.
for (auto &LP : MBBLPads)
  LP->setIsEHPad(false);

// The instruction is gone now.
MI.eraseFromParent();
return BB;
}

/// Expand pseudo instructions that were marked usesCustomInserter at
/// instruction-selection time.  Dispatches on the pseudo's opcode.
MachineBasicBlock *
VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                              MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Unknown Custom Instruction!");
  case VE::EH_SjLj_LongJmp:
    return emitEHSjLjLongJmp(MI, BB);
  case VE::EH_SjLj_SetJmp:
    return emitEHSjLjSetJmp(MI, BB);
  case VE::EH_SjLj_Setup_Dispatch:
    return emitSjLjDispatchBlock(MI, BB);
  }
}

/// Return true iff \p V is a scalar constant encodable as VE's simm7
/// immediate (a 7-bit signed integer operand field).
static bool isSimm7(SDValue V) {
  EVT VT = V.getValueType();
  if (VT.isVector())
    return false;

  if (VT.isInteger()) {
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
      return isInt<7>(C->getSExtValue());
  } else if (VT.isFloatingPoint()) {
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
      if (VT == MVT::f32 || VT == MVT::f64) {
        const APInt &Imm = C->getValueAPF().bitcastToAPInt();
        uint64_t Val = Imm.getSExtValue();
        if (Imm.getBitWidth() == 32)
          Val <<= 32; // An f32 immediate is placed in the higher bits on VE.
        return isInt<7>(Val);
      }
    }
  }
  return false;
}

/// Return true iff \p V is a scalar constant encodable as VE's MImm
/// immediate form.
static bool isMImm(SDValue V) {
  EVT VT = V.getValueType();
  if (VT.isVector())
    return false;

  if (VT.isInteger()) {
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
      return isMImmVal(getImmVal(C));
  } else if (VT.isFloatingPoint()) {
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
      if (VT == MVT::f32) {
        // A float value is placed at the higher bits, so ignore the lower
        // 32 bits.
        return isMImm32Val(getFpImmVal(C) >> 32);
      } else if (VT == MVT::f64) {
        return isMImmVal(getFpImmVal(C));
      }
    }
  }
  return false;
}

/// Choose the VE comparison opcode for a compare of type \p SrcVT under
/// condition \p CC: CMPQ for f128, CMPF for other FP types, and
/// CMPI/CMPU for signed/unsigned integer conditions respectively.
static unsigned decideComp(EVT SrcVT, ISD::CondCode CC) {
  if (SrcVT.isFloatingPoint()) {
    if (SrcVT == MVT::f128)
      return VEISD::CMPQ;
    return VEISD::CMPF;
  }
  return isSignedIntSetCC(CC) ? VEISD::CMPI : VEISD::CMPU;
}

/// Result type of the generated comparison node.  VE's f128 comparison
/// produces an f64 result; every other type compares to itself.
static EVT decideCompType(EVT SrcVT) {
  if (SrcVT == MVT::f128)
    return MVT::f64;
  return SrcVT;
}

/// Return true when an explicit comparison against zero can be omitted,
/// i.e. the LHS value itself can feed the conditional operation directly.
/// \p WithCMov indicates the consumer is a conditional move.
static bool safeWithoutCompWithNull(EVT SrcVT, ISD::CondCode CC,
                                    bool WithCMov) {
  if (SrcVT.isFloatingPoint()) {
    // For the case of floating point setcc, only unordered comparison
    // or general comparison with -enable-no-nans-fp-math option reach
    // here, so it is safe even if values are NaN.  Only f128 isn't
    // safe since VE uses the f64 result of an f128 comparison.
    return SrcVT != MVT::f128;
  }
  if (isIntEqualitySetCC(CC)) {
    // For the case of equal or not equal, it is safe without comparison
    // with 0.
    return true;
  }
  if (WithCMov) {
    // For the case of integer setcc with cmov, all signed comparisons with 0
    // are safe.
    return isSignedIntSetCC(CC);
  }
  // For the case of integer setcc, only signed 64-bit comparison is safe.
  // For unsigned, "CMPU 0x80000000, 0" has to be greater than 0, but it
  // becomes less than 0 without CMPU.  For 32 bits, the other half of the
  // 32 bits is unconditional, so it is not safe without CMPI either.
  return isSignedIntSetCC(CC) && SrcVT == MVT::i64;
}

/// Build the comparison node feeding a conditional operation, or return
/// \p LHS directly when comparing with zero is provably redundant.
static SDValue generateComparison(EVT VT, SDValue LHS, SDValue RHS,
                                  ISD::CondCode CC, bool WithCMov,
                                  const SDLoc &DL, SelectionDAG &DAG) {
  // Compare values.
  // If RHS is 0 and it is safe to calculate without
  // comparison, we don't generate an instruction for comparison.
  EVT CompVT = decideCompType(VT);
  if (CompVT == VT && safeWithoutCompWithNull(VT, CC, WithCMov) &&
      (isNullConstant(RHS) || isNullFPConstant(RHS))) {
    return LHS;
  }
  return DAG.getNode(decideComp(VT, CC), DL, CompVT, LHS, RHS);
}

/// Combine a scalar ISD::SELECT into VEISD::CMOV, swapping True/False (and
/// inverting the condition) so that an MImm constant lands in the True
/// operand, which VE's conditional move can encode directly.
SDValue VETargetLowering::combineSelect(SDNode *N,
                                        DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SELECT &&
         "Should be called with a SELECT node");
  // SELECT picks True when Cond is non-zero, hence SETNE as the base CC.
  ISD::CondCode CC = ISD::CondCode::SETNE;
  SDValue Cond = N->getOperand(0);
  SDValue True = N->getOperand(1);
  SDValue False = N->getOperand(2);

  // We handle only scalar SELECT.
  EVT VT = N->getValueType(0);
  if (VT.isVector())
    return SDValue();

  // Perform combineSelect after legalize DAG.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  EVT VT0 = Cond.getValueType();
  if (isMImm(True)) {
    // VE's conditional move can handle MImm in the True clause, so nothing
    // to do.
  } else if (isMImm(False)) {
    // VE's conditional move can handle MImm in the True clause, so swap the
    // True and False clauses if False has an MImm value.  And, update the
    // condition code.
    std::swap(True, False);
    CC = getSetCCInverse(CC, VT0);
  }

  SDLoc DL(N);
  SelectionDAG &DAG = DCI.DAG;
  VECC::CondCode VECCVal;
  if (VT0.isFloatingPoint()) {
    VECCVal = fpCondCode2Fcc(CC);
  } else {
    VECCVal = intCondCode2Icc(CC);
  }
  SDValue Ops[] = {Cond, True, False,
                   DAG.getConstant(VECCVal, DL, MVT::i32)};
  return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
}

/// Combine a scalar ISD::SELECT_CC into a VE comparison followed by
/// VEISD::CMOV, rearranging operands so immediates sit where VE's
/// instructions can encode them (MImm in RHS/True, simm7 via swap).
SDValue VETargetLowering::combineSelectCC(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SELECT_CC &&
         "Should be called with a SELECT_CC node");
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue True = N->getOperand(2);
  SDValue False = N->getOperand(3);

  // We handle only scalar SELECT_CC.
  EVT VT = N->getValueType(0);
  if (VT.isVector())
    return SDValue();

  // Perform combineSelectCC after legalize DAG.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // We handle only i32/i64/f32/f64/f128 comparisons.
  EVT LHSVT = LHS.getValueType();
  assert(LHSVT == RHS.getValueType());
  switch (LHSVT.getSimpleVT().SimpleTy) {
  case MVT::i32:
  case MVT::i64:
  case MVT::f32:
  case MVT::f64:
  case MVT::f128:
    break;
  default:
    // Return SDValue to let llvm handle other types.
    return SDValue();
  }

  if (isMImm(RHS)) {
    // VE's comparison can handle MImm in RHS, so nothing to do.
  } else if (isSimm7(RHS)) {
    // VE's comparison can handle Simm7 in LHS, so swap LHS and RHS, and
    // update condition code.
    std::swap(LHS, RHS);
    CC = getSetCCSwappedOperands(CC);
  }
  if (isMImm(True)) {
    // VE's conditional move can handle MImm in the True clause, so nothing
    // to do.
  } else if (isMImm(False)) {
    // VE's conditional move can handle MImm in the True clause, so swap the
    // True and False clauses if False has an MImm value.
    // And, update the condition code.
    std::swap(True, False);
    CC = getSetCCInverse(CC, LHSVT);
  }

  SDLoc DL(N);
  SelectionDAG &DAG = DCI.DAG;

  // The consumer is a CMOV, which widens the set of cases where the
  // comparison-with-zero can be skipped (see safeWithoutCompWithNull).
  bool WithCMov = true;
  SDValue CompNode = generateComparison(LHSVT, LHS, RHS, CC, WithCMov, DL, DAG);

  VECC::CondCode VECCVal;
  if (LHSVT.isFloatingPoint()) {
    VECCVal = fpCondCode2Fcc(CC);
  } else {
    VECCVal = intCondCode2Icc(CC);
  }
  SDValue Ops[] = {CompNode, True, False,
                   DAG.getConstant(VECCVal, DL, MVT::i32)};
  return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
}

static bool isI32InsnAllUses(const SDNode *User, const SDNode *N);

/// Return true if \p User consumes its use of \p N (a truncate's result)
/// purely as an i32 value, so the truncate can later be replaced by a
/// cheap subregister extract (see combineTRUNCATE).
static bool isI32Insn(const SDNode *User, const SDNode *N) {
  switch (User->getOpcode()) {
  default:
    return false;
  case ISD::ADD:
  case ISD::SUB:
  case ISD::MUL:
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SETCC:
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::SHL:
  case ISD::SRA:
  case ISD::BSWAP:
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::BR_CC:
  case ISD::BITCAST:
  case ISD::ATOMIC_CMP_SWAP:
  case ISD::ATOMIC_SWAP:
  case VEISD::CMPU:
  case VEISD::CMPI:
    return true;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() != ISD::SRL)
      return true;
    // (srl (trunc (srl ...))) may be optimized by combining srl, so
    // doesn't optimize trunc now.
    return false;
  case ISD::SELECT_CC:
    if (User->getOperand(2).getNode() != N &&
        User->getOperand(3).getNode() != N)
      return true;
    return isI32InsnAllUses(User, N);
  case VEISD::CMOV:
    // CMOV in (cmov (trunc ...), true, false, int-comparison) is safe.
    // However, trunc in true or false clauses is not safe.
    if (User->getOperand(1).getNode() != N &&
        User->getOperand(2).getNode() != N &&
        isa<ConstantSDNode>(User->getOperand(3))) {
      VECC::CondCode VECCVal =
          static_cast<VECC::CondCode>(User->getConstantOperandVal(3));
      return isIntVECondCode(VECCVal);
    }
    [[fallthrough]];
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
  case ISD::SELECT:
  case ISD::CopyToReg:
    // Check all use of selections, bit operations, and copies.  If all of
    // them are safe, optimize truncate to extract_subreg.
    return isI32InsnAllUses(User, N);
  }
}

/// Check every use of \p User; return true only when each one either is an
/// i32-safe instruction itself or is an extension that will re-clear the
/// upper 32 bits anyway.
static bool isI32InsnAllUses(const SDNode *User, const SDNode *N) {
  // Check all use of User node.  If all of them are safe, optimize
  // truncate to extract_subreg.
  for (const SDNode *U : User->uses()) {
    switch (U->getOpcode()) {
    default:
      // If the use is an instruction which treats the source operand as i32,
      // it is safe to avoid truncate here.
      if (isI32Insn(U, N))
        continue;
      break;
    case ISD::ANY_EXTEND:
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND: {
      // Special optimizations to the combination of ext and trunc.
      // (ext ... (select ... (trunc ...))) is safe to avoid truncate here
      // since this truncate instruction clears higher 32 bits which is filled
      // by one of ext instructions later.
      assert(N->getValueType(0) == MVT::i32 &&
             "find truncate to not i32 integer");
      if (User->getOpcode() == ISD::SELECT_CC ||
          User->getOpcode() == ISD::SELECT || User->getOpcode() == VEISD::CMOV)
        continue;
      break;
    }
    }
    return false;
  }
  return true;
}

// Optimize TRUNCATE in DAG combining.  Optimizing it in CUSTOM lower is
// sometimes too early.  Optimizing it in DAG pattern matching in
// VEInstrInfo.td is sometimes too late.
So, doing it at here.2987SDValue VETargetLowering::combineTRUNCATE(SDNode *N,2988DAGCombinerInfo &DCI) const {2989assert(N->getOpcode() == ISD::TRUNCATE &&2990"Should be called with a TRUNCATE node");29912992SelectionDAG &DAG = DCI.DAG;2993SDLoc DL(N);2994EVT VT = N->getValueType(0);29952996// We prefer to do this when all types are legal.2997if (!DCI.isAfterLegalizeDAG())2998return SDValue();29993000// Skip combine TRUNCATE atm if the operand of TRUNCATE might be a constant.3001if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC &&3002isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) &&3003isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))3004return SDValue();30053006// Check all use of this TRUNCATE.3007for (const SDNode *User : N->uses()) {3008// Make sure that we're not going to replace TRUNCATE for non i323009// instructions.3010//3011// FIXME: Although we could sometimes handle this, and it does occur in3012// practice that one of the condition inputs to the select is also one of3013// the outputs, we currently can't deal with this.3014if (isI32Insn(User, N))3015continue;30163017return SDValue();3018}30193020SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);3021return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT,3022N->getOperand(0), SubI32),30230);3024}30253026SDValue VETargetLowering::PerformDAGCombine(SDNode *N,3027DAGCombinerInfo &DCI) const {3028switch (N->getOpcode()) {3029default:3030break;3031case ISD::SELECT:3032return combineSelect(N, DCI);3033case ISD::SELECT_CC:3034return combineSelectCC(N, DCI);3035case ISD::TRUNCATE:3036return combineTRUNCATE(N, DCI);3037}30383039return SDValue();3040}30413042//===----------------------------------------------------------------------===//3043// VE Inline Assembly Support3044//===----------------------------------------------------------------------===//30453046VETargetLowering::ConstraintType3047VETargetLowering::getConstraintType(StringRef Constraint) const {3048if 
(Constraint.size() == 1) {3049switch (Constraint[0]) {3050default:3051break;3052case 'v': // vector registers3053return C_RegisterClass;3054}3055}3056return TargetLowering::getConstraintType(Constraint);3057}30583059std::pair<unsigned, const TargetRegisterClass *>3060VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,3061StringRef Constraint,3062MVT VT) const {3063const TargetRegisterClass *RC = nullptr;3064if (Constraint.size() == 1) {3065switch (Constraint[0]) {3066default:3067return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);3068case 'r':3069RC = &VE::I64RegClass;3070break;3071case 'v':3072RC = &VE::V64RegClass;3073break;3074}3075return std::make_pair(0U, RC);3076}30773078return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);3079}30803081//===----------------------------------------------------------------------===//3082// VE Target Optimization Support3083//===----------------------------------------------------------------------===//30843085unsigned VETargetLowering::getMinimumJumpTableEntries() const {3086// Specify 8 for PIC model to relieve the impact of PIC load instructions.3087if (isJumpTableRelative())3088return 8;30893090return TargetLowering::getMinimumJumpTableEntries();3091}30923093bool VETargetLowering::hasAndNot(SDValue Y) const {3094EVT VT = Y.getValueType();30953096// VE doesn't have vector and not instruction.3097if (VT.isVector())3098return false;30993100// VE allows different immediate values for X and Y where ~X & Y.3101// Only simm7 works for X, and only mimm works for Y on VE. However, this3102// function is used to check whether an immediate value is OK for and-not3103// instruction as both X and Y. Generating additional instruction to3104// retrieve an immediate value is no good since the purpose of this3105// function is to convert a series of 3 instructions to another series of3106// 3 instructions with better parallelism. 
  // Therefore, we return false
  // for all immediate values now.
  // FIXME: Change hasAndNot function to have two operands to make it work
  // correctly with Aurora VE.
  if (isa<ConstantSDNode>(Y))
    return false;

  // It's ok for generic registers.
  return true;
}

/// Lower EXTRACT_VECTOR_ELT for the packed v512i32/v512f32 types: read the
/// 64-bit element that holds the pair, shift the wanted 32-bit half down,
/// and mask/extract it as an i32 (bitcast to f32 for v512f32).
SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
  MVT VT = Op.getOperand(0).getSimpleValueType();

  // Special treatment for packed V64 types.
  assert(VT == MVT::v512i32 || VT == MVT::v512f32);
  (void)VT;
  // Example of codes:
  //   %packed_v = extractelt %vr, %idx / 2
  //   %v = %packed_v >> (%idx % 2 * 32)
  //   %res = %v & 0xffffffff

  SDValue Vec = Op.getOperand(0);
  SDValue Idx = Op.getOperand(1);
  SDLoc DL(Op);
  SDValue Result = Op;
  if (false /* Idx->isConstant() */) {
    // TODO: optimized implementation using constant values
  } else {
    SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
    // HalfIdx = Idx / 2 selects the 64-bit element containing the pair.
    SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
    SDValue PackedElt =
        SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
    // Shift = ((Idx & 1) ^ 1) << 5, i.e. 32 for even indices (upper half)
    // and 0 for odd indices (lower half).
    SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
    SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
    SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
    Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
    PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
    SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64);
    PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
    SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
    Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                        MVT::i32, PackedElt, SubI32),
                     0);

    if (Op.getSimpleValueType() == MVT::f32) {
      Result = DAG.getBitcast(MVT::f32, Result);
    } else {
      assert(Op.getSimpleValueType() == MVT::i32);
    }
  }
  return Result;
}

/// Lower INSERT_VECTOR_ELT for the packed v512i32/v512f32 types: load the
/// 64-bit element holding the pair, clear the targeted 32-bit half, OR in
/// the new value, and store the element back into the vector register.
SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                 SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
  MVT VT = Op.getOperand(0).getSimpleValueType();

  // Special treatment for packed V64 types.
  assert(VT == MVT::v512i32 || VT == MVT::v512f32);
  (void)VT;
  // The v512i32 and v512f32 starts from upper bits (0..31).  This "upper
  // bits" required `val << 32` from C implementation's point of view.
  //
  // Example of codes:
  //   %packed_elt = extractelt %vr, (%idx >> 1)
  //   %shift = ((%idx & 1) ^ 1) << 5
  //   %packed_elt &= 0xffffffff00000000 >> shift
  //   %packed_elt |= (zext %val) << shift
  //   %vr = insertelt %vr, %packed_elt, (%idx >> 1)

  SDLoc DL(Op);
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);
  // Normalize index and value to i64/i32 integer form.
  if (Idx.getSimpleValueType() == MVT::i32)
    Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
  if (Val.getSimpleValueType() == MVT::f32)
    Val = DAG.getBitcast(MVT::i32, Val);
  assert(Val.getSimpleValueType() == MVT::i32);
  Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);

  SDValue Result = Op;
  if (false /* Idx->isConstant()*/) {
    // TODO: optimized implementation using constant values
  } else {
    SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
    // HalfIdx = Idx / 2 selects the 64-bit element containing the pair.
    SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
    SDValue PackedElt =
        SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
    // Shift = ((Idx & 1) ^ 1) << 5 — 32 for even indices, 0 for odd.
    SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
    SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
    SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
    Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
    // Keep the untouched half, then merge in the new value.
    SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64);
    Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
    PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
    Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
    PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
    Result =
        SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
                                   {HalfIdx, PackedElt, Vec}),
                0);
  }
  return Result;
}